| 1 | ! ============================================================================== |
| 2 | ! Module: parser |
| 3 | ! Purpose: Command line parsing and tokenization |
| 4 | ! ============================================================================== |
| 5 | module parser |
| 6 | use shell_types |
| 7 | use system_interface |
| 8 | use variables ! includes check_nounset |
| 9 | use glob |
| 10 | use error_handling |
| 11 | use performance |
| 12 | use iso_fortran_env, only: error_unit, input_unit |
| 13 | #ifdef USE_C_STRINGS |
| 14 | use iso_c_binding, only: c_char, c_int, c_ptr, c_size_t, c_f_pointer |
| 15 | #endif |
| 16 | implicit none |
| 17 | |
| 18 | ! Export backtick conversion for new parser |
| 19 | public :: convert_backticks_to_dollar_paren |
| 20 | public :: needs_compound_continuation |
| 21 | public :: remove_line_continuations |
| 22 | |
| 23 | contains |
| 24 | |
| 25 | subroutine parse_pipeline(input, pipeline) |
| 26 | character(len=*), intent(in) :: input |
| 27 | type(pipeline_t), intent(out) :: pipeline |
| 28 | |
| 29 | character(len=len(input)) :: working_input |
| 30 | integer :: start, cmd_count |
| 31 | integer :: i, newline_pos |
| 32 | type(command_t), allocatable :: temp_commands(:) |
| 33 | logical :: background, in_quotes, in_param_expansion, in_for_arith, after_case_in |
| 34 | character(len=1) :: quote_char |
| 35 | integer :: paren_depth, brace_depth, case_depth |
| 36 | |
| 37 | integer(int64) :: parse_start_time |
| 38 | |
| 39 | ! Start performance timing |
| 40 | call start_timer('parse_pipeline', parse_start_time) |
| 41 | |
| 42 | ! Validate input |
| 43 | if (.not. validate_command(input)) then |
| 44 | call parser_error(101, 'Invalid command input', 'parse_pipeline') |
| 45 | pipeline%num_commands = 0 |
| 46 | return |
| 47 | end if |
| 48 | |
| 49 | call debug_log('Parsing pipeline: ' // trim(input), 'parse_pipeline') |
| 50 | |
| 51 | allocate(temp_commands(MAX_PIPELINE)) |
| 52 | call track_allocation(MAX_PIPELINE * 1024, 'temp_commands') |
| 53 | |
| 54 | ! Strip comments (# to end of line, but not inside quotes or ${}) |
| 55 | working_input = input |
| 56 | in_quotes = .false. |
| 57 | quote_char = ' ' |
| 58 | in_param_expansion = .false. |
| 59 | do i = 1, len_trim(working_input) |
| 60 | if (in_quotes) then |
| 61 | if (working_input(i:i) == quote_char) then |
| 62 | in_quotes = .false. |
| 63 | end if |
| 64 | else |
| 65 | ! Track ${...} parameter expansion |
| 66 | if (i > 1 .and. working_input(i:i) == '{') then |
| 67 | if (working_input(i-1:i-1) == '$') in_param_expansion = .true. |
| 68 | else if (in_param_expansion .and. working_input(i:i) == '}') then |
| 69 | in_param_expansion = .false. |
| 70 | end if |
| 71 | |
| 72 | if (working_input(i:i) == '"' .or. working_input(i:i) == "'") then |
| 73 | in_quotes = .true. |
| 74 | quote_char = working_input(i:i) |
| 75 | else if (working_input(i:i) == '#' .and. .not. in_param_expansion) then |
| 76 | ! Only treat # as comment if not part of $# or ${...} |
| 77 | if (i > 1) then; if (working_input(i-1:i-1) == '$') then |
| 78 | ! This is $#, not a comment |
| 79 | cycle |
| 80 | end if; end if |
| 81 | ! Found comment - remove from # to end of line (but keep newline and rest) |
| 82 | ! Find the next newline |
| 83 | newline_pos = index(working_input(i:), char(10)) |
| 84 | if (newline_pos > 0) then |
| 85 | ! There's a newline - remove comment but keep newline and everything after |
| 86 | working_input = working_input(:i-1) // working_input(i+newline_pos-1:) |
| 87 | else |
| 88 | ! No newline - truncate to end |
| 89 | working_input = working_input(:i-1) |
| 90 | end if |
| 91 | exit |
| 92 | end if |
| 93 | end if |
| 94 | end do |
| 95 | cmd_count = 0 |
| 96 | start = 1 |
| 97 | background = .false. |
| 98 | |
| 99 | ! Check for background execution (&) |
| 100 | if (len_trim(working_input) > 0) then |
| 101 | if (working_input(len_trim(working_input):len_trim(working_input)) == '&') then |
| 102 | background = .true. |
| 103 | working_input = working_input(:len_trim(working_input)-1) |
| 104 | end if |
| 105 | end if |
| 106 | |
| 107 | ! Convert backticks to $() format BEFORE tokenization |
| 108 | ! This ensures complete backtick expressions are converted together |
| 109 | working_input = convert_backticks_to_dollar_paren(working_input) |
| 110 | |
| 111 | ! Parse commands and separators (track quotes to avoid splitting inside them) |
| 112 | i = 1 |
| 113 | in_quotes = .false. |
| 114 | quote_char = ' ' |
| 115 | in_for_arith = .false. |
| 116 | paren_depth = 0 |
| 117 | brace_depth = 0 |
| 118 | case_depth = 0 |
| 119 | after_case_in = .false. |
| 120 | do while (i <= len_trim(working_input)) |
| 121 | ! Track quote state |
| 122 | if (.not. in_quotes) then |
| 123 | if (working_input(i:i) == '"' .or. working_input(i:i) == "'") then |
| 124 | in_quotes = .true. |
| 125 | quote_char = working_input(i:i) |
| 126 | end if |
| 127 | else |
| 128 | if (working_input(i:i) == quote_char) then |
| 129 | in_quotes = .false. |
| 130 | end if |
| 131 | end if |
| 132 | |
| 133 | ! Only check for operators outside quotes |
| 134 | if (.not. in_quotes) then |
| 135 | ! Detect for (( at the beginning of command |
| 136 | if (.not. in_for_arith .and. i <= len_trim(working_input)-1) then |
| 137 | if (working_input(i:i+1) == '((') then |
| 138 | ! Check if 'for' appears before this |
| 139 | if (start <= i-1) then |
| 140 | if (index(working_input(start:i-1), 'for') > 0) then |
| 141 | in_for_arith = .true. |
| 142 | paren_depth = 0 ! Will be counted as we process |
| 143 | end if |
| 144 | end if |
| 145 | end if |
| 146 | end if |
| 147 | |
| 148 | ! Track parentheses depth (for subshells and for ((..))) |
| 149 | ! Skip tracking for $( which is command substitution |
| 150 | if (working_input(i:i) == '(') then |
| 151 | ! Check if this is $( command substitution - if so, skip tracking |
| 152 | if (i == 1) then |
| 153 | paren_depth = paren_depth + 1 |
| 154 | else if (working_input(i-1:i-1) /= '$') then |
| 155 | paren_depth = paren_depth + 1 |
| 156 | end if |
| 157 | else if (working_input(i:i) == ')') then |
| 158 | if (paren_depth > 0) then |
| 159 | paren_depth = paren_depth - 1 |
| 160 | ! Exit for (( when we've closed all parens |
| 161 | if (in_for_arith .and. paren_depth == 0) then |
| 162 | in_for_arith = .false. |
| 163 | end if |
| 164 | end if |
| 165 | end if |
| 166 | |
| 167 | ! Track brace depth for function definitions |
| 168 | if (working_input(i:i) == '{') then |
| 169 | brace_depth = brace_depth + 1 |
| 170 | else if (working_input(i:i) == '}') then |
| 171 | brace_depth = brace_depth - 1 |
| 172 | end if |
| 173 | |
| 174 | ! Track case statement depth (case...esac) |
| 175 | ! Check for 'case' keyword at word boundary |
| 176 | if (i <= len_trim(working_input) - 3) then |
| 177 | if (working_input(i:i+3) == 'case') then |
| 178 | ! Verify it's a word boundary (space or start of command before it) |
| 179 | block |
| 180 | logical :: at_word_boundary |
| 181 | if (i == 1) then |
| 182 | at_word_boundary = .true. |
| 183 | else |
| 184 | at_word_boundary = (working_input(i-1:i-1) == ' ' .or. working_input(i-1:i-1) == ';') |
| 185 | end if |
| 186 | if (at_word_boundary) then |
| 187 | ! Verify word boundary after (space or special char) |
| 188 | if (i+4 > len_trim(working_input) .or. working_input(i+4:i+4) == ' ' .or. & |
| 189 | working_input(i+4:i+4) == ';') then |
| 190 | case_depth = case_depth + 1 |
| 191 | after_case_in = .false. ! Reset when we see 'case' |
| 192 | end if |
| 193 | end if |
| 194 | end block |
| 195 | end if |
| 196 | end if |
| 197 | |
| 198 | ! Check for ' in ' keyword inside case statement (split after it) |
| 199 | if (case_depth > 0 .and. .not. after_case_in) then |
| 200 | if (i <= len_trim(working_input) - 2) then |
| 201 | if (working_input(i:i+1) == 'in') then |
| 202 | ! Verify word boundary before (space) |
| 203 | if (i > 1) then; if (working_input(i-1:i-1) == ' ') then |
| 204 | ! Verify word boundary after (space or end) |
| 205 | if (i+2 > len_trim(working_input) .or. working_input(i+2:i+2) == ' ') then |
| 206 | ! Split after ' in ' |
| 207 | ! Find the end of 'in' plus any trailing space |
| 208 | if (i+2 <= len_trim(working_input) .and. working_input(i+2:i+2) == ' ') then |
| 209 | cmd_count = cmd_count + 1 |
| 210 | if (cmd_count <= MAX_PIPELINE) then |
| 211 | call parse_single_command(working_input(start:i+1), temp_commands(cmd_count)) |
| 212 | temp_commands(cmd_count)%separator = SEP_SEMICOLON |
| 213 | end if |
| 214 | start = i + 3 ! Skip 'in ' |
| 215 | after_case_in = .true. |
| 216 | else |
| 217 | cmd_count = cmd_count + 1 |
| 218 | if (cmd_count <= MAX_PIPELINE) then |
| 219 | call parse_single_command(working_input(start:i+1), temp_commands(cmd_count)) |
| 220 | temp_commands(cmd_count)%separator = SEP_SEMICOLON |
| 221 | end if |
| 222 | start = i + 2 ! Skip 'in' |
| 223 | after_case_in = .true. |
| 224 | end if |
| 225 | i = start - 1 ! Will be incremented at end of loop |
| 226 | cycle |
| 227 | end if |
| 228 | end if; end if ! i > 1 guard |
| 229 | end if |
| 230 | end if |
| 231 | end if |
| 232 | |
| 233 | ! Check for 'esac' keyword at word boundary |
| 234 | if (i <= len_trim(working_input) - 3) then |
| 235 | if (working_input(i:i+3) == 'esac') then |
| 236 | ! Verify it's a word boundary (space or start before it) |
| 237 | block |
| 238 | logical :: esac_word_boundary |
| 239 | if (i == 1) then |
| 240 | esac_word_boundary = .true. |
| 241 | else |
| 242 | esac_word_boundary = (working_input(i-1:i-1) == ' ' .or. working_input(i-1:i-1) == ';') |
| 243 | end if |
| 244 | if (esac_word_boundary) then |
| 245 | ! Verify word boundary after (space, semicolon, or end) |
| 246 | if (i+4 > len_trim(working_input) .or. working_input(i+4:i+4) == ' ' .or. & |
| 247 | working_input(i+4:i+4) == ';') then |
| 248 | case_depth = case_depth - 1 |
| 249 | if (case_depth < 0) case_depth = 0 ! Prevent negative |
| 250 | after_case_in = .false. ! Reset after esac |
| 251 | end if |
| 252 | end if |
| 253 | end block |
| 254 | end if |
| 255 | end if |
| 256 | |
| 257 | ! Check for operators (but skip if inside parentheses/subshell) |
| 258 | if (i <= len_trim(working_input) - 1 .and. paren_depth == 0) then |
| 259 | if (working_input(i:i+1) == '&&') then |
| 260 | cmd_count = cmd_count + 1 |
| 261 | if (cmd_count <= MAX_PIPELINE) then |
| 262 | call parse_single_command(working_input(start:i-1), temp_commands(cmd_count)) |
| 263 | temp_commands(cmd_count)%separator = SEP_AND |
| 264 | end if |
| 265 | start = i + 2 |
| 266 | i = i + 2 |
| 267 | cycle |
| 268 | else if (working_input(i:i+1) == '||') then |
| 269 | cmd_count = cmd_count + 1 |
| 270 | if (cmd_count <= MAX_PIPELINE) then |
| 271 | call parse_single_command(working_input(start:i-1), temp_commands(cmd_count)) |
| 272 | temp_commands(cmd_count)%separator = SEP_OR |
| 273 | end if |
| 274 | start = i + 2 |
| 275 | i = i + 2 |
| 276 | cycle |
| 277 | end if |
| 278 | end if |
| 279 | |
| 280 | block |
| 281 | logical :: pipe_not_after_pipe, pipe_not_after_gt, pipe_not_before_pipe |
| 282 | if (i == 1) then |
| 283 | pipe_not_after_pipe = .true. |
| 284 | pipe_not_after_gt = .true. |
| 285 | else |
| 286 | pipe_not_after_pipe = (working_input(i-1:i-1) /= '|') |
| 287 | pipe_not_after_gt = (working_input(i-1:i-1) /= '>') |
| 288 | end if |
| 289 | if (i == len_trim(working_input)) then |
| 290 | pipe_not_before_pipe = .true. |
| 291 | else |
| 292 | pipe_not_before_pipe = (working_input(i+1:i+1) /= '|') |
| 293 | end if |
| 294 | if (working_input(i:i) == '|' .and. pipe_not_after_pipe .and. & |
| 295 | pipe_not_after_gt .and. pipe_not_before_pipe) then |
| 296 | ! Don't split on | if we're in a case pattern (after 'case...in') or inside parentheses (subshell) |
| 297 | if (.not. after_case_in .and. paren_depth == 0) then |
| 298 | cmd_count = cmd_count + 1 |
| 299 | if (cmd_count <= MAX_PIPELINE) then |
| 300 | call parse_single_command(working_input(start:i-1), temp_commands(cmd_count)) |
| 301 | temp_commands(cmd_count)%separator = SEP_PIPE |
| 302 | end if |
| 303 | start = i + 1 |
| 304 | end if |
| 305 | else if (working_input(i:i) == '&') then |
| 306 | ! Check it's not part of && (which is already handled above) |
| 307 | ! Also check it's not part of >& or <& (FD redirection) |
| 308 | block |
| 309 | logical :: amp_not_after_special, amp_not_before_amp |
| 310 | if (i == 1) then |
| 311 | amp_not_after_special = .true. |
| 312 | else |
| 313 | amp_not_after_special = (working_input(i-1:i-1) /= '&' .and. & |
| 314 | working_input(i-1:i-1) /= '>' .and. & |
| 315 | working_input(i-1:i-1) /= '<') |
| 316 | end if |
| 317 | if (i == len_trim(working_input)) then |
| 318 | amp_not_before_amp = .true. |
| 319 | else |
| 320 | amp_not_before_amp = (working_input(i+1:i+1) /= '&') |
| 321 | end if |
| 322 | if (amp_not_after_special .and. amp_not_before_amp) then |
| 323 | ! Single & - mark current command as background and split |
| 324 | cmd_count = cmd_count + 1 |
| 325 | if (cmd_count <= MAX_PIPELINE) then |
| 326 | call parse_single_command(working_input(start:i-1), temp_commands(cmd_count)) |
| 327 | temp_commands(cmd_count)%separator = SEP_SEMICOLON ! Execute like semicolon |
| 328 | temp_commands(cmd_count)%background = .true. ! But run in background |
| 329 | end if |
| 330 | start = i + 1 |
| 331 | end if |
| 332 | end block ! amp_not_after_special bounds-safe check |
| 333 | else if (working_input(i:i) == ';') then |
| 334 | ! Check for ;; (double semicolon) which is used in case statements |
| 335 | if (i < len_trim(working_input) .and. working_input(i+1:i+1) == ';') then |
| 336 | ! This is ;; - inside case statements it's a pattern terminator |
| 337 | if (case_depth > 0) then |
| 338 | cmd_count = cmd_count + 1 |
| 339 | if (cmd_count <= MAX_PIPELINE) then |
| 340 | call parse_single_command(working_input(start:i-1), temp_commands(cmd_count)) |
| 341 | temp_commands(cmd_count)%separator = SEP_SEMICOLON |
| 342 | end if |
| 343 | start = i + 2 ! Skip both semicolons |
| 344 | i = i + 1 ! Will be incremented again at end of loop |
| 345 | after_case_in = .false. ! Reset after ;;, ready for next pattern |
| 346 | else |
| 347 | ! POSIX: ;; outside case is treated as two semicolons (null command + separator) |
| 348 | ! Parse command before first semicolon (if any) |
| 349 | if (i > start) then |
| 350 | cmd_count = cmd_count + 1 |
| 351 | if (cmd_count <= MAX_PIPELINE) then |
| 352 | call parse_single_command(working_input(start:i-1), temp_commands(cmd_count)) |
| 353 | temp_commands(cmd_count)%separator = SEP_SEMICOLON |
| 354 | end if |
| 355 | end if |
| 356 | start = i + 2 ! Skip both semicolons |
| 357 | i = i + 1 ! Will be incremented again at end of loop |
| 358 | end if |
| 359 | ! Only split on single semicolon if not inside for (( ... )), function braces, subshells, or case statement |
| 360 | else if (in_for_arith .or. brace_depth > 0 .or. paren_depth > 0 .or. case_depth > 0) then |
| 361 | ! Skip - we're inside for (( ... )), function { ... }, subshell ( ... ), or case...esac |
| 362 | else |
| 363 | ! POSIX: semicolon at start is a null command - just skip it |
| 364 | if (i == start) then |
| 365 | start = i + 1 |
| 366 | else |
| 367 | cmd_count = cmd_count + 1 |
| 368 | if (cmd_count <= MAX_PIPELINE) then |
| 369 | call parse_single_command(working_input(start:i-1), temp_commands(cmd_count)) |
| 370 | temp_commands(cmd_count)%separator = SEP_SEMICOLON |
| 371 | end if |
| 372 | start = i + 1 |
| 373 | end if |
| 374 | end if |
| 375 | end if |
| 376 | end block ! pipe_not_after_pipe bounds-safe check |
| 377 | end if |
| 378 | |
| 379 | i = i + 1 |
| 380 | end do |
| 381 | |
| 382 | ! Don't forget the last command |
| 383 | if (start <= len_trim(working_input)) then |
| 384 | cmd_count = cmd_count + 1 |
| 385 | if (cmd_count <= MAX_PIPELINE) then |
| 386 | call parse_single_command(working_input(start:), temp_commands(cmd_count)) |
| 387 | temp_commands(cmd_count)%separator = SEP_NONE |
| 388 | end if |
| 389 | end if |
| 390 | |
| 391 | ! Set background flag for last command |
| 392 | if (cmd_count > 0 .and. background) then |
| 393 | temp_commands(cmd_count)%background = .true. |
| 394 | end if |
| 395 | |
| 396 | ! Copy to pipeline with explicit token copying |
| 397 | pipeline%num_commands = cmd_count |
| 398 | if (cmd_count > 0) then |
| 399 | allocate(pipeline%commands(cmd_count)) |
| 400 | do i = 1, cmd_count |
| 401 | ! Copy non-allocatable components |
| 402 | pipeline%commands(i)%num_tokens = temp_commands(i)%num_tokens |
| 403 | pipeline%commands(i)%append_output = temp_commands(i)%append_output |
| 404 | pipeline%commands(i)%append_error = temp_commands(i)%append_error |
| 405 | pipeline%commands(i)%force_clobber = temp_commands(i)%force_clobber |
| 406 | pipeline%commands(i)%redirect_stderr_to_stdout = temp_commands(i)%redirect_stderr_to_stdout |
| 407 | pipeline%commands(i)%redirect_stdout_to_stderr = temp_commands(i)%redirect_stdout_to_stderr |
| 408 | pipeline%commands(i)%redirect_both_to_file = temp_commands(i)%redirect_both_to_file |
| 409 | pipeline%commands(i)%background = temp_commands(i)%background |
| 410 | pipeline%commands(i)%separator = temp_commands(i)%separator |
| 411 | pipeline%commands(i)%is_command_group = temp_commands(i)%is_command_group |
| 412 | pipeline%commands(i)%is_subshell = temp_commands(i)%is_subshell |
| 413 | |
| 414 | ! Copy redirection array |
| 415 | pipeline%commands(i)%num_redirections = temp_commands(i)%num_redirections |
| 416 | pipeline%commands(i)%redirections = temp_commands(i)%redirections |
| 417 | |
| 418 | ! Copy prefix assignments (VAR=value command) |
| 419 | pipeline%commands(i)%num_prefix_assignments = temp_commands(i)%num_prefix_assignments |
| 420 | if (allocated(temp_commands(i)%prefix_assignments) .and. & |
| 421 | temp_commands(i)%num_prefix_assignments > 0) then |
| 422 | pipeline%commands(i)%prefix_assignments = temp_commands(i)%prefix_assignments |
| 423 | end if |
| 424 | |
| 425 | ! Copy allocatable components explicitly |
| 426 | if (allocated(temp_commands(i)%tokens)) then |
| 427 | allocate(character(len=MAX_TOKEN_LEN) :: pipeline%commands(i)%tokens(temp_commands(i)%num_tokens)) |
| 428 | pipeline%commands(i)%tokens = temp_commands(i)%tokens |
| 429 | end if |
| 430 | |
| 431 | if (allocated(temp_commands(i)%input_file)) then |
| 432 | pipeline%commands(i)%input_file = temp_commands(i)%input_file |
| 433 | end if |
| 434 | if (allocated(temp_commands(i)%output_file)) then |
| 435 | pipeline%commands(i)%output_file = temp_commands(i)%output_file |
| 436 | end if |
| 437 | if (allocated(temp_commands(i)%error_file)) then |
| 438 | pipeline%commands(i)%error_file = temp_commands(i)%error_file |
| 439 | end if |
| 440 | if (allocated(temp_commands(i)%heredoc_delimiter)) then |
| 441 | pipeline%commands(i)%heredoc_delimiter = temp_commands(i)%heredoc_delimiter |
| 442 | end if |
| 443 | if (allocated(temp_commands(i)%heredoc_content)) then |
| 444 | pipeline%commands(i)%heredoc_content = temp_commands(i)%heredoc_content |
| 445 | end if |
| 446 | ! Copy heredoc quoted flag |
| 447 | pipeline%commands(i)%heredoc_quoted = temp_commands(i)%heredoc_quoted |
| 448 | |
| 449 | if (allocated(temp_commands(i)%here_string)) then |
| 450 | pipeline%commands(i)%here_string = temp_commands(i)%here_string |
| 451 | end if |
| 452 | if (allocated(temp_commands(i)%group_content)) then |
| 453 | pipeline%commands(i)%group_content = temp_commands(i)%group_content |
| 454 | end if |
| 455 | if (allocated(temp_commands(i)%subshell_content)) then |
| 456 | pipeline%commands(i)%subshell_content = temp_commands(i)%subshell_content |
| 457 | end if |
| 458 | end do |
| 459 | end if |
| 460 | |
| 461 | deallocate(temp_commands) |
| 462 | call track_deallocation(MAX_PIPELINE * 1024, 'temp_commands') |
| 463 | |
| 464 | ! End performance timing |
| 465 | call end_timer('parse_pipeline', parse_start_time, total_parse_time) |
| 466 | total_commands = total_commands + 1 |
| 467 | |
| 468 | ! Trigger auto memory optimization periodically |
| 469 | if (mod(total_commands, 50_int64) == 0) then |
| 470 | call auto_optimize_memory() |
| 471 | end if |
| 472 | end subroutine |
| 473 | |
| 474 | subroutine parse_single_command(input, cmd) |
| 475 | character(len=*), intent(in) :: input |
| 476 | type(command_t), intent(out) :: cmd |
| 477 | |
| 478 | character(len=len(input)) :: working_input |
| 479 | integer :: pos, end_pos, source_fd |
| 480 | character(len=MAX_TOKEN_LEN) :: temp_str |
| 481 | |
| 482 | |
| 483 | working_input = adjustl(input) |
| 484 | ! write(error_unit, '(a,a)') 'DEBUG parse_single_command input: ', trim(working_input) |
| 485 | |
| 486 | ! Handle subshell grouping ( ... ) |
| 487 | ! Check if input starts with ( and ends with ) |
| 488 | if (len_trim(working_input) >= 3) then |
| 489 | if (working_input(1:1) == '(') then |
| 490 | ! Find the position of the closing ) |
| 491 | pos = len_trim(working_input) |
| 492 | if (working_input(pos:pos) == ')') then |
| 493 | ! This is a subshell - mark it and store the inner content |
| 494 | cmd%is_subshell = .true. |
| 495 | cmd%subshell_content = adjustl(working_input(2:pos-1)) |
| 496 | ! Don't tokenize the content - it will be re-parsed during execution |
| 497 | cmd%num_tokens = 0 |
| 498 | return |
| 499 | end if |
| 500 | end if |
| 501 | end if |
| 502 | |
| 503 | ! Handle command grouping { ... } |
| 504 | ! Check if input starts with { and ends with } |
| 505 | if (len_trim(working_input) >= 3) then |
| 506 | if (working_input(1:1) == '{') then |
| 507 | ! Find the position of the closing } |
| 508 | pos = len_trim(working_input) |
| 509 | if (working_input(pos:pos) == '}') then |
| 510 | ! This is a command group - mark it and store the inner content |
| 511 | cmd%is_command_group = .true. |
| 512 | cmd%group_content = adjustl(working_input(2:pos-1)) |
| 513 | ! Don't tokenize the content - it will be re-parsed during execution |
| 514 | cmd%num_tokens = 0 |
| 515 | return |
| 516 | end if |
| 517 | end if |
| 518 | end if |
| 519 | |
| 520 | ! Skip redirection processing for arithmetic commands ((expression)) and for (( loops |
| 521 | if (len_trim(working_input) >= 2 .and. working_input(1:2) == '((') then |
| 522 | ! This is an arithmetic command - tokenize directly without processing redirects |
| 523 | call tokenize_with_substitution(trim(working_input), cmd%tokens, cmd%num_tokens) |
| 524 | return |
| 525 | end if |
| 526 | |
| 527 | ! Also skip redirection processing for arithmetic for loops for((...)) |
| 528 | if (len_trim(working_input) >= 5 .and. working_input(1:5) == 'for((') then |
| 529 | ! This is an arithmetic for loop - tokenize directly without processing redirects |
| 530 | call tokenize_with_substitution(trim(working_input), cmd%tokens, cmd%num_tokens) |
| 531 | return |
| 532 | end if |
| 533 | |
| 534 | ! Also handle 'for ((' with space (standard bash syntax) |
| 535 | if (len_trim(working_input) >= 6 .and. working_input(1:6) == 'for ((') then |
| 536 | ! This is an arithmetic for loop with space - tokenize directly without processing redirects |
| 537 | call tokenize_with_substitution(trim(working_input), cmd%tokens, cmd%num_tokens) |
| 538 | return |
| 539 | end if |
| 540 | |
| 541 | ! Check for here-string (<<<) - must come before here document |
| 542 | pos = find_outside_quotes(working_input, '<<<') |
| 543 | if (pos > 0) then |
| 544 | call extract_filename(working_input(pos+3:), temp_str) |
| 545 | cmd%here_string = trim(temp_str) |
| 546 | working_input = working_input(:pos-1) |
| 547 | else |
| 548 | ! Check for here document (<<) |
| 549 | pos = find_outside_quotes(working_input, '<<') |
| 550 | if (pos > 0) then |
| 551 | call extract_word(working_input(pos+2:), temp_str) |
| 552 | ! Strip quotes from delimiter if present and track if it was quoted |
| 553 | cmd%heredoc_delimiter = trim(temp_str) |
| 554 | cmd%heredoc_quoted = strip_heredoc_delimiter_quotes(cmd%heredoc_delimiter) |
| 555 | ! Try to extract heredoc content from input if it contains newlines |
| 556 | call extract_heredoc_from_input(input, trim(cmd%heredoc_delimiter), cmd%heredoc_content) |
| 557 | working_input = working_input(:pos-1) |
| 558 | end if |
| 559 | end if |
| 560 | |
| 561 | ! Check for specific 2>&1 redirection (stderr to stdout) - keep this as it's common |
| 562 | ! and is best handled specially |
| 563 | pos = find_outside_quotes(working_input, '2>&1') |
| 564 | if (pos > 0) then |
| 565 | cmd%redirect_stderr_to_stdout = .true. |
| 566 | working_input = working_input(:pos-1) // ' ' // working_input(pos+4:) |
| 567 | end if |
| 568 | |
| 569 | ! Note: >&2, 1>&2, and other FD redirections are now handled by the general |
| 570 | ! FD duplication code below |
| 571 | |
| 572 | ! Check for variable FD redirections >&${var} and <&${var} |
| 573 | pos = find_outside_quotes(working_input, '>&$') |
| 574 | if (pos > 0 .and. pos + 3 <= len_trim(working_input)) then |
| 575 | if (working_input(pos+3:pos+3) == '{') then |
| 576 | ! Found >&${...} pattern |
| 577 | end_pos = index(working_input(pos+4:), '}') |
| 578 | if (end_pos > 0) then |
| 579 | end_pos = pos + 3 + end_pos ! Adjust for full string position |
| 580 | ! Store variable expression and add redirection |
| 581 | if (cmd%num_redirections < 10) then |
| 582 | cmd%num_redirections = cmd%num_redirections + 1 |
| 583 | cmd%redirections(cmd%num_redirections)%type = REDIR_DUP_OUT |
| 584 | cmd%redirections(cmd%num_redirections)%fd = 1 ! Default stdout |
| 585 | cmd%redirections(cmd%num_redirections)%target_fd = -1 ! Will be resolved at runtime |
| 586 | cmd%redirections(cmd%num_redirections)%target_fd_expr = working_input(pos+2:end_pos) ! Include ${...} |
| 587 | ! Remove from working input |
| 588 | working_input = working_input(:pos-1) // ' ' // working_input(end_pos+1:) |
| 589 | end if |
| 590 | end if |
| 591 | end if |
| 592 | end if |
| 593 | |
| 594 | pos = find_outside_quotes(working_input, '<&$') |
| 595 | if (pos > 0 .and. pos + 3 <= len_trim(working_input)) then |
| 596 | if (working_input(pos+3:pos+3) == '{') then |
| 597 | ! Found <&${...} pattern |
| 598 | end_pos = index(working_input(pos+4:), '}') |
| 599 | if (end_pos > 0) then |
| 600 | end_pos = pos + 3 + end_pos ! Adjust for full string position |
| 601 | ! Store variable expression and add redirection |
| 602 | if (cmd%num_redirections < 10) then |
| 603 | cmd%num_redirections = cmd%num_redirections + 1 |
| 604 | cmd%redirections(cmd%num_redirections)%type = REDIR_DUP_IN |
| 605 | cmd%redirections(cmd%num_redirections)%fd = 0 ! Default stdin |
| 606 | cmd%redirections(cmd%num_redirections)%target_fd = -1 ! Will be resolved at runtime |
| 607 | cmd%redirections(cmd%num_redirections)%target_fd_expr = working_input(pos+2:end_pos) ! Include ${...} |
| 608 | ! Remove from working input |
| 609 | working_input = working_input(:pos-1) // ' ' // working_input(end_pos+1:) |
| 610 | end if |
| 611 | end if |
| 612 | end if |
| 613 | end if |
| 614 | |
| 615 | ! Check for general FD duplication >&n FIRST (must come before file redirections) |
| 616 | ! Use find_outside_quotes and iterate to find all patterns |
| 617 | pos = find_outside_quotes(working_input, '>&') |
| 618 | do while (pos > 0) |
| 619 | ! Debug output |
| 620 | ! write(error_unit, '(a,i15,a,a)') 'DEBUG: Found >& at pos ', pos, ' in: ', trim(working_input) |
| 621 | |
| 622 | ! Check what follows >& |
| 623 | if (pos+2 <= len_trim(working_input)) then |
| 624 | ! write(error_unit, '(a,a)') 'DEBUG: Character after >& is: ', working_input(pos+2:pos+2) |
| 625 | |
| 626 | if (working_input(pos+2:pos+2) >= '0' .and. working_input(pos+2:pos+2) <= '9') then |
| 627 | ! Found >&n pattern - literal FD duplication |
| 628 | read(working_input(pos+2:pos+2), *) end_pos |
| 629 | ! write(error_unit, '(a,i15)') 'DEBUG: Processing >&n where n=', end_pos |
| 630 | |
| 631 | ! Check if there's a source FD before >& |
| 632 | source_fd = 1 ! Default to stdout |
| 633 | if (pos > 1 .and. working_input(pos-1:pos-1) >= '0' .and. working_input(pos-1:pos-1) <= '9') then |
| 634 | read(working_input(pos-1:pos-1), *) source_fd |
| 635 | ! write(error_unit, '(a,i15,a,i15)') 'DEBUG: Found source FD=', source_fd, ' redirecting to target FD=', end_pos |
| 636 | end if |
| 637 | |
| 638 | if (cmd%num_redirections < 10) then |
| 639 | cmd%num_redirections = cmd%num_redirections + 1 |
| 640 | cmd%redirections(cmd%num_redirections)%type = REDIR_DUP_OUT |
| 641 | cmd%redirections(cmd%num_redirections)%fd = source_fd |
| 642 | cmd%redirections(cmd%num_redirections)%target_fd = end_pos |
| 643 | end if |
| 644 | ! Remove from working input - also remove source FD if present |
| 645 | if (pos > 1 .and. working_input(pos-1:pos-1) >= '0' .and. working_input(pos-1:pos-1) <= '9') then |
| 646 | working_input = working_input(:pos-2) // ' ' // working_input(pos+3:) |
| 647 | else |
| 648 | working_input = working_input(:pos-1) // ' ' // working_input(pos+3:) |
| 649 | end if |
| 650 | ! write(error_unit, '(a,a)') 'DEBUG: After removal: ', trim(working_input) |
| 651 | ! Search again from beginning since we modified the string |
| 652 | pos = find_outside_quotes(working_input, '>&') |
| 653 | else |
| 654 | ! Not a digit after >&, skip this occurrence and find next |
| 655 | ! write(error_unit, '(a)') 'DEBUG: Not a digit after >&, exiting FD dup handler' |
| 656 | ! For now, just exit - we'll let the general > handler deal with it |
| 657 | exit |
| 658 | end if |
| 659 | else |
| 660 | ! write(error_unit, '(a)') 'DEBUG: No character after >&, exiting' |
| 661 | exit |
| 662 | end if |
| 663 | end do |
| 664 | |
| 665 | ! if (pos == 0) then |
| 666 | ! write(error_unit, '(a)') 'DEBUG: No >& patterns found' |
| 667 | ! end if |
| 668 | |
| 669 | ! Check for <&n patterns |
| 670 | pos = find_outside_quotes(working_input, '<&') |
| 671 | do while (pos > 0) |
| 672 | ! Check what follows <& |
| 673 | if (pos+2 <= len_trim(working_input)) then |
| 674 | if (working_input(pos+2:pos+2) >= '0' .and. working_input(pos+2:pos+2) <= '9') then |
| 675 | ! Found <&n pattern - literal FD duplication |
| 676 | read(working_input(pos+2:pos+2), *) end_pos |
| 677 | if (cmd%num_redirections < 10) then |
| 678 | cmd%num_redirections = cmd%num_redirections + 1 |
| 679 | cmd%redirections(cmd%num_redirections)%type = REDIR_DUP_IN |
| 680 | cmd%redirections(cmd%num_redirections)%fd = 0 ! Default stdin |
| 681 | cmd%redirections(cmd%num_redirections)%target_fd = end_pos |
| 682 | end if |
| 683 | ! Remove from working input |
| 684 | working_input = working_input(:pos-1) // ' ' // working_input(pos+3:) |
| 685 | ! Search again from beginning since we modified the string |
| 686 | pos = find_outside_quotes(working_input, '<&') |
| 687 | else |
| 688 | ! Not a digit after <&, skip this occurrence and find next |
| 689 | ! For now, just exit - we'll let other handlers deal with it |
| 690 | exit |
| 691 | end if |
| 692 | else |
| 693 | exit |
| 694 | end if |
| 695 | end do |
| 696 | |
| 697 | ! Now continue with other redirections |
| 698 | if (.not. (cmd%redirect_stderr_to_stdout)) then |
| 699 | ! Check for &>file or &>>file (both stdout and stderr to file) |
| 700 | pos = find_outside_quotes(working_input, '&>>') |
| 701 | if (pos > 0) then |
| 702 | cmd%redirect_both_to_file = .true. |
| 703 | cmd%append_output = .true. |
| 704 | cmd%append_error = .true. |
| 705 | call extract_filename(working_input(pos+3:), temp_str) |
| 706 | cmd%output_file = trim(temp_str) |
| 707 | cmd%error_file = trim(temp_str) |
| 708 | working_input = working_input(:pos-1) |
| 709 | else |
| 710 | pos = find_outside_quotes(working_input, '&>') |
| 711 | if (pos > 0) then |
| 712 | cmd%redirect_both_to_file = .true. |
| 713 | cmd%append_output = .false. |
| 714 | cmd%append_error = .false. |
| 715 | call extract_filename(working_input(pos+2:), temp_str) |
| 716 | cmd%output_file = trim(temp_str) |
| 717 | cmd%error_file = trim(temp_str) |
| 718 | working_input = working_input(:pos-1) |
| 719 | end if |
| 720 | end if |
| 721 | end if |
| 722 | |
| 723 | ! Check for error redirection (2>>) |
| 724 | pos = find_outside_quotes(working_input, '2>>') |
| 725 | if (pos > 0) then |
| 726 | cmd%append_error = .true. |
| 727 | call extract_filename(working_input(pos+3:), temp_str) |
| 728 | cmd%error_file = trim(temp_str) |
| 729 | working_input = working_input(:pos-1) |
| 730 | else |
| 731 | ! Check for error redirection (2>) |
| 732 | pos = find_outside_quotes(working_input, '2>') |
| 733 | if (pos > 0) then |
| 734 | cmd%append_error = .false. |
| 735 | call extract_filename(working_input(pos+2:), temp_str) |
| 736 | cmd%error_file = trim(temp_str) |
| 737 | working_input = working_input(:pos-1) |
| 738 | end if |
| 739 | end if |
| 740 | |
| 741 | ! Check for force clobber FIRST (>|) before append (>>) |
| 742 | ! This prevents ">|" from matching ">>" |
| 743 | pos = find_outside_quotes(working_input, '>|') |
| 744 | if (pos > 0) then |
| 745 | cmd%append_output = .false. |
| 746 | cmd%force_clobber = .true. |
| 747 | call extract_filename(working_input(pos+2:), temp_str) |
| 748 | cmd%output_file = trim(temp_str) |
| 749 | working_input = working_input(:pos-1) |
| 750 | else |
| 751 | ! Check for output redirection (>>) |
| 752 | pos = find_outside_quotes(working_input, '>>') |
| 753 | if (pos > 0) then |
| 754 | cmd%append_output = .true. |
| 755 | call extract_filename(working_input(pos+2:), temp_str) |
| 756 | cmd%output_file = trim(temp_str) |
| 757 | working_input = working_input(:pos-1) |
| 758 | else |
| 759 | ! Check for output redirection (>) |
| 760 | pos = find_outside_quotes(working_input, '>') |
| 761 | if (pos > 0) then |
| 762 | cmd%append_output = .false. |
| 763 | call extract_filename(working_input(pos+1:), temp_str) |
| 764 | cmd%output_file = trim(temp_str) |
| 765 | working_input = working_input(:pos-1) |
| 766 | end if |
| 767 | end if |
| 768 | end if |
| 769 | |
| 770 | ! Check for input redirection (<) |
| 771 | pos = find_outside_quotes(working_input, '<') |
| 772 | if (pos > 0) then |
| 773 | call extract_filename(working_input(pos+1:), temp_str) |
| 774 | cmd%input_file = trim(temp_str) |
| 775 | working_input = working_input(:pos-1) |
| 776 | end if |
| 777 | |
| 778 | ! Tokenize the remaining command |
| 779 | call tokenize_with_substitution(trim(working_input), cmd%tokens, cmd%num_tokens) |
| 780 | |
| 781 | ! Extract prefix assignments (VAR=value command) |
| 782 | call extract_prefix_assignments(cmd) |
| 783 | |
| 784 | end subroutine |
| 785 | |
| 786 | subroutine extract_filename(input, filename) |
| 787 | character(len=*), intent(in) :: input |
| 788 | character(len=*), intent(out) :: filename |
| 789 | integer :: i |
| 790 | |
| 791 | filename = adjustl(input) |
| 792 | |
| 793 | do i = 1, len_trim(filename) |
| 794 | if (filename(i:i) == ' ' .or. filename(i:i) == char(9)) then |
| 795 | filename = filename(:i-1) |
| 796 | exit |
| 797 | end if |
| 798 | end do |
| 799 | end subroutine |
| 800 | |
| 801 | subroutine extract_word(input, word) |
| 802 | character(len=*), intent(in) :: input |
| 803 | character(len=*), intent(out) :: word |
| 804 | integer :: i |
| 805 | |
| 806 | word = adjustl(input) |
| 807 | |
| 808 | do i = 1, len_trim(word) |
| 809 | if (word(i:i) == ' ' .or. word(i:i) == char(9) .or. word(i:i) == char(10) .or. & |
| 810 | word(i:i) == '<' .or. word(i:i) == '>' .or. & |
| 811 | word(i:i) == '|' .or. word(i:i) == '&' .or. & |
| 812 | word(i:i) == ';') then |
| 813 | word = word(:i-1) |
| 814 | exit |
| 815 | end if |
| 816 | end do |
| 817 | end subroutine |
| 818 | |
| 819 | ! Strip quotes from heredoc delimiter ('EOF' -> EOF, "EOF" -> EOF) |
| 820 | ! Returns .true. if quotes were found and removed |
| 821 | function strip_heredoc_delimiter_quotes(delimiter) result(was_quoted) |
| 822 | character(len=*), intent(inout) :: delimiter |
| 823 | logical :: was_quoted |
| 824 | integer :: len_delim |
| 825 | character(len=1) :: first_char, last_char |
| 826 | |
| 827 | was_quoted = .false. |
| 828 | len_delim = len_trim(delimiter) |
| 829 | if (len_delim < 2) return |
| 830 | |
| 831 | first_char = delimiter(1:1) |
| 832 | last_char = delimiter(len_delim:len_delim) |
| 833 | |
| 834 | ! Check if surrounded by matching quotes |
| 835 | if ((first_char == "'" .and. last_char == "'") .or. & |
| 836 | (first_char == '"' .and. last_char == '"')) then |
| 837 | ! Remove surrounding quotes |
| 838 | delimiter = delimiter(2:len_delim-1) |
| 839 | was_quoted = .true. |
| 840 | end if |
| 841 | end function |
| 842 | |
| 843 | ! Find position of character outside quotes |
| 844 | function find_outside_quotes(str, char) result(pos) |
| 845 | character(len=*), intent(in) :: str, char |
| 846 | integer :: pos |
| 847 | integer :: i |
| 848 | logical :: in_quotes, in_arith |
| 849 | character(len=1) :: quote_char |
| 850 | integer :: arith_depth |
| 851 | |
| 852 | pos = 0 |
| 853 | in_quotes = .false. |
| 854 | in_arith = .false. |
| 855 | quote_char = ' ' |
| 856 | arith_depth = 0 |
| 857 | |
| 858 | do i = 1, len_trim(str) |
| 859 | ! Check for arithmetic expansion start: $(( |
| 860 | if (.not. in_quotes .and. i <= len_trim(str) - 2) then |
| 861 | if (str(i:i+2) == '$((') then |
| 862 | in_arith = .true. |
| 863 | arith_depth = arith_depth + 2 |
| 864 | cycle |
| 865 | end if |
| 866 | end if |
| 867 | |
| 868 | ! Track parentheses inside arithmetic expressions |
| 869 | if (in_arith .and. .not. in_quotes) then |
| 870 | if (str(i:i) == '(') then |
| 871 | arith_depth = arith_depth + 1 |
| 872 | else if (str(i:i) == ')') then |
| 873 | arith_depth = arith_depth - 1 |
| 874 | if (arith_depth == 0) then |
| 875 | in_arith = .false. |
| 876 | end if |
| 877 | end if |
| 878 | end if |
| 879 | |
| 880 | if (.not. in_quotes .and. .not. in_arith) then |
| 881 | if (str(i:i) == '"' .or. str(i:i) == "'") then |
| 882 | in_quotes = .true. |
| 883 | quote_char = str(i:i) |
| 884 | else if (str(i:min(i+len(char)-1, len_trim(str))) == char) then |
| 885 | pos = i |
| 886 | return |
| 887 | end if |
| 888 | else if (in_quotes) then |
| 889 | if (str(i:i) == quote_char) then |
| 890 | in_quotes = .false. |
| 891 | end if |
| 892 | end if |
| 893 | end do |
| 894 | end function |
| 895 | |
| 896 | subroutine tokenize_with_substitution(input, tokens, num_tokens) |
| 897 | character(len=*), intent(in) :: input |
| 898 | character(len=:), allocatable, intent(out) :: tokens(:) |
| 899 | integer, intent(out) :: num_tokens |
| 900 | |
| 901 | character(len=len(input)) :: working_copy |
| 902 | integer :: pos, start, token_count, i |
| 903 | character(len=MAX_TOKEN_LEN), allocatable :: temp_tokens(:) |
| 904 | logical :: in_quotes, in_arith, in_array_literal, in_cmd_subst, escaped |
| 905 | character :: quote_char |
| 906 | integer :: arith_depth, array_depth, cmd_depth |
| 907 | |
| 908 | working_copy = adjustl(input) |
| 909 | if (len_trim(working_copy) == 0) then |
| 910 | num_tokens = 0 |
| 911 | return |
| 912 | end if |
| 913 | |
| 914 | ! Count tokens first - must track quotes, $(( )), and array literals |
| 915 | token_count = 0 |
| 916 | pos = 1 |
| 917 | do while (pos <= len_trim(working_copy)) |
| 918 | ! Skip whitespace |
| 919 | do while (pos <= len_trim(working_copy) .and. working_copy(pos:pos) == ' ') |
| 920 | pos = pos + 1 |
| 921 | end do |
| 922 | if (pos > len_trim(working_copy)) exit |
| 923 | |
| 924 | ! Found start of token |
| 925 | start = pos ! Initialize start to beginning of token |
| 926 | token_count = token_count + 1 |
| 927 | in_quotes = .false. |
| 928 | in_arith = .false. |
| 929 | arith_depth = 0 |
| 930 | quote_char = ' ' |
| 931 | in_array_literal = .false. |
| 932 | array_depth = 0 |
| 933 | in_cmd_subst = .false. |
| 934 | cmd_depth = 0 |
| 935 | escaped = .false. |
| 936 | |
| 937 | ! Skip to end of token (respecting quotes and arithmetic) |
| 938 | ! Continue past len_trim when inside quotes to preserve trailing spaces |
| 939 | do while (pos <= len_trim(working_copy) .or. (in_quotes .and. pos <= len(working_copy))) |
| 940 | ! Handle backslash escaping outside quotes |
| 941 | if (.not. in_quotes .and. .not. escaped .and. working_copy(pos:pos) == '\') then |
| 942 | escaped = .true. |
| 943 | pos = pos + 1 |
| 944 | cycle |
| 945 | end if |
| 946 | |
| 947 | ! Check for quotes (unless escaped) |
| 948 | if (.not. in_arith .and. .not. escaped) then |
| 949 | if (.not. in_quotes .and. (working_copy(pos:pos) == '"' .or. working_copy(pos:pos) == "'")) then |
| 950 | in_quotes = .true. |
| 951 | quote_char = working_copy(pos:pos) |
| 952 | else if (in_quotes .and. working_copy(pos:pos) == quote_char) then |
| 953 | in_quotes = .false. |
| 954 | end if |
| 955 | end if |
| 956 | |
| 957 | ! Check for $(( )) arithmetic expansion and (( )) arithmetic command |
| 958 | if (.not. in_quotes .and. .not. escaped) then |
| 959 | ! First, check for special patterns that start arithmetic mode |
| 960 | if (.not. in_arith .and. .not. in_cmd_subst) then |
| 961 | if (pos <= len_trim(working_copy) - 2 .and. working_copy(pos:pos+2) == '$((') then |
| 962 | in_arith = .true. |
| 963 | arith_depth = 2 |
| 964 | pos = pos + 2 ! Skip the $( |
| 965 | else if (pos <= len_trim(working_copy) - 1 .and. working_copy(pos:pos+1) == '$(') then |
| 966 | ! $( command substitution - but NOT $(( |
| 967 | in_cmd_subst = .true. |
| 968 | cmd_depth = 1 |
| 969 | pos = pos + 1 ! Skip the $( |
| 970 | else if (pos == start .and. pos <= len_trim(working_copy) - 1 .and. & |
| 971 | working_copy(pos:pos+1) == '((') then |
| 972 | ! (( at start of token - arithmetic command |
| 973 | in_arith = .true. |
| 974 | arith_depth = 2 |
| 975 | pos = pos + 1 ! Skip the first ( |
| 976 | else if (pos == start+3 .and. start <= len_trim(working_copy) - 4 .and. & |
| 977 | working_copy(start:start+2) == 'for' .and. & |
| 978 | working_copy(pos:pos+1) == '((') then |
| 979 | ! for(( - arithmetic for loop |
| 980 | in_arith = .true. |
| 981 | arith_depth = 0 ! Will be counted below |
| 982 | end if |
| 983 | end if |
| 984 | |
| 985 | ! Then, track parentheses depth if in arithmetic mode |
| 986 | if (in_arith) then |
| 987 | if (working_copy(pos:pos) == '(') then |
| 988 | arith_depth = arith_depth + 1 |
| 989 | else if (working_copy(pos:pos) == ')') then |
| 990 | arith_depth = arith_depth - 1 |
| 991 | if (arith_depth == 0) then |
| 992 | in_arith = .false. |
| 993 | end if |
| 994 | end if |
| 995 | end if |
| 996 | |
| 997 | ! Track parentheses depth if in command substitution mode |
| 998 | if (in_cmd_subst) then |
| 999 | if (working_copy(pos:pos) == '(') then |
| 1000 | cmd_depth = cmd_depth + 1 |
| 1001 | else if (working_copy(pos:pos) == ')') then |
| 1002 | cmd_depth = cmd_depth - 1 |
| 1003 | if (cmd_depth == 0) then |
| 1004 | in_cmd_subst = .false. |
| 1005 | end if |
| 1006 | end if |
| 1007 | end if |
| 1008 | end if |
| 1009 | |
| 1010 | ! Check for array literal: var=(...) |
| 1011 | if (.not. in_quotes .and. .not. in_arith) then |
| 1012 | if (pos > 1 .and. pos <= len_trim(working_copy) - 1 .and. & |
| 1013 | working_copy(pos-1:pos) == '=(') then |
| 1014 | ! Start of array literal |
| 1015 | in_array_literal = .true. |
| 1016 | array_depth = 1 |
| 1017 | else if (in_array_literal) then |
| 1018 | if (working_copy(pos:pos) == '(') then |
| 1019 | array_depth = array_depth + 1 |
| 1020 | else if (working_copy(pos:pos) == ')') then |
| 1021 | array_depth = array_depth - 1 |
| 1022 | if (array_depth == 0) in_array_literal = .false. |
| 1023 | end if |
| 1024 | end if |
| 1025 | end if |
| 1026 | |
| 1027 | ! Check for token boundary (space outside quotes/arithmetic/array/command-subst, and not escaped) |
| 1028 | if (.not. in_quotes .and. .not. in_arith .and. .not. in_array_literal .and. & |
| 1029 | .not. in_cmd_subst .and. .not. escaped .and. working_copy(pos:pos) == ' ') exit |
| 1030 | |
| 1031 | ! Clear escaped flag after processing the escaped character |
| 1032 | if (escaped) escaped = .false. |
| 1033 | |
| 1034 | pos = pos + 1 |
| 1035 | end do |
| 1036 | end do |
| 1037 | |
| 1038 | num_tokens = token_count |
| 1039 | if (num_tokens == 0) return |
| 1040 | |
| 1041 | ! Allocate temporary storage |
| 1042 | allocate(temp_tokens(num_tokens)) |
| 1043 | |
| 1044 | ! Extract tokens into temporary array (same logic as counting) |
| 1045 | pos = 1 |
| 1046 | token_count = 0 |
| 1047 | do while (pos <= len_trim(working_copy) .and. token_count < num_tokens) |
| 1048 | ! Skip whitespace |
| 1049 | do while (pos <= len_trim(working_copy) .and. working_copy(pos:pos) == ' ') |
| 1050 | pos = pos + 1 |
| 1051 | end do |
| 1052 | if (pos > len_trim(working_copy)) exit |
| 1053 | |
| 1054 | start = pos |
| 1055 | in_quotes = .false. |
| 1056 | in_arith = .false. |
| 1057 | arith_depth = 0 |
| 1058 | quote_char = ' ' |
| 1059 | in_array_literal = .false. |
| 1060 | array_depth = 0 |
| 1061 | in_cmd_subst = .false. |
| 1062 | cmd_depth = 0 |
| 1063 | escaped = .false. |
| 1064 | |
| 1065 | ! Find end of token (respecting quotes, arithmetic, and array literals) |
| 1066 | ! Continue past len_trim when inside quotes to preserve trailing spaces |
| 1067 | do while (pos <= len_trim(working_copy) .or. (in_quotes .and. pos <= len(working_copy))) |
| 1068 | ! Handle backslash escaping outside quotes |
| 1069 | if (.not. in_quotes .and. .not. escaped .and. working_copy(pos:pos) == '\') then |
| 1070 | escaped = .true. |
| 1071 | pos = pos + 1 |
| 1072 | cycle |
| 1073 | end if |
| 1074 | |
| 1075 | ! Check for quotes (unless escaped) |
| 1076 | if (.not. in_arith .and. .not. escaped) then |
| 1077 | if (.not. in_quotes .and. (working_copy(pos:pos) == '"' .or. working_copy(pos:pos) == "'")) then |
| 1078 | in_quotes = .true. |
| 1079 | quote_char = working_copy(pos:pos) |
| 1080 | else if (in_quotes .and. working_copy(pos:pos) == quote_char) then |
| 1081 | in_quotes = .false. |
| 1082 | end if |
| 1083 | end if |
| 1084 | |
| 1085 | ! Check for $(( )) arithmetic expansion and (( )) arithmetic command |
| 1086 | if (.not. in_quotes .and. .not. escaped) then |
| 1087 | ! First, check for special patterns that start arithmetic mode |
| 1088 | if (.not. in_arith .and. .not. in_cmd_subst) then |
| 1089 | if (pos <= len_trim(working_copy) - 2 .and. working_copy(pos:pos+2) == '$((') then |
| 1090 | in_arith = .true. |
| 1091 | arith_depth = 2 |
| 1092 | pos = pos + 2 ! Skip the $( |
| 1093 | else if (pos <= len_trim(working_copy) - 1 .and. working_copy(pos:pos+1) == '$(') then |
| 1094 | ! $( command substitution - but NOT $(( |
| 1095 | in_cmd_subst = .true. |
| 1096 | cmd_depth = 1 |
| 1097 | pos = pos + 1 ! Skip the $( |
| 1098 | else if (pos == start .and. pos <= len_trim(working_copy) - 1 .and. & |
| 1099 | working_copy(pos:pos+1) == '((') then |
| 1100 | ! (( at start of token - arithmetic command |
| 1101 | in_arith = .true. |
| 1102 | arith_depth = 2 |
| 1103 | pos = pos + 1 ! Skip the first ( |
| 1104 | else if (pos == start+3 .and. start <= len_trim(working_copy) - 4 .and. & |
| 1105 | working_copy(start:start+2) == 'for' .and. & |
| 1106 | working_copy(pos:pos+1) == '((') then |
| 1107 | ! for(( - arithmetic for loop |
| 1108 | in_arith = .true. |
| 1109 | arith_depth = 0 ! Will be counted below |
| 1110 | end if |
| 1111 | end if |
| 1112 | |
| 1113 | ! Then, track parentheses depth if in arithmetic mode |
| 1114 | if (in_arith) then |
| 1115 | if (working_copy(pos:pos) == '(') then |
| 1116 | arith_depth = arith_depth + 1 |
| 1117 | else if (working_copy(pos:pos) == ')') then |
| 1118 | arith_depth = arith_depth - 1 |
| 1119 | if (arith_depth == 0) then |
| 1120 | in_arith = .false. |
| 1121 | end if |
| 1122 | end if |
| 1123 | end if |
| 1124 | |
| 1125 | ! Track parentheses depth if in command substitution mode |
| 1126 | if (in_cmd_subst) then |
| 1127 | if (working_copy(pos:pos) == '(') then |
| 1128 | cmd_depth = cmd_depth + 1 |
| 1129 | else if (working_copy(pos:pos) == ')') then |
| 1130 | cmd_depth = cmd_depth - 1 |
| 1131 | if (cmd_depth == 0) then |
| 1132 | in_cmd_subst = .false. |
| 1133 | end if |
| 1134 | end if |
| 1135 | end if |
| 1136 | end if |
| 1137 | |
| 1138 | ! Check for array literal: var=(...) |
| 1139 | if (.not. in_quotes .and. .not. in_arith) then |
| 1140 | if (pos > 1 .and. pos <= len_trim(working_copy) - 1 .and. & |
| 1141 | working_copy(pos-1:pos) == '=(') then |
| 1142 | ! Start of array literal |
| 1143 | in_array_literal = .true. |
| 1144 | array_depth = 1 |
| 1145 | else if (in_array_literal) then |
| 1146 | if (working_copy(pos:pos) == '(') then |
| 1147 | array_depth = array_depth + 1 |
| 1148 | else if (working_copy(pos:pos) == ')') then |
| 1149 | array_depth = array_depth - 1 |
| 1150 | if (array_depth == 0) in_array_literal = .false. |
| 1151 | end if |
| 1152 | end if |
| 1153 | end if |
| 1154 | |
| 1155 | ! Check for token boundary (space outside quotes/arithmetic/array/command-subst, and not escaped) |
| 1156 | if (.not. in_quotes .and. .not. in_arith .and. .not. in_array_literal .and. & |
| 1157 | .not. in_cmd_subst .and. .not. escaped .and. working_copy(pos:pos) == ' ') exit |
| 1158 | |
| 1159 | ! Clear escaped flag after processing the escaped character |
| 1160 | if (escaped) escaped = .false. |
| 1161 | |
| 1162 | pos = pos + 1 |
| 1163 | end do |
| 1164 | |
| 1165 | ! Store token (DON'T strip quotes yet - expand_variables needs to see them) |
| 1166 | ! DON'T process backslash escapes yet - glob expansion needs to see them |
| 1167 | token_count = token_count + 1 |
| 1168 | temp_tokens(token_count) = working_copy(start:pos-1) |
| 1169 | end do |
| 1170 | |
| 1171 | ! Now allocate the final deferred-length character array |
| 1172 | ! We'll use MAX_TOKEN_LEN as a uniform length for now |
| 1173 | allocate(character(len=MAX_TOKEN_LEN) :: tokens(num_tokens)) |
| 1174 | do i = 1, num_tokens |
| 1175 | tokens(i) = temp_tokens(i) |
| 1176 | end do |
| 1177 | |
| 1178 | deallocate(temp_tokens) |
| 1179 | end subroutine |
| 1180 | |
| 1181 | ! Helper function to strip outer quotes from a token |
| 1182 | function strip_outer_quotes(token) result(stripped) |
| 1183 | character(len=*), intent(in) :: token |
| 1184 | character(len=len(token)) :: stripped |
| 1185 | integer :: token_len |
| 1186 | |
| 1187 | stripped = token |
| 1188 | token_len = len_trim(token) |
| 1189 | |
| 1190 | ! Check if token has matching outer quotes |
| 1191 | if (token_len >= 2) then |
| 1192 | ! Check for double quotes |
| 1193 | if (token(1:1) == '"' .and. token(token_len:token_len) == '"') then |
| 1194 | stripped = token(2:token_len-1) |
| 1195 | return |
| 1196 | end if |
| 1197 | ! Check for single quotes |
| 1198 | if (token(1:1) == "'" .and. token(token_len:token_len) == "'") then |
| 1199 | stripped = token(2:token_len-1) |
| 1200 | return |
| 1201 | end if |
| 1202 | end if |
| 1203 | end function |
| 1204 | |
| 1205 | ! Helper function to strip ALL quotes from a token (for adjacent quotes like "a"b"c") |
| 1206 | function strip_all_quotes(token) result(stripped) |
| 1207 | character(len=*), intent(in) :: token |
| 1208 | character(len=len(token)) :: stripped |
| 1209 | integer :: i, j, token_len |
| 1210 | logical :: in_single_quote, in_double_quote |
| 1211 | |
| 1212 | stripped = '' |
| 1213 | j = 1 |
| 1214 | token_len = len_trim(token) |
| 1215 | in_single_quote = .false. |
| 1216 | in_double_quote = .false. |
| 1217 | |
| 1218 | do i = 1, token_len |
| 1219 | if (token(i:i) == "'" .and. .not. in_double_quote) then |
| 1220 | ! Toggle single quote mode |
| 1221 | in_single_quote = .not. in_single_quote |
| 1222 | ! Don't include the quote character itself |
| 1223 | else if (token(i:i) == '"' .and. .not. in_single_quote) then |
| 1224 | ! Toggle double quote mode |
| 1225 | in_double_quote = .not. in_double_quote |
| 1226 | ! Don't include the quote character itself |
| 1227 | else |
| 1228 | ! Regular character - include it |
| 1229 | stripped(j:j) = token(i:i) |
| 1230 | j = j + 1 |
| 1231 | end if |
| 1232 | end do |
| 1233 | end function |
| 1234 | |
| 1235 | ! Helper function to process backslash escape sequences outside quotes |
| 1236 | function process_escapes(token) result(processed) |
| 1237 | character(len=*), intent(in) :: token |
| 1238 | character(len=len(token)) :: processed |
| 1239 | integer :: i, j, token_len |
| 1240 | logical :: in_quotes |
| 1241 | character :: quote_char |
| 1242 | |
| 1243 | processed = '' |
| 1244 | i = 1 |
| 1245 | j = 1 |
| 1246 | token_len = len_trim(token) |
| 1247 | in_quotes = .false. |
| 1248 | quote_char = ' ' |
| 1249 | |
| 1250 | do while (i <= token_len) |
| 1251 | ! Track quotes |
| 1252 | if (.not. in_quotes .and. (token(i:i) == '"' .or. token(i:i) == "'")) then |
| 1253 | in_quotes = .true. |
| 1254 | quote_char = token(i:i) |
| 1255 | processed(j:j) = token(i:i) |
| 1256 | j = j + 1 |
| 1257 | i = i + 1 |
| 1258 | else if (in_quotes .and. token(i:i) == quote_char) then |
| 1259 | in_quotes = .false. |
| 1260 | processed(j:j) = token(i:i) |
| 1261 | j = j + 1 |
| 1262 | i = i + 1 |
| 1263 | else if (.not. in_quotes .and. token(i:i) == '\' .and. i < token_len) then |
| 1264 | ! Backslash escape outside quotes - skip the backslash, keep the next char |
| 1265 | i = i + 1 |
| 1266 | processed(j:j) = token(i:i) |
| 1267 | j = j + 1 |
| 1268 | i = i + 1 |
| 1269 | else |
| 1270 | ! Regular character |
| 1271 | processed(j:j) = token(i:i) |
| 1272 | j = j + 1 |
| 1273 | i = i + 1 |
| 1274 | end if |
| 1275 | end do |
| 1276 | end function |
| 1277 | |
| 1278 | subroutine expand_variables(token, expanded, shell, was_quoted_in) |
| 1279 | use expansion, only: expand_braces, arithmetic_expansion_shell, process_param_expansion |
| 1280 | character(len=*), intent(in) :: token |
| 1281 | character(len=:), allocatable, intent(out) :: expanded |
| 1282 | type(shell_state_t), intent(inout) :: shell |
| 1283 | logical, intent(in), optional :: was_quoted_in |
| 1284 | |
| 1285 | character(len=:), allocatable :: result, working_token |
| 1286 | integer :: i, j, var_start, brace_depth, end_pos |
| 1287 | integer :: result_cap |
| 1288 | character(len=256) :: var_name ! Variable names are short; was MAX_TOKEN_LEN (4096) |
| 1289 | character(len=:), allocatable :: var_value, brace_expanded |
| 1290 | character(len=20) :: pid_str |
| 1291 | logical :: is_quoted, is_single_quoted |
| 1292 | logical :: escapes_already_processed ! True if lexer already processed escapes |
| 1293 | |
| 1294 | ! Initialize growing result buffer |
| 1295 | result_cap = max(len(token) * 4, 16384) |
| 1296 | allocate(character(len=result_cap) :: result) |
| 1297 | |
| 1298 | ! Check if token was originally quoted (from lexer metadata or token inspection) |
| 1299 | is_quoted = .false. |
| 1300 | is_single_quoted = .false. |
| 1301 | escapes_already_processed = .false. |
| 1302 | |
| 1303 | ! Use the passed parameter if provided, otherwise fall back to checking the token |
| 1304 | if (present(was_quoted_in)) then |
| 1305 | is_quoted = was_quoted_in |
| 1306 | ! We don't track single vs double quotes in metadata, assume double if quoted |
| 1307 | is_single_quoted = .false. |
| 1308 | ! If was_quoted_in=true but token doesn't have outer quotes, the new lexer |
| 1309 | ! already stripped quotes and processed escapes - don't re-process them |
| 1310 | if (is_quoted .and. len_trim(token) >= 2) then |
| 1311 | if (.not. (token(1:1) == '"' .and. token(len_trim(token):len_trim(token)) == '"')) then |
| 1312 | escapes_already_processed = .true. |
| 1313 | end if |
| 1314 | else if (is_quoted .and. len_trim(token) < 2) then |
| 1315 | ! Short quoted token without surrounding quotes - escapes were processed |
| 1316 | escapes_already_processed = .true. |
| 1317 | end if |
| 1318 | else |
| 1319 | ! Legacy: check if token still has quotes (for backward compatibility) |
| 1320 | if (len_trim(token) >= 2) then |
| 1321 | if (token(1:1) == '"' .and. token(len_trim(token):len_trim(token)) == '"') then |
| 1322 | is_quoted = .true. |
| 1323 | else if (token(1:1) == "'" .and. token(len_trim(token):len_trim(token)) == "'") then |
| 1324 | is_quoted = .true. |
| 1325 | is_single_quoted = .true. |
| 1326 | end if |
| 1327 | end if |
| 1328 | end if |
| 1329 | ! Single quotes preserve everything literally - no expansion at all |
| 1330 | if (is_single_quoted) then |
| 1331 | ! Return the token with outer quotes stripped |
| 1332 | expanded = strip_outer_quotes(token) |
| 1333 | return |
| 1334 | end if |
| 1335 | |
| 1336 | ! For double-quoted tokens that are all whitespace (no special chars), |
| 1337 | ! return the whitespace to preserve the argument |
| 1338 | ! This handles cases like " " or " " where len_trim would be 0 |
| 1339 | ! Now that executor passes correct token length, we preserve exact whitespace count |
| 1340 | if (is_quoted .and. len_trim(token) == 0 .and. len(token) > 0) then |
| 1341 | ! Check if this is truly a whitespace token vs a token with quotes at boundaries |
| 1342 | block |
| 1343 | ! If token has quotes at boundaries, don't count them as content - let normal processing handle it |
| 1344 | if (len(token) >= 2) then |
| 1345 | if ((token(1:1) == '"' .and. token(len(token):len(token)) == '"') .or. & |
| 1346 | (token(1:1) == "'" .and. token(len(token):len(token)) == "'")) then |
| 1347 | ! Token is just quotes with possibly empty content - let normal processing handle it |
| 1348 | else |
| 1349 | ! Token is all whitespace (no quotes) - return the whitespace exactly as-is |
| 1350 | expanded = token |
| 1351 | return |
| 1352 | end if |
| 1353 | else |
| 1354 | ! Short token that's all whitespace - return it as-is |
| 1355 | expanded = token |
| 1356 | return |
| 1357 | end if |
| 1358 | end block |
| 1359 | end if |
| 1360 | |
| 1361 | ! Apply brace expansion ONLY if token is not quoted |
| 1362 | if (.not. is_quoted) then |
| 1363 | brace_expanded = expand_braces(token) |
| 1364 | working_token = brace_expanded |
| 1365 | else |
| 1366 | working_token = token |
| 1367 | end if |
| 1368 | |
| 1369 | i = 1 |
| 1370 | j = 1 |
| 1371 | ! For quoted tokens, use len(token) to preserve trailing whitespace |
| 1372 | ! For unquoted tokens, use len_trim() to skip padding |
| 1373 | if (is_quoted) then |
| 1374 | end_pos = len(token) ! Use actual passed token length, not buffer size |
| 1375 | ! If token has actual quote characters (not sentinels), skip them |
| 1376 | ! This handles tokens from the old parser path which don't use sentinels |
| 1377 | if (end_pos >= 2) then |
| 1378 | if (working_token(1:1) == '"' .and. working_token(end_pos:end_pos) == '"') then |
| 1379 | i = 2 ! Skip opening quote |
| 1380 | end_pos = end_pos - 1 ! Skip closing quote |
| 1381 | end if |
| 1382 | end if |
| 1383 | else |
| 1384 | end_pos = len_trim(working_token) |
| 1385 | end if |
| 1386 | |
| 1387 | ! Track if we're inside single-quoted literal region (between char(2) markers) |
| 1388 | block |
| 1389 | logical :: in_single_quote_literal |
| 1390 | in_single_quote_literal = .false. |
| 1391 | |
| 1392 | do while (i <= end_pos) |
| 1393 | ! Grow result buffer if needed (headroom for single-char writes) |
| 1394 | call ensure_result_cap(j + 256) |
| 1395 | ! Check for single-quote literal START sentinel (char(2)) |
| 1396 | if (working_token(i:i) == char(2)) then |
| 1397 | in_single_quote_literal = .true. |
| 1398 | i = i + 1 |
| 1399 | cycle |
| 1400 | end if |
| 1401 | |
| 1402 | ! Check for single-quote literal END sentinel (char(3)) |
| 1403 | if (working_token(i:i) == char(3)) then |
| 1404 | in_single_quote_literal = .false. |
| 1405 | i = i + 1 |
| 1406 | cycle |
| 1407 | end if |
| 1408 | |
| 1409 | ! In single-quoted literal region, copy everything literally (no expansion) |
| 1410 | if (in_single_quote_literal) then |
| 1411 | result(j:j) = working_token(i:i) |
| 1412 | i = i + 1 |
| 1413 | j = j + 1 |
| 1414 | cycle |
| 1415 | end if |
| 1416 | |
| 1417 | ! Check for double-quote boundary sentinel (char(1)) - skip it |
| 1418 | if (working_token(i:i) == char(1)) then |
| 1419 | i = i + 1 |
| 1420 | cycle |
| 1421 | end if |
| 1422 | |
| 1423 | ! Check for backslash escape |
| 1424 | ! Handle \$ and \` even outside quotes since lexer keeps both chars for these |
| 1425 | if (working_token(i:i) == '\' .and. i < end_pos) then |
| 1426 | if (working_token(i+1:i+1) == '$') then |
| 1427 | ! \$ -> literal $ (lexer keeps both chars, we process here) |
| 1428 | i = i + 1 ! Skip backslash |
| 1429 | result(j:j) = '$' |
| 1430 | i = i + 1 |
| 1431 | j = j + 1 |
| 1432 | cycle |
| 1433 | else if (working_token(i+1:i+1) == '`') then |
| 1434 | ! \` -> literal ` (lexer keeps both chars, we process here) |
| 1435 | i = i + 1 ! Skip backslash |
| 1436 | result(j:j) = '`' |
| 1437 | i = i + 1 |
| 1438 | j = j + 1 |
| 1439 | cycle |
| 1440 | else if (is_quoted .and. .not. is_single_quoted .and. .not. escapes_already_processed) then |
| 1441 | ! In double quotes, backslash also escapes: " \ and newline |
| 1442 | ! BUT skip this if lexer already processed escapes (new lexer path) |
| 1443 | ! Note: \$ and \` are handled above because lexer keeps both chars for them |
| 1444 | if (working_token(i+1:i+1) == '"' .or. working_token(i+1:i+1) == '\') then |
| 1445 | ! Skip the backslash and add the escaped character |
| 1446 | i = i + 1 |
| 1447 | result(j:j) = working_token(i:i) |
| 1448 | i = i + 1 |
| 1449 | j = j + 1 |
| 1450 | cycle |
| 1451 | end if |
| 1452 | end if |
| 1453 | ! Otherwise, keep the backslash (it's not escaping anything special) |
| 1454 | end if |
| 1455 | |
| 1456 | ! POSIX: Tilde expansion is NOT performed inside double quotes |
| 1457 | block |
| 1458 | logical :: tilde_at_word_start |
| 1459 | if (i == 1) then |
| 1460 | tilde_at_word_start = .true. |
| 1461 | else |
| 1462 | tilde_at_word_start = (working_token(i-1:i-1) == ' ') |
| 1463 | end if |
| 1464 | if (working_token(i:i) == '~' .and. tilde_at_word_start & |
| 1465 | .and. .not. is_quoted) then |
| 1466 | ! Tilde expansion |
| 1467 | call process_tilde_expansion(working_token, i, result, j, shell) |
| 1468 | else if (working_token(i:i) == '$' .and. i < len_trim(working_token)) then |
| 1469 | i = i + 1 |
| 1470 | |
| 1471 | ! Check for special variables |
| 1472 | if (working_token(i:i) == '?') then |
| 1473 | write(pid_str, '(i15)') shell%last_exit_status |
| 1474 | pid_str = adjustl(pid_str) ! Left-justify to remove leading spaces |
| 1475 | result(j:j+len_trim(pid_str)-1) = trim(pid_str) |
| 1476 | j = j + len_trim(pid_str) |
| 1477 | i = i + 1 |
| 1478 | else if (working_token(i:i) == '$') then |
| 1479 | ! Use shell%shell_pid (set at startup) so $$ returns same value in subshells |
| 1480 | write(pid_str, '(i0)') shell%shell_pid |
| 1481 | pid_str = adjustl(pid_str) ! Left-justify to remove leading spaces |
| 1482 | result(j:j+len_trim(pid_str)-1) = trim(pid_str) |
| 1483 | j = j + len_trim(pid_str) |
| 1484 | i = i + 1 |
| 1485 | else if (working_token(i:i) == '\' .and. i < len_trim(working_token) .and. working_token(i+1:i+1) == '!') then |
| 1486 | ! Handle bash-escaped $\! (bash adds backslash before ! in some contexts) |
| 1487 | i = i + 1 ! Skip the backslash |
| 1488 | write(pid_str, '(i15)') shell%last_bg_pid |
| 1489 | pid_str = adjustl(pid_str) ! Left-justify to remove leading spaces |
| 1490 | result(j:j+len_trim(pid_str)-1) = trim(pid_str) |
| 1491 | j = j + len_trim(pid_str) |
| 1492 | i = i + 1 |
| 1493 | else if (working_token(i:i) == '!') then |
| 1494 | write(pid_str, '(i15)') shell%last_bg_pid |
| 1495 | pid_str = adjustl(pid_str) ! Left-justify to remove leading spaces |
| 1496 | result(j:j+len_trim(pid_str)-1) = trim(pid_str) |
| 1497 | j = j + len_trim(pid_str) |
| 1498 | i = i + 1 |
| 1499 | else if (working_token(i:i) == '@') then |
| 1500 | ! $@ - all positional parameters |
| 1501 | var_value = get_shell_variable(shell, '@') |
| 1502 | if (len_trim(var_value) > 0) then |
| 1503 | call ensure_result_cap(j + len_trim(var_value)) |
| 1504 | result(j:j+len_trim(var_value)-1) = trim(var_value) |
| 1505 | j = j + len_trim(var_value) |
| 1506 | end if |
| 1507 | i = i + 1 |
| 1508 | else if (working_token(i:i) == '#') then |
| 1509 | ! $# - number of positional parameters |
| 1510 | var_value = get_shell_variable(shell, '#') |
| 1511 | if (len_trim(var_value) > 0) then |
| 1512 | call ensure_result_cap(j + len_trim(var_value)) |
| 1513 | result(j:j+len_trim(var_value)-1) = trim(var_value) |
| 1514 | j = j + len_trim(var_value) |
| 1515 | end if |
| 1516 | i = i + 1 |
| 1517 | else if (working_token(i:i) == '*') then |
| 1518 | ! $* - all positional parameters as single word |
| 1519 | var_value = get_shell_variable(shell, '*') |
| 1520 | if (len_trim(var_value) > 0) then |
| 1521 | call ensure_result_cap(j + len_trim(var_value)) |
| 1522 | result(j:j+len_trim(var_value)-1) = trim(var_value) |
| 1523 | j = j + len_trim(var_value) |
| 1524 | end if |
| 1525 | i = i + 1 |
| 1526 | else if (working_token(i:i) == '-') then |
| 1527 | ! $- - current shell option flags |
| 1528 | var_value = get_shell_variable(shell, '-') |
| 1529 | if (len_trim(var_value) > 0) then |
| 1530 | call ensure_result_cap(j + len_trim(var_value)) |
| 1531 | result(j:j+len_trim(var_value)-1) = trim(var_value) |
| 1532 | j = j + len_trim(var_value) |
| 1533 | end if |
| 1534 | i = i + 1 |
| 1535 | else if (working_token(i:i) == '_') then |
| 1536 | ! Check if this is $_ alone or $_varname |
| 1537 | if (i+1 <= len_trim(working_token) .and. & |
| 1538 | (is_alnum(working_token(i+1:i+1)) .or. working_token(i+1:i+1) == '_')) then |
| 1539 | ! $_varname - underscore-prefixed variable name |
| 1540 | var_start = i |
| 1541 | do while (i <= len_trim(working_token) .and. & |
| 1542 | (is_alnum(working_token(i:i)) .or. working_token(i:i) == '_')) |
| 1543 | i = i + 1 |
| 1544 | end do |
| 1545 | var_name = working_token(var_start:i-1) |
| 1546 | var_value = get_shell_variable(shell, trim(var_name)) |
| 1547 | if (len_trim(var_value) > 0) then |
| 1548 | call ensure_result_cap(j + len_trim(var_value)) |
| 1549 | result(j:j+len_trim(var_value)-1) = trim(var_value) |
| 1550 | j = j + len_trim(var_value) |
| 1551 | end if |
| 1552 | else |
| 1553 | ! $_ - last argument of previous command |
| 1554 | var_value = get_shell_variable(shell, '_') |
| 1555 | if (len_trim(var_value) > 0) then |
| 1556 | call ensure_result_cap(j + len_trim(var_value)) |
| 1557 | result(j:j+len_trim(var_value)-1) = trim(var_value) |
| 1558 | j = j + len_trim(var_value) |
| 1559 | end if |
| 1560 | i = i + 1 |
| 1561 | end if |
| 1562 | else if (working_token(i:i) >= '0' .and. working_token(i:i) <= '9') then |
| 1563 | ! $0, $1, $2, ... - positional parameters |
| 1564 | var_name = working_token(i:i) |
| 1565 | var_value = get_shell_variable(shell, trim(var_name)) |
| 1566 | if (len_trim(var_value) > 0) then |
| 1567 | call ensure_result_cap(j + len_trim(var_value)) |
| 1568 | result(j:j+len_trim(var_value)-1) = trim(var_value) |
| 1569 | j = j + len_trim(var_value) |
| 1570 | end if |
| 1571 | i = i + 1 |
| 1572 | else if (working_token(i:i) == '(') then |
| 1573 | ! Check if it's $(( arithmetic expansion or $( command substitution |
| 1574 | if (i+1 <= len_trim(working_token) .and. working_token(i+1:i+1) == '(') then |
| 1575 | ! $((arithmetic)) expansion |
| 1576 | var_start = i - 1 ! Include the $ character |
| 1577 | i = i + 2 ! Skip both opening parens |
| 1578 | brace_depth = 2 |
| 1579 | |
| 1580 | do while (i <= len_trim(working_token) .and. brace_depth > 0) |
| 1581 | if (working_token(i:i) == '(') then |
| 1582 | brace_depth = brace_depth + 1 |
| 1583 | else if (working_token(i:i) == ')') then |
| 1584 | brace_depth = brace_depth - 1 |
| 1585 | end if |
| 1586 | i = i + 1 |
| 1587 | end do |
| 1588 | |
| 1589 | ! Extract full $((expr)) including delimiters |
| 1590 | var_name = working_token(var_start:i-1) |
| 1591 | |
| 1592 | ! Evaluate arithmetic expansion with shell context |
| 1593 | var_value = arithmetic_expansion_shell(trim(var_name), shell) |
| 1594 | if (len_trim(var_value) > 0) then |
| 1595 | call ensure_result_cap(j + len_trim(var_value)) |
| 1596 | result(j:j+len_trim(var_value)-1) = trim(var_value) |
| 1597 | j = j + len_trim(var_value) |
| 1598 | end if |
| 1599 | else |
| 1600 | ! $(command) command substitution |
| 1601 | i = i + 1 |
| 1602 | var_start = i |
| 1603 | brace_depth = 1 |
| 1604 | |
| 1605 | do while (i <= len_trim(working_token) .and. brace_depth > 0) |
| 1606 | if (working_token(i:i) == '(') then |
| 1607 | brace_depth = brace_depth + 1 |
| 1608 | else if (working_token(i:i) == ')') then |
| 1609 | brace_depth = brace_depth - 1 |
| 1610 | end if |
| 1611 | i = i + 1 |
| 1612 | end do |
| 1613 | |
| 1614 | var_name = working_token(var_start:i-2) ! This is actually the command |
| 1615 | |
| 1616 | ! Execute command substitution |
| 1617 | call execute_command_substitution(trim(var_name), var_value, shell) |
| 1618 | if (allocated(var_value) .and. len(var_value) > 0) then |
| 1619 | call ensure_result_cap(j + len(var_value)) |
| 1620 | result(j:j+len(var_value)-1) = var_value |
| 1621 | j = j + len(var_value) |
| 1622 | end if |
| 1623 | end if |
| 1624 | else if (working_token(i:i) == '{') then |
| 1625 | ! ${VAR} or ${VAR:operation} parameter expansion |
| 1626 | i = i + 1 |
| 1627 | var_start = i |
| 1628 | brace_depth = 1 |
| 1629 | |
| 1630 | do while (i <= len_trim(working_token) .and. brace_depth > 0) |
| 1631 | ! Check for nested ${ pattern (not standalone {) |
| 1632 | if (i > 1 .and. i < len_trim(working_token)) then |
| 1633 | if (working_token(i-1:i) == '${') then |
| 1634 | brace_depth = brace_depth + 1 |
| 1635 | i = i + 1 ! Skip the { part of ${ |
| 1636 | end if |
| 1637 | end if |
| 1638 | if (working_token(i:i) == '}') then |
| 1639 | brace_depth = brace_depth - 1 |
| 1640 | end if |
| 1641 | i = i + 1 |
| 1642 | end do |
| 1643 | |
| 1644 | var_name = working_token(var_start:i-2) |
| 1645 | |
| 1646 | ! Process parameter expansion (delegated to expansion module — issue #12) |
| 1647 | call process_param_expansion(var_name, var_value, shell) |
| 1648 | if (allocated(var_value) .and. len(var_value) > 0) then |
| 1649 | call ensure_result_cap(j + len_trim(var_value)) |
| 1650 | result(j:j+len_trim(var_value)-1) = trim(var_value) |
| 1651 | j = j + len_trim(var_value) |
| 1652 | end if |
| 1653 | else |
| 1654 | ! Simple $VAR syntax |
| 1655 | var_start = i |
| 1656 | do while (i <= len_trim(working_token)) |
| 1657 | if (.not. (is_alnum(working_token(i:i)) .or. working_token(i:i) == '_')) exit |
| 1658 | i = i + 1 |
| 1659 | end do |
| 1660 | |
| 1661 | var_name = working_token(var_start:i-1) |
| 1662 | |
| 1663 | ! If no valid variable name was found, treat $ as literal |
| 1664 | if (len_trim(var_name) == 0) then |
| 1665 | result(j:j) = '$' |
| 1666 | j = j + 1 |
| 1667 | else |
| 1668 | ! Check shell variables first |
| 1669 | ! Check if variable is set before expanding |
| 1670 | if (is_shell_variable_set(shell, trim(var_name))) then |
| 1671 | var_value = get_shell_variable(shell, trim(var_name)) |
| 1672 | ! Use get_shell_variable_length to preserve ALL characters including whitespace |
| 1673 | ! This is crucial for variables like IFS=' ' where the space must be preserved |
| 1674 | brace_depth = get_shell_variable_length(shell, trim(var_name)) |
| 1675 | if (brace_depth > 0) then |
| 1676 | call ensure_result_cap(j + brace_depth) |
| 1677 | result(j:j+brace_depth-1) = var_value(1:brace_depth) |
| 1678 | j = j + brace_depth |
| 1679 | end if |
| 1680 | else |
| 1681 | ! Fall back to environment variables |
| 1682 | var_value = get_environment_var(trim(var_name)) |
| 1683 | if (allocated(var_value) .and. len(var_value) > 0) then |
| 1684 | call ensure_result_cap(j + len(var_value)) |
| 1685 | result(j:j+len(var_value)-1) = var_value |
| 1686 | j = j + len(var_value) |
| 1687 | else |
| 1688 | ! Variable is not set - check if set -u is enabled |
| 1689 | if (check_nounset(shell, trim(var_name))) then |
| 1690 | shell%last_exit_status = 127 ! bash uses 127 for direct expansion errors |
| 1691 | shell%fatal_expansion_error = .true. |
| 1692 | shell%running = .false. ! Stop shell execution |
| 1693 | expanded = '' |
| 1694 | return |
| 1695 | end if |
| 1696 | end if |
| 1697 | end if |
| 1698 | end if |
| 1699 | end if |
| 1700 | else if (working_token(i:i) == '`') then |
| 1701 | ! Backtick command substitution |
| 1702 | i = i + 1 |
| 1703 | var_start = i |
| 1704 | |
| 1705 | ! Find closing backtick |
| 1706 | do while (i <= len_trim(working_token) .and. working_token(i:i) /= '`') |
| 1707 | i = i + 1 |
| 1708 | end do |
| 1709 | |
| 1710 | if (i <= len_trim(working_token) .and. working_token(i:i) == '`') then |
| 1711 | var_name = working_token(var_start:i-1) ! This is the command |
| 1712 | i = i + 1 ! Skip closing backtick |
| 1713 | |
| 1714 | ! Execute command substitution |
| 1715 | call execute_command_substitution(trim(var_name), var_value, shell) |
| 1716 | if (allocated(var_value) .and. len(var_value) > 0) then |
| 1717 | call ensure_result_cap(j + len_trim(var_value)) |
| 1718 | result(j:j+len_trim(var_value)-1) = trim(var_value) |
| 1719 | j = j + len_trim(var_value) |
| 1720 | end if |
| 1721 | else |
| 1722 | ! Unmatched backtick, treat as literal |
| 1723 | result(j:j) = '`' |
| 1724 | j = j + 1 |
| 1725 | end if |
| 1726 | else if (working_token(i:i) == char(1)) then |
| 1727 | ! Skip sentinel character (marks quote boundary from lexer) |
| 1728 | i = i + 1 |
| 1729 | else |
| 1730 | result(j:j) = working_token(i:i) |
| 1731 | i = i + 1 |
| 1732 | j = j + 1 |
| 1733 | end if |
| 1734 | end block ! tilde_at_word_start bounds-safe check |
| 1735 | end do |
| 1736 | end block ! End of in_single_quote_literal block |
| 1737 | |
| 1738 | ! POSIX: Quote removal does NOT apply to the results of parameter expansion |
| 1739 | ! Only apply quote removal to quotes that were literally in the command, not in variable values |
| 1740 | ! So we do NOT call strip_outer_quotes here - that would incorrectly remove quotes from values like $VAR where VAR='"test"' |
| 1741 | ! Don't use trim() - preserve trailing whitespace from variable values |
| 1742 | if (j > 1) then |
| 1743 | expanded = result(1:j-1) |
| 1744 | else |
| 1745 | expanded = '' |
| 1746 | end if |
| 1747 | |
| 1748 | contains |
| 1749 | |
| 1750 | subroutine ensure_result_cap(needed) |
| 1751 | integer, intent(in) :: needed |
| 1752 | character(len=:), allocatable :: tmp |
| 1753 | integer :: new_cap |
| 1754 | if (needed <= result_cap) return |
| 1755 | new_cap = max(result_cap * 2, needed + 4096) |
| 1756 | allocate(character(len=new_cap) :: tmp) |
| 1757 | if (j > 1) tmp(1:j-1) = result(1:j-1) |
| 1758 | call move_alloc(tmp, result) |
| 1759 | result_cap = new_cap |
| 1760 | end subroutine |
| 1761 | |
| 1762 | function is_alnum(ch) result(res) |
| 1763 | character, intent(in) :: ch |
| 1764 | logical :: res |
| 1765 | res = (ch >= 'a' .and. ch <= 'z') .or. & |
| 1766 | (ch >= 'A' .and. ch <= 'Z') .or. & |
| 1767 | (ch >= '0' .and. ch <= '9') |
| 1768 | end function |
| 1769 | |
| 1770 | end subroutine |
| 1771 | |
| 1772 | subroutine read_heredoc(delimiter, content, shell, strip_tabs) |
| 1773 | use shell_types, only: shell_state_t |
| 1774 | use variables, only: get_shell_variable |
| 1775 | character(len=*), intent(in) :: delimiter |
| 1776 | character(len=:), allocatable, intent(out) :: content |
| 1777 | type(shell_state_t), intent(inout) :: shell |
| 1778 | logical, intent(in), optional :: strip_tabs |
| 1779 | |
| 1780 | character(len=MAX_TOKEN_LEN) :: line |
| 1781 | character(len=MAX_HEREDOC_LEN) :: buffer |
| 1782 | integer :: iostat, pos, tab_pos |
| 1783 | logical :: should_expand, do_strip_tabs |
| 1784 | |
| 1785 | ! Determine if we should strip tabs |
| 1786 | do_strip_tabs = .false. |
| 1787 | if (present(strip_tabs)) do_strip_tabs = strip_tabs |
| 1788 | |
| 1789 | ! Check if we have pending heredocs from -c flag (new array-based approach) |
| 1790 | if (shell%num_pending_heredocs > 0 .and. & |
| 1791 | shell%next_pending_heredoc <= shell%num_pending_heredocs) then |
| 1792 | ! Get the next pending heredoc |
| 1793 | buffer = trim(shell%pending_heredocs(shell%next_pending_heredoc)%content) |
| 1794 | |
| 1795 | ! Check if we should expand variables |
| 1796 | should_expand = .not. shell%pending_heredocs(shell%next_pending_heredoc)%quoted |
| 1797 | |
| 1798 | ! Expand variables if needed |
| 1799 | if (should_expand) then |
| 1800 | buffer = expand_heredoc_variables(buffer, shell) |
| 1801 | end if |
| 1802 | |
| 1803 | allocate(character(len=len_trim(buffer)) :: content) |
| 1804 | content = trim(buffer) |
| 1805 | |
| 1806 | ! Advance to next pending heredoc |
| 1807 | shell%next_pending_heredoc = shell%next_pending_heredoc + 1 |
| 1808 | |
| 1809 | ! Clear pending heredocs when all consumed |
| 1810 | if (shell%next_pending_heredoc > shell%num_pending_heredocs) then |
| 1811 | shell%num_pending_heredocs = 0 |
| 1812 | shell%next_pending_heredoc = 1 |
| 1813 | ! Also clear legacy single heredoc |
| 1814 | shell%has_pending_heredoc = .false. |
| 1815 | shell%pending_heredoc = '' |
| 1816 | shell%pending_heredoc_delimiter = '' |
| 1817 | shell%pending_heredoc_quoted = .false. |
| 1818 | shell%pending_heredoc_strip_tabs = .false. |
| 1819 | end if |
| 1820 | return |
| 1821 | end if |
| 1822 | |
| 1823 | ! Legacy: Check single pending heredoc (backward compatibility) |
| 1824 | if (shell%has_pending_heredoc .and. & |
| 1825 | trim(shell%pending_heredoc_delimiter) == trim(delimiter)) then |
| 1826 | ! Use the pre-stored content (tabs already stripped by preprocess_heredocs_for_c if needed) |
| 1827 | buffer = trim(shell%pending_heredoc) |
| 1828 | |
| 1829 | ! Check if we should expand variables |
| 1830 | should_expand = .not. shell%pending_heredoc_quoted |
| 1831 | |
| 1832 | ! Expand variables if needed |
| 1833 | if (should_expand) then |
| 1834 | buffer = expand_heredoc_variables(buffer, shell) |
| 1835 | end if |
| 1836 | |
| 1837 | allocate(character(len=len_trim(buffer)) :: content) |
| 1838 | content = trim(buffer) |
| 1839 | |
| 1840 | ! Clear the pending heredoc |
| 1841 | shell%has_pending_heredoc = .false. |
| 1842 | shell%pending_heredoc = '' |
| 1843 | shell%pending_heredoc_delimiter = '' |
| 1844 | shell%pending_heredoc_quoted = .false. |
| 1845 | shell%pending_heredoc_strip_tabs = .false. |
| 1846 | return |
| 1847 | end if |
| 1848 | |
| 1849 | ! Fall back to reading from stdin |
| 1850 | buffer = '' |
| 1851 | pos = 1 |
| 1852 | |
| 1853 | write(*, '(a)', advance='no') '> ' |
| 1854 | |
| 1855 | do |
| 1856 | read(*, '(a)', iostat=iostat) line |
| 1857 | if (iostat /= 0) exit |
| 1858 | |
| 1859 | ! Strip leading tabs BEFORE delimiter comparison (<<- strips tabs) |
| 1860 | if (do_strip_tabs) then |
| 1861 | tab_pos = 1 |
| 1862 | do while (tab_pos <= len_trim(line) .and. line(tab_pos:tab_pos) == char(9)) |
| 1863 | tab_pos = tab_pos + 1 |
| 1864 | end do |
| 1865 | line = line(tab_pos:) |
| 1866 | end if |
| 1867 | |
| 1868 | if (trim(line) == trim(delimiter)) exit |
| 1869 | |
| 1870 | if (pos > 1) then |
| 1871 | buffer(pos:pos) = char(10) ! newline |
| 1872 | pos = pos + 1 |
| 1873 | end if |
| 1874 | |
| 1875 | buffer(pos:pos+len_trim(line)-1) = trim(line) |
| 1876 | pos = pos + len_trim(line) |
| 1877 | |
| 1878 | write(*, '(a)', advance='no') '> ' |
| 1879 | end do |
| 1880 | |
| 1881 | allocate(character(len=pos-1) :: content) |
| 1882 | content = buffer(:pos-1) |
| 1883 | end subroutine |
| 1884 | |
| 1885 | ! Expand variables in heredoc content |
| 1886 | function expand_heredoc_variables(input, shell) result(output) |
| 1887 | use shell_types, only: shell_state_t |
| 1888 | use variables, only: get_shell_variable |
| 1889 | character(len=*), intent(in) :: input |
| 1890 | type(shell_state_t), intent(in) :: shell |
| 1891 | character(len=MAX_HEREDOC_LEN) :: output |
| 1892 | |
| 1893 | integer :: i, j, var_start, var_end |
| 1894 | character(len=256) :: var_name |
| 1895 | character(len=:), allocatable :: var_value |
| 1896 | |
| 1897 | output = '' |
| 1898 | i = 1 |
| 1899 | j = 1 |
| 1900 | |
| 1901 | do while (i <= len_trim(input)) |
| 1902 | if (input(i:i) == '$' .and. i < len_trim(input)) then |
| 1903 | ! Found potential variable |
| 1904 | var_start = i + 1 |
| 1905 | |
| 1906 | ! Check for ${var} format |
| 1907 | if (input(var_start:var_start) == '{') then |
| 1908 | var_start = var_start + 1 |
| 1909 | var_end = var_start |
| 1910 | do while (var_end <= len_trim(input) .and. input(var_end:var_end) /= '}') |
| 1911 | var_end = var_end + 1 |
| 1912 | end do |
| 1913 | if (var_end <= len_trim(input)) then |
| 1914 | var_name = input(var_start:var_end-1) |
| 1915 | var_value = get_shell_variable(shell, trim(var_name)) |
| 1916 | output(j:j+len_trim(var_value)-1) = trim(var_value) |
| 1917 | j = j + len_trim(var_value) |
| 1918 | i = var_end + 1 |
| 1919 | else |
| 1920 | output(j:j) = '$' |
| 1921 | j = j + 1 |
| 1922 | i = i + 1 |
| 1923 | end if |
| 1924 | else |
| 1925 | ! Check for $var format |
| 1926 | var_end = var_start |
| 1927 | do while (var_end <= len_trim(input) .and. & |
| 1928 | ((input(var_end:var_end) >= 'A' .and. input(var_end:var_end) <= 'Z') .or. & |
| 1929 | (input(var_end:var_end) >= 'a' .and. input(var_end:var_end) <= 'z') .or. & |
| 1930 | (input(var_end:var_end) >= '0' .and. input(var_end:var_end) <= '9') .or. & |
| 1931 | input(var_end:var_end) == '_')) |
| 1932 | var_end = var_end + 1 |
| 1933 | end do |
| 1934 | if (var_end > var_start) then |
| 1935 | var_name = input(var_start:var_end-1) |
| 1936 | var_value = get_shell_variable(shell, trim(var_name)) |
| 1937 | output(j:j+len_trim(var_value)-1) = trim(var_value) |
| 1938 | j = j + len_trim(var_value) |
| 1939 | i = var_end |
| 1940 | else |
| 1941 | output(j:j) = '$' |
| 1942 | j = j + 1 |
| 1943 | i = i + 1 |
| 1944 | end if |
| 1945 | end if |
| 1946 | else |
| 1947 | output(j:j) = input(i:i) |
| 1948 | j = j + 1 |
| 1949 | i = i + 1 |
| 1950 | end if |
| 1951 | end do |
| 1952 | end function |
| 1953 | |
| 1954 | ! Strip leading tabs from each line in heredoc content |
| 1955 | function strip_leading_tabs(input) result(output) |
| 1956 | character(len=*), intent(in) :: input |
| 1957 | character(len=MAX_HEREDOC_LEN) :: output |
| 1958 | integer :: i, j |
| 1959 | logical :: at_line_start |
| 1960 | |
| 1961 | output = '' |
| 1962 | i = 1 |
| 1963 | j = 1 |
| 1964 | at_line_start = .true. |
| 1965 | |
| 1966 | do while (i <= len_trim(input)) |
| 1967 | if (at_line_start .and. input(i:i) == char(9)) then |
| 1968 | ! Skip leading tab |
| 1969 | i = i + 1 |
| 1970 | else |
| 1971 | ! Copy character |
| 1972 | at_line_start = .false. |
| 1973 | output(j:j) = input(i:i) |
| 1974 | if (input(i:i) == char(10)) then |
| 1975 | at_line_start = .true. |
| 1976 | end if |
| 1977 | j = j + 1 |
| 1978 | i = i + 1 |
| 1979 | end if |
| 1980 | end do |
| 1981 | end function |
| 1982 | |
| 1983 | ! Extract heredoc content from input string (for -c mode) |
| 1984 | subroutine extract_heredoc_from_input(input, delimiter, content) |
| 1985 | character(len=*), intent(in) :: input, delimiter |
| 1986 | character(len=:), allocatable, intent(out) :: content |
| 1987 | |
| 1988 | integer :: i, line_start, line_end, content_start |
| 1989 | integer :: newline_pos |
| 1990 | character(len=len(input)) :: current_line |
| 1991 | character(len=MAX_HEREDOC_LEN) :: buffer |
| 1992 | integer :: buffer_pos |
| 1993 | logical :: found_end |
| 1994 | |
| 1995 | ! Check if input contains newlines (heredoc marker) |
| 1996 | newline_pos = index(input, char(10)) |
| 1997 | if (newline_pos == 0) then |
| 1998 | ! No newlines, can't extract heredoc content |
| 1999 | return |
| 2000 | end if |
| 2001 | |
| 2002 | ! Find where heredoc content starts (after first newline following <<DELIM) |
| 2003 | content_start = 0 |
| 2004 | do i = 1, len(input) |
| 2005 | if (input(i:i) == '<' .and. i < len(input) - 1) then |
| 2006 | if (input(i+1:i+1) == '<') then |
| 2007 | ! Found <<, look for newline after delimiter |
| 2008 | do newline_pos = i+2, len(input) |
| 2009 | if (input(newline_pos:newline_pos) == char(10)) then |
| 2010 | content_start = newline_pos + 1 |
| 2011 | exit |
| 2012 | end if |
| 2013 | end do |
| 2014 | if (content_start > 0) exit |
| 2015 | end if |
| 2016 | end if |
| 2017 | end do |
| 2018 | |
| 2019 | if (content_start == 0 .or. content_start > len(input)) then |
| 2020 | ! No content after heredoc marker |
| 2021 | return |
| 2022 | end if |
| 2023 | |
| 2024 | ! Extract lines until we find the delimiter |
| 2025 | buffer = '' |
| 2026 | buffer_pos = 1 |
| 2027 | line_start = content_start |
| 2028 | found_end = .false. |
| 2029 | |
| 2030 | do while (line_start <= len(input) .and. .not. found_end) |
| 2031 | ! Find end of current line (newline or end of string) |
| 2032 | line_end = line_start |
| 2033 | do while (line_end <= len(input) .and. input(line_end:line_end) /= char(10)) |
| 2034 | line_end = line_end + 1 |
| 2035 | end do |
| 2036 | |
| 2037 | ! Extract current line (handle case where line_end went past end of input) |
| 2038 | if (line_end > len(input)) then |
| 2039 | ! No newline found, extract to end of input |
| 2040 | if (line_start <= len(input)) then |
| 2041 | current_line = input(line_start:len(input)) |
| 2042 | else |
| 2043 | current_line = '' |
| 2044 | end if |
| 2045 | else if (line_end > line_start) then |
| 2046 | ! Newline found, extract up to (but not including) the newline |
| 2047 | current_line = input(line_start:line_end-1) |
| 2048 | else |
| 2049 | current_line = '' |
| 2050 | end if |
| 2051 | |
| 2052 | ! Check if this line matches the delimiter |
| 2053 | if (trim(current_line) == trim(delimiter)) then |
| 2054 | found_end = .true. |
| 2055 | exit |
| 2056 | end if |
| 2057 | |
| 2058 | ! Add line to buffer |
| 2059 | if (buffer_pos > 1) then |
| 2060 | ! Add newline before this line |
| 2061 | buffer(buffer_pos:buffer_pos) = char(10) |
| 2062 | buffer_pos = buffer_pos + 1 |
| 2063 | end if |
| 2064 | |
| 2065 | if (len_trim(current_line) > 0) then |
| 2066 | buffer(buffer_pos:buffer_pos+len_trim(current_line)-1) = trim(current_line) |
| 2067 | buffer_pos = buffer_pos + len_trim(current_line) |
| 2068 | end if |
| 2069 | |
| 2070 | ! Move to next line |
| 2071 | line_start = line_end + 1 |
| 2072 | end do |
| 2073 | |
| 2074 | ! Allocate and return content (with trailing newline to match POSIX) |
| 2075 | if (buffer_pos > 1) then |
| 2076 | ! Add trailing newline |
| 2077 | buffer(buffer_pos:buffer_pos) = char(10) |
| 2078 | buffer_pos = buffer_pos + 1 |
| 2079 | allocate(character(len=buffer_pos-1) :: content) |
| 2080 | content = buffer(:buffer_pos-1) |
| 2081 | end if |
| 2082 | end subroutine |
| 2083 | |
| 2084 | ! Convert backtick command substitution to $() format |
| 2085 | function convert_backticks_to_dollar_paren(input) result(output) |
| 2086 | character(len=*), intent(in) :: input |
| 2087 | character(len=:), allocatable :: output |
| 2088 | character(len=len(input)*2) :: temp_result |
| 2089 | integer :: i, j, backtick_start |
| 2090 | logical :: in_backticks, in_single_quote, in_double_quote |
| 2091 | character(len=1) :: backslash |
| 2092 | |
| 2093 | backslash = char(92) |
| 2094 | temp_result = '' |
| 2095 | i = 1 |
| 2096 | j = 1 |
| 2097 | in_backticks = .false. |
| 2098 | in_single_quote = .false. |
| 2099 | in_double_quote = .false. |
| 2100 | backtick_start = 0 |
| 2101 | |
| 2102 | do while (i <= len_trim(input)) |
| 2103 | ! Track quote state (but not inside backticks) |
| 2104 | if (.not. in_backticks) then |
| 2105 | ! Fortran .or. does NOT short-circuit, so check i > 1 separately |
| 2106 | ! to avoid input(0:0) out-of-bounds access |
| 2107 | block |
| 2108 | logical :: not_escaped |
| 2109 | if (i == 1) then |
| 2110 | not_escaped = .true. |
| 2111 | else |
| 2112 | not_escaped = (input(i-1:i-1) /= backslash) |
| 2113 | end if |
| 2114 | if (input(i:i) == "'" .and. not_escaped) then |
| 2115 | in_single_quote = .not. in_single_quote |
| 2116 | else if (input(i:i) == '"' .and. not_escaped) then |
| 2117 | in_double_quote = .not. in_double_quote |
| 2118 | end if |
| 2119 | end block |
| 2120 | end if |
| 2121 | |
| 2122 | ! Inside backticks: handle escaped backticks as nested substitution delimiters |
| 2123 | ! POSIX: \` inside backticks means start/end of nested command substitution |
| 2124 | if (in_backticks .and. input(i:i) == backslash .and. i < len_trim(input)) then |
| 2125 | if (input(i+1:i+1) == '`') then |
| 2126 | ! Escaped backtick inside backticks = nested command substitution |
| 2127 | ! Convert to $() for the nested level |
| 2128 | temp_result(j:j+1) = '$(' |
| 2129 | j = j + 2 |
| 2130 | i = i + 2 |
| 2131 | ! Find the matching closing \` and convert it too |
| 2132 | block |
| 2133 | integer :: k, nest_level |
| 2134 | nest_level = 1 |
| 2135 | k = i |
| 2136 | do while (k <= len_trim(input) .and. nest_level > 0) |
| 2137 | if (input(k:k) == backslash .and. k < len_trim(input) .and. input(k+1:k+1) == '`') then |
| 2138 | nest_level = nest_level - 1 |
| 2139 | if (nest_level == 0) then |
| 2140 | ! Copy everything up to here, then add closing ) |
| 2141 | do while (i < k) |
| 2142 | temp_result(j:j) = input(i:i) |
| 2143 | j = j + 1 |
| 2144 | i = i + 1 |
| 2145 | end do |
| 2146 | temp_result(j:j) = ')' |
| 2147 | j = j + 1 |
| 2148 | i = k + 2 ! Skip the \` |
| 2149 | exit |
| 2150 | else |
| 2151 | k = k + 2 |
| 2152 | end if |
| 2153 | else |
| 2154 | k = k + 1 |
| 2155 | end if |
| 2156 | end do |
| 2157 | end block |
| 2158 | cycle |
| 2159 | else if (input(i+1:i+1) == '$' .or. input(i+1:i+1) == backslash .or. & |
| 2160 | input(i+1:i+1) == char(10)) then |
| 2161 | ! Other escapes: consume backslash, copy the character |
| 2162 | temp_result(j:j) = input(i+1:i+1) |
| 2163 | j = j + 1 |
| 2164 | i = i + 2 |
| 2165 | cycle |
| 2166 | end if |
| 2167 | end if |
| 2168 | |
| 2169 | ! Process backticks (not inside single quotes) |
| 2170 | ! Fortran .or. does NOT short-circuit, so check i > 1 separately |
| 2171 | block |
| 2172 | logical :: backtick_not_escaped |
| 2173 | if (i == 1) then |
| 2174 | backtick_not_escaped = .true. |
| 2175 | else |
| 2176 | backtick_not_escaped = (input(i-1:i-1) /= backslash) |
| 2177 | end if |
| 2178 | if (input(i:i) == '`' .and. .not. in_single_quote .and. backtick_not_escaped) then |
| 2179 | if (.not. in_backticks) then |
| 2180 | ! Start of backtick command substitution |
| 2181 | in_backticks = .true. |
| 2182 | backtick_start = i |
| 2183 | temp_result(j:j+1) = '$(' |
| 2184 | j = j + 2 |
| 2185 | else |
| 2186 | ! End of backtick command substitution |
| 2187 | in_backticks = .false. |
| 2188 | temp_result(j:j) = ')' |
| 2189 | j = j + 1 |
| 2190 | end if |
| 2191 | i = i + 1 |
| 2192 | else |
| 2193 | ! Regular character |
| 2194 | temp_result(j:j) = input(i:i) |
| 2195 | i = i + 1 |
| 2196 | j = j + 1 |
| 2197 | end if |
| 2198 | end block ! backtick_not_escaped |
| 2199 | end do |
| 2200 | |
| 2201 | allocate(character(len=j-1) :: output) |
| 2202 | output = temp_result(1:j-1) |
| 2203 | end function |
| 2204 | |
| 2205 | subroutine execute_command_substitution(command, output, shell) |
| 2206 | use command_capture, only: execute_command_and_capture |
| 2207 | character(len=*), intent(in) :: command |
| 2208 | character(len=:), allocatable, intent(out) :: output |
| 2209 | type(shell_state_t), intent(inout) :: shell |
| 2210 | |
| 2211 | ! POSIX: errexit should not trigger in command substitution |
| 2212 | shell%in_command_substitution = .true. |
| 2213 | |
| 2214 | ! Execute in current shell context to preserve functions, variables, etc. |
| 2215 | call execute_command_and_capture(shell, command, output) |
| 2216 | |
| 2217 | shell%in_command_substitution = .false. |
| 2218 | |
| 2219 | if (.not. allocated(output)) output = '' |
| 2220 | |
| 2221 | ! Remove trailing newlines (but NOT other whitespace like spaces) |
| 2222 | do while (len(output) > 0 .and. output(len(output):len(output)) == char(10)) |
| 2223 | output = output(:len(output)-1) |
| 2224 | end do |
| 2225 | end subroutine |
| 2226 | |
| 2227 | ! Simple pattern matching for shell expansions (supports * wildcard) |
| 2228 | function shell_pattern_match(text, pattern) result(matches) |
| 2229 | character(len=*), intent(in) :: text, pattern |
| 2230 | logical :: matches |
| 2231 | integer :: t_pos, p_pos, star_pos, match_pos |
| 2232 | |
| 2233 | matches = .false. |
| 2234 | t_pos = 1 |
| 2235 | p_pos = 1 |
| 2236 | star_pos = 0 |
| 2237 | match_pos = 0 |
| 2238 | |
| 2239 | do while (t_pos <= len_trim(text)) |
| 2240 | if (p_pos <= len_trim(pattern) .and. & |
| 2241 | (pattern(p_pos:p_pos) == text(t_pos:t_pos) .or. pattern(p_pos:p_pos) == '?')) then |
| 2242 | t_pos = t_pos + 1 |
| 2243 | p_pos = p_pos + 1 |
| 2244 | else if (p_pos <= len_trim(pattern) .and. pattern(p_pos:p_pos) == '*') then |
| 2245 | star_pos = p_pos |
| 2246 | match_pos = t_pos |
| 2247 | p_pos = p_pos + 1 |
| 2248 | else if (star_pos > 0) then |
| 2249 | p_pos = star_pos + 1 |
| 2250 | match_pos = match_pos + 1 |
| 2251 | t_pos = match_pos |
| 2252 | else |
| 2253 | return |
| 2254 | end if |
| 2255 | end do |
| 2256 | |
| 2257 | do while (p_pos <= len_trim(pattern) .and. pattern(p_pos:p_pos) == '*') |
| 2258 | p_pos = p_pos + 1 |
| 2259 | end do |
| 2260 | |
| 2261 | matches = (p_pos > len_trim(pattern)) |
| 2262 | end function |
| 2263 | |
| 2264 | |
| 2265 | subroutine process_tilde_expansion(token, pos, result, result_pos, shell) |
| 2266 | character(len=*), intent(in) :: token |
| 2267 | integer, intent(inout) :: pos, result_pos |
| 2268 | character(len=*), intent(inout) :: result |
| 2269 | type(shell_state_t), intent(in) :: shell |
| 2270 | |
| 2271 | character(len=MAX_TOKEN_LEN) :: username, home_path |
| 2272 | character(len=:), allocatable :: home_dir, shell_var |
| 2273 | integer :: start_pos |
| 2274 | |
| 2275 | ! Skip the tilde |
| 2276 | pos = pos + 1 |
| 2277 | |
| 2278 | ! POSIX: ~+ expands to PWD, ~- expands to OLDPWD |
| 2279 | if (pos <= len_trim(token) .and. token(pos:pos) == '+') then |
| 2280 | ! ~+ - expand to PWD (check shell variable first, then environment) |
| 2281 | shell_var = get_shell_variable(shell, 'PWD') |
| 2282 | if (len_trim(shell_var) > 0) then |
| 2283 | result(result_pos:result_pos+len_trim(shell_var)-1) = trim(shell_var) |
| 2284 | result_pos = result_pos + len_trim(shell_var) |
| 2285 | else |
| 2286 | home_dir = get_environment_var('PWD') |
| 2287 | if (allocated(home_dir) .and. len(home_dir) > 0) then |
| 2288 | result(result_pos:result_pos+len(home_dir)-1) = home_dir |
| 2289 | result_pos = result_pos + len(home_dir) |
| 2290 | else |
| 2291 | ! Fallback: return ~+ literally |
| 2292 | result(result_pos:result_pos+1) = '~+' |
| 2293 | result_pos = result_pos + 2 |
| 2294 | end if |
| 2295 | end if |
| 2296 | pos = pos + 1 ! Skip the + |
| 2297 | return |
| 2298 | else if (pos <= len_trim(token) .and. token(pos:pos) == '-') then |
| 2299 | ! ~- - expand to OLDPWD (check shell variable first, then environment) |
| 2300 | shell_var = get_shell_variable(shell, 'OLDPWD') |
| 2301 | if (len_trim(shell_var) > 0) then |
| 2302 | result(result_pos:result_pos+len_trim(shell_var)-1) = trim(shell_var) |
| 2303 | result_pos = result_pos + len_trim(shell_var) |
| 2304 | else |
| 2305 | home_dir = get_environment_var('OLDPWD') |
| 2306 | if (allocated(home_dir) .and. len(home_dir) > 0) then |
| 2307 | result(result_pos:result_pos+len(home_dir)-1) = home_dir |
| 2308 | result_pos = result_pos + len(home_dir) |
| 2309 | else |
| 2310 | ! Fallback: return ~- literally |
| 2311 | result(result_pos:result_pos+1) = '~-' |
| 2312 | result_pos = result_pos + 2 |
| 2313 | end if |
| 2314 | end if |
| 2315 | pos = pos + 1 ! Skip the - |
| 2316 | return |
| 2317 | else if (pos > len_trim(token) .or. token(pos:pos) == '/' .or. token(pos:pos) == ' ') then |
| 2318 | ! Simple ~ expansion - use HOME environment variable |
| 2319 | home_dir = get_environment_var('HOME') |
| 2320 | if (allocated(home_dir) .and. len(home_dir) > 0) then |
| 2321 | result(result_pos:result_pos+len(home_dir)-1) = home_dir |
| 2322 | result_pos = result_pos + len(home_dir) |
| 2323 | else |
| 2324 | ! Fallback to /home/user if HOME not set |
| 2325 | home_dir = get_environment_var('USER') |
| 2326 | if (allocated(home_dir) .and. len(home_dir) > 0) then |
| 2327 | home_path = '/home/' // home_dir |
| 2328 | result(result_pos:result_pos+len_trim(home_path)-1) = trim(home_path) |
| 2329 | result_pos = result_pos + len_trim(home_path) |
| 2330 | else |
| 2331 | ! Last resort fallback |
| 2332 | result(result_pos:result_pos+5) = '/home/' |
| 2333 | result_pos = result_pos + 6 |
| 2334 | end if |
| 2335 | end if |
| 2336 | else |
| 2337 | ! ~username expansion |
| 2338 | start_pos = pos |
| 2339 | do while (pos <= len_trim(token) .and. token(pos:pos) /= '/' .and. token(pos:pos) /= ' ') |
| 2340 | pos = pos + 1 |
| 2341 | end do |
| 2342 | |
| 2343 | if (pos > start_pos) then |
| 2344 | username = token(start_pos:pos-1) |
| 2345 | else |
| 2346 | username = '' |
| 2347 | end if |
| 2348 | |
| 2349 | ! Simple implementation: assume user home is in /home/username |
| 2350 | ! In a full implementation, you'd use getpwnam() system call |
| 2351 | home_path = '/home/' // trim(username) |
| 2352 | result(result_pos:result_pos+len_trim(home_path)-1) = trim(home_path) |
| 2353 | result_pos = result_pos + len_trim(home_path) |
| 2354 | |
| 2355 | ! Don't increment pos here as it's already at the next character |
| 2356 | pos = pos - 1 ! Adjust because main loop will increment |
| 2357 | end if |
| 2358 | end subroutine |
| 2359 | |
| 2360 | ! Detect and replace process substitution <(...) and >(...) with FIFO paths |
| 2361 | subroutine process_substitutions(shell, input, output) |
| 2362 | use substitution, only: create_fifo_for_subst, set_fifo_pid |
| 2363 | type(shell_state_t), intent(inout) :: shell |
| 2364 | character(len=*), intent(in) :: input |
| 2365 | character(len=*), intent(out) :: output |
| 2366 | |
| 2367 | integer :: i, start_pos, paren_depth, out_pos, fifo_len |
| 2368 | character(len=MAX_PATH_LEN) :: fifo_path, command |
| 2369 | logical :: is_input_subst |
| 2370 | integer(c_pid_t) :: pid |
| 2371 | character(len=1) :: subst_type |
| 2372 | |
| 2373 | output = '' |
| 2374 | out_pos = 1 |
| 2375 | i = 1 |
| 2376 | |
| 2377 | do while (i <= len_trim(input)) |
| 2378 | ! Check for <( or >( |
| 2379 | ! IMPORTANT: Fortran .and. does NOT short-circuit, so we must use |
| 2380 | ! a nested if to avoid substring out-of-bounds access |
| 2381 | if (i+1 <= len(input) .and. i+1 <= len_trim(input)) then |
| 2382 | if (input(i:i+1) == '<(' .or. input(i:i+1) == '>(') then |
| 2383 | |
| 2384 | subst_type = input(i:i) |
| 2385 | is_input_subst = (subst_type == '<') |
| 2386 | |
| 2387 | ! Find matching closing parenthesis |
| 2388 | start_pos = i + 2 |
| 2389 | paren_depth = 1 |
| 2390 | i = start_pos |
| 2391 | |
| 2392 | do while (i <= len_trim(input) .and. paren_depth > 0) |
| 2393 | if (input(i:i) == '(') then |
| 2394 | paren_depth = paren_depth + 1 |
| 2395 | else if (input(i:i) == ')') then |
| 2396 | paren_depth = paren_depth - 1 |
| 2397 | end if |
| 2398 | i = i + 1 |
| 2399 | end do |
| 2400 | |
| 2401 | if (paren_depth == 0) then |
| 2402 | ! Extract the command |
| 2403 | command = input(start_pos:i-2) |
| 2404 | |
| 2405 | ! Create FIFO |
| 2406 | fifo_path = create_fifo_for_subst(shell, is_input_subst) |
| 2407 | |
| 2408 | if (len_trim(fifo_path) > 0) then |
| 2409 | ! Fork background process to execute command with proper redirection |
| 2410 | pid = c_fork() |
| 2411 | |
| 2412 | if (pid == 0) then |
| 2413 | ! Child process |
| 2414 | call execute_proc_subst_command(trim(command), trim(fifo_path), is_input_subst) |
| 2415 | call c_exit(0) |
| 2416 | else |
| 2417 | ! Parent process - track the PID |
| 2418 | call set_fifo_pid(shell, fifo_path, pid) |
| 2419 | |
| 2420 | ! Replace <(command) or >(command) with FIFO path |
| 2421 | fifo_len = len_trim(fifo_path) |
| 2422 | if (out_pos + fifo_len - 1 <= len(output)) then |
| 2423 | output(out_pos:out_pos+fifo_len-1) = trim(fifo_path) |
| 2424 | out_pos = out_pos + fifo_len |
| 2425 | end if |
| 2426 | end if |
| 2427 | else |
| 2428 | write(error_unit, '(A)') 'fortsh: failed to create process substitution' |
| 2429 | ! Keep original if failed - but this is a rare error case |
| 2430 | end if |
| 2431 | end if |
| 2432 | else |
| 2433 | ! Regular character - copy without trimming |
| 2434 | if (out_pos <= len(output)) then |
| 2435 | output(out_pos:out_pos) = input(i:i) |
| 2436 | out_pos = out_pos + 1 |
| 2437 | end if |
| 2438 | i = i + 1 |
| 2439 | end if |
| 2440 | else |
| 2441 | ! Last character or beyond trim - copy as regular character |
| 2442 | if (out_pos <= len(output)) then |
| 2443 | output(out_pos:out_pos) = input(i:i) |
| 2444 | out_pos = out_pos + 1 |
| 2445 | end if |
| 2446 | i = i + 1 |
| 2447 | end if |
| 2448 | end do |
| 2449 | end subroutine |
| 2450 | |
| 2451 | ! Execute a process substitution command with proper redirection |
| 2452 | subroutine execute_proc_subst_command(command, fifo_path, is_input) |
| 2453 | character(len=*), intent(in) :: command, fifo_path |
| 2454 | logical, intent(in) :: is_input |
| 2455 | character(len=512) :: full_command |
| 2456 | character(len=256), target :: shell_cmd, command_c |
| 2457 | character(len=16), target :: shell_flag |
| 2458 | type(c_ptr), target :: argv(4) |
| 2459 | integer :: result |
| 2460 | |
| 2461 | ! Build redirected command using shell |
| 2462 | if (is_input) then |
| 2463 | ! <(command) - redirect command's stdout to FIFO |
| 2464 | write(full_command, '(A,A,A,A)') trim(command), ' > ', trim(fifo_path), c_null_char |
| 2465 | else |
| 2466 | ! >(command) - redirect FIFO to command's stdin |
| 2467 | write(full_command, '(A,A,A,A)') trim(command), ' < ', trim(fifo_path), c_null_char |
| 2468 | end if |
| 2469 | |
| 2470 | ! Execute via /bin/sh -c |
| 2471 | shell_cmd = '/bin/sh'//c_null_char |
| 2472 | shell_flag = '-c'//c_null_char |
| 2473 | command_c = trim(full_command) |
| 2474 | |
| 2475 | argv(1) = c_loc(shell_cmd) |
| 2476 | argv(2) = c_loc(shell_flag) |
| 2477 | argv(3) = c_loc(command_c) |
| 2478 | argv(4) = c_null_ptr |
| 2479 | |
| 2480 | result = c_execvp(c_loc(shell_cmd), c_loc(argv)) |
| 2481 | if (result < 0) then |
| 2482 | write(error_unit, '(A)') 'fortsh: failed to execute process substitution command' |
| 2483 | call c_exit(1) |
| 2484 | end if |
| 2485 | end subroutine |
| 2486 | |
| 2487 | ! Execute a command via system shell (for process substitution) |
| 2488 | subroutine execute_command_via_shell(command) |
| 2489 | character(len=*), intent(in) :: command |
| 2490 | character(len=256), target :: shell_cmd, command_c |
| 2491 | character(len=16), target :: shell_flag |
| 2492 | type(c_ptr), target :: argv(4) |
| 2493 | integer :: result |
| 2494 | |
| 2495 | ! Build command: /bin/sh -c "command" |
| 2496 | shell_cmd = '/bin/sh'//c_null_char |
| 2497 | shell_flag = '-c'//c_null_char |
| 2498 | command_c = trim(command)//c_null_char |
| 2499 | |
| 2500 | argv(1) = c_loc(shell_cmd) |
| 2501 | argv(2) = c_loc(shell_flag) |
| 2502 | argv(3) = c_loc(command_c) |
| 2503 | argv(4) = c_null_ptr |
| 2504 | |
| 2505 | ! Execute /bin/sh -c "command" |
| 2506 | result = c_execvp(c_loc(shell_cmd), c_loc(argv)) |
| 2507 | if (result < 0) then |
| 2508 | write(error_unit, '(A)') 'fortsh: failed to execute process substitution command' |
| 2509 | call c_exit(1) |
| 2510 | end if |
| 2511 | end subroutine |
| 2512 | |
| 2513 | ! Extract prefix assignments (VAR=value command) from tokenized command |
| 2514 | ! Moves VAR=value pairs to cmd%prefix_assignments and removes them from tokens |
| 2515 | subroutine extract_prefix_assignments(cmd) |
| 2516 | type(command_t), intent(inout) :: cmd |
| 2517 | integer :: i, eq_pos, first_cmd_token |
| 2518 | character(len=256) :: token |
| 2519 | logical :: is_assignment |
| 2520 | character(len=MAX_TOKEN_LEN), allocatable :: new_tokens(:) |
| 2521 | integer :: new_token_count |
| 2522 | |
| 2523 | if (.not. allocated(cmd%tokens) .or. cmd%num_tokens == 0) return |
| 2524 | |
| 2525 | cmd%num_prefix_assignments = 0 |
| 2526 | first_cmd_token = 0 |
| 2527 | |
| 2528 | ! Scan tokens from the beginning to find prefix assignments |
| 2529 | do i = 1, cmd%num_tokens |
| 2530 | token = trim(cmd%tokens(i)) |
| 2531 | |
| 2532 | ! Check if token is a valid assignment (VAR=value) |
| 2533 | is_assignment = .false. |
| 2534 | eq_pos = index(token, '=') |
| 2535 | |
| 2536 | if (eq_pos > 1) then |
| 2537 | ! Has '=' and something before it |
| 2538 | ! Check if everything before '=' is a valid variable name |
| 2539 | ! (letters, numbers, underscore, but must start with letter or underscore) |
| 2540 | is_assignment = is_valid_var_name(token(:eq_pos-1)) |
| 2541 | end if |
| 2542 | |
| 2543 | if (is_assignment) then |
| 2544 | ! This is a prefix assignment |
| 2545 | if (cmd%num_prefix_assignments < MAX_PREFIX_ASSIGNMENTS) then |
| 2546 | if (.not. allocated(cmd%prefix_assignments)) then |
| 2547 | allocate(character(len=MAX_TOKEN_LEN) :: cmd%prefix_assignments(MAX_PREFIX_ASSIGNMENTS)) |
| 2548 | end if |
| 2549 | cmd%num_prefix_assignments = cmd%num_prefix_assignments + 1 |
| 2550 | cmd%prefix_assignments(cmd%num_prefix_assignments) = trim(token) |
| 2551 | end if |
| 2552 | else |
| 2553 | ! First non-assignment token - this is where the command starts |
| 2554 | first_cmd_token = i |
| 2555 | exit |
| 2556 | end if |
| 2557 | end do |
| 2558 | |
| 2559 | ! If we found prefix assignments, remove them from tokens |
| 2560 | if (cmd%num_prefix_assignments > 0 .and. first_cmd_token > 0) then |
| 2561 | new_token_count = cmd%num_tokens - cmd%num_prefix_assignments |
| 2562 | |
| 2563 | if (new_token_count > 0) then |
| 2564 | ! Allocate new token array with remaining tokens |
| 2565 | allocate(new_tokens(new_token_count)) |
| 2566 | |
| 2567 | ! Copy remaining tokens |
| 2568 | do i = 1, new_token_count |
| 2569 | new_tokens(i) = cmd%tokens(first_cmd_token + i - 1) |
| 2570 | end do |
| 2571 | |
| 2572 | ! Replace tokens array |
| 2573 | deallocate(cmd%tokens) |
| 2574 | cmd%tokens = new_tokens |
| 2575 | cmd%num_tokens = new_token_count |
| 2576 | else |
| 2577 | ! All tokens were assignments, no actual command |
| 2578 | deallocate(cmd%tokens) |
| 2579 | cmd%num_tokens = 0 |
| 2580 | end if |
| 2581 | end if |
| 2582 | end subroutine |
| 2583 | |
| 2584 | ! Check if a string is a valid shell variable name |
| 2585 | function is_valid_var_name(name) result(is_valid) |
| 2586 | character(len=*), intent(in) :: name |
| 2587 | logical :: is_valid |
| 2588 | integer :: i, check_len, bracket_pos |
| 2589 | character :: ch |
| 2590 | |
| 2591 | is_valid = .false. |
| 2592 | check_len = len_trim(name) |
| 2593 | if (check_len == 0) return |
| 2594 | |
| 2595 | ! Accept array subscript names: var[subscript] |
| 2596 | bracket_pos = index(name(1:check_len), '[') |
| 2597 | if (bracket_pos > 0) then |
| 2598 | check_len = bracket_pos - 1 |
| 2599 | if (check_len == 0) return |
| 2600 | end if |
| 2601 | |
| 2602 | ! First character must be letter or underscore |
| 2603 | ch = name(1:1) |
| 2604 | if (.not. ((ch >= 'A' .and. ch <= 'Z') .or. & |
| 2605 | (ch >= 'a' .and. ch <= 'z') .or. & |
| 2606 | ch == '_')) then |
| 2607 | return |
| 2608 | end if |
| 2609 | |
| 2610 | ! Remaining characters can be letters, digits, or underscores |
| 2611 | do i = 2, check_len |
| 2612 | ch = name(i:i) |
| 2613 | if (.not. ((ch >= 'A' .and. ch <= 'Z') .or. & |
| 2614 | (ch >= 'a' .and. ch <= 'z') .or. & |
| 2615 | (ch >= '0' .and. ch <= '9') .or. & |
| 2616 | ch == '_')) then |
| 2617 | return |
| 2618 | end if |
| 2619 | end do |
| 2620 | |
| 2621 | is_valid = .true. |
| 2622 | end function |
| 2623 | |
| 2624 | ! Check if a line has unclosed quotes (needs continuation) |
| 2625 | function has_unclosed_quote(line) result(has_unclosed) |
| 2626 | character(len=*), intent(in) :: line |
| 2627 | logical :: has_unclosed |
| 2628 | integer :: i |
| 2629 | logical :: in_single_quote, in_double_quote |
| 2630 | character :: prev_char |
| 2631 | |
| 2632 | has_unclosed = .false. |
| 2633 | in_single_quote = .false. |
| 2634 | in_double_quote = .false. |
| 2635 | prev_char = ' ' |
| 2636 | |
| 2637 | do i = 1, len_trim(line) |
| 2638 | ! Check for escape character |
| 2639 | if (prev_char == '\') then |
| 2640 | ! Skip this character as it's escaped |
| 2641 | prev_char = ' ' ! Reset escape |
| 2642 | cycle |
| 2643 | end if |
| 2644 | |
| 2645 | ! Track quote state |
| 2646 | if (line(i:i) == "'" .and. .not. in_double_quote) then |
| 2647 | in_single_quote = .not. in_single_quote |
| 2648 | else if (line(i:i) == '"' .and. .not. in_single_quote) then |
| 2649 | in_double_quote = .not. in_double_quote |
| 2650 | end if |
| 2651 | |
| 2652 | prev_char = line(i:i) |
| 2653 | end do |
| 2654 | |
| 2655 | ! If either quote type is unclosed, we need continuation |
| 2656 | has_unclosed = in_single_quote .or. in_double_quote |
| 2657 | end function |
| 2658 | |
| 2659 | function ends_with_continuation_backslash(line) result(needs_continuation) |
| 2660 | character(len=*), intent(in) :: line |
| 2661 | logical :: needs_continuation |
| 2662 | integer :: i, line_len |
| 2663 | logical :: in_single_quote, in_double_quote |
| 2664 | character :: prev_char |
| 2665 | |
| 2666 | needs_continuation = .false. |
| 2667 | line_len = len_trim(line) |
| 2668 | |
| 2669 | ! Empty line doesn't need continuation |
| 2670 | if (line_len == 0) return |
| 2671 | |
| 2672 | ! Quick check: if line doesn't end with backslash, no continuation needed |
| 2673 | if (line(line_len:line_len) /= '\') return |
| 2674 | |
| 2675 | ! Now we need to check if the trailing backslash is inside quotes |
| 2676 | in_single_quote = .false. |
| 2677 | in_double_quote = .false. |
| 2678 | prev_char = ' ' |
| 2679 | |
| 2680 | do i = 1, line_len |
| 2681 | ! Check for escape character (in double quotes or unquoted) |
| 2682 | if (prev_char == '\' .and. .not. in_single_quote) then |
| 2683 | ! Skip this character as it's escaped |
| 2684 | prev_char = ' ' ! Reset escape |
| 2685 | cycle |
| 2686 | end if |
| 2687 | |
| 2688 | ! Track quote state |
| 2689 | if (line(i:i) == "'" .and. .not. in_double_quote) then |
| 2690 | in_single_quote = .not. in_single_quote |
| 2691 | else if (line(i:i) == '"' .and. .not. in_single_quote) then |
| 2692 | in_double_quote = .not. in_double_quote |
| 2693 | end if |
| 2694 | |
| 2695 | prev_char = line(i:i) |
| 2696 | end do |
| 2697 | |
| 2698 | ! If we're not inside quotes and line ends with backslash, need continuation |
| 2699 | ! (prev_char is backslash at this point since it's the last char) |
| 2700 | needs_continuation = .not. in_single_quote .and. .not. in_double_quote |
| 2701 | end function |
| 2702 | |
| 2703 | ! Detect if a command line has a heredoc operator (<<) whose content hasn't been provided. |
| 2704 | ! Returns the delimiter string if found, empty if no heredoc pending. |
| 2705 | function get_heredoc_delimiter(line) result(delimiter) |
| 2706 | character(len=*), intent(in) :: line |
| 2707 | character(len=256) :: delimiter |
| 2708 | integer :: i, dstart, dend, line_len |
| 2709 | logical :: in_sq, in_dq, strip_tabs, delim_quoted |
| 2710 | |
| 2711 | delimiter = '' |
| 2712 | line_len = len_trim(line) |
| 2713 | in_sq = .false.; in_dq = .false. |
| 2714 | i = 1 |
| 2715 | |
| 2716 | do while (i < line_len) |
| 2717 | ! Track quotes |
| 2718 | if (line(i:i) == "'" .and. .not. in_dq) then |
| 2719 | in_sq = .not. in_sq |
| 2720 | i = i + 1; cycle |
| 2721 | end if |
| 2722 | if (line(i:i) == '"' .and. .not. in_sq) then |
| 2723 | in_dq = .not. in_dq |
| 2724 | i = i + 1; cycle |
| 2725 | end if |
| 2726 | if (in_sq .or. in_dq) then |
| 2727 | i = i + 1; cycle |
| 2728 | end if |
| 2729 | |
| 2730 | ! Look for << not inside quotes (but NOT <<< which is here-string) |
| 2731 | if (line(i:i) == '<' .and. i + 1 <= line_len .and. line(i+1:i+1) == '<') then |
| 2732 | ! Skip <<< (here-string, not heredoc) |
| 2733 | if (i + 2 <= line_len .and. line(i+2:i+2) == '<') then |
| 2734 | i = i + 3 |
| 2735 | cycle |
| 2736 | end if |
| 2737 | i = i + 2 |
| 2738 | ! Skip <<- (strip tabs variant) |
| 2739 | strip_tabs = .false. |
| 2740 | if (i <= line_len .and. line(i:i) == '-') then |
| 2741 | strip_tabs = .true. |
| 2742 | i = i + 1 |
| 2743 | end if |
| 2744 | ! Skip whitespace |
| 2745 | do while (i <= line_len .and. (line(i:i) == ' ' .or. line(i:i) == char(9))) |
| 2746 | i = i + 1 |
| 2747 | end do |
| 2748 | if (i > line_len) return |
| 2749 | |
| 2750 | ! Extract delimiter — may be quoted |
| 2751 | delim_quoted = .false. |
| 2752 | if (line(i:i) == "'" .or. line(i:i) == '"') then |
| 2753 | delim_quoted = .true. |
| 2754 | dstart = i + 1 |
| 2755 | dend = index(line(dstart:line_len), line(i:i)) |
| 2756 | if (dend > 0) then |
| 2757 | dend = dstart + dend - 2 |
| 2758 | else |
| 2759 | dend = line_len ! unclosed quote |
| 2760 | end if |
| 2761 | else |
| 2762 | dstart = i |
| 2763 | dend = i |
| 2764 | do while (dend + 1 <= line_len .and. line(dend+1:dend+1) /= ' ' .and. & |
| 2765 | line(dend+1:dend+1) /= char(9) .and. line(dend+1:dend+1) /= ';') |
| 2766 | dend = dend + 1 |
| 2767 | end do |
| 2768 | end if |
| 2769 | |
| 2770 | if (dend >= dstart) then |
| 2771 | delimiter = line(dstart:dend) |
| 2772 | return |
| 2773 | end if |
| 2774 | end if |
| 2775 | i = i + 1 |
| 2776 | end do |
| 2777 | end function |
| 2778 | |
| 2779 | function needs_compound_continuation(input) result(needs_more) |
| 2780 | use lexer, only: tokenize |
| 2781 | character(len=*), intent(in) :: input |
| 2782 | logical :: needs_more |
| 2783 | type(token_t), allocatable :: tokens(:) |
| 2784 | integer :: num_tokens, i |
| 2785 | integer :: if_depth, do_depth, case_depth, brace_depth |
| 2786 | |
| 2787 | needs_more = .false. |
| 2788 | |
| 2789 | ! Tokenize the input using the lexer |
| 2790 | allocate(tokens(MAX_TOKENS)) |
| 2791 | call tokenize(input, tokens, num_tokens) |
| 2792 | |
| 2793 | if_depth = 0 |
| 2794 | do_depth = 0 |
| 2795 | case_depth = 0 |
| 2796 | brace_depth = 0 |
| 2797 | |
| 2798 | do i = 1, num_tokens |
| 2799 | if (tokens(i)%token_type /= TOKEN_KEYWORD) cycle |
| 2800 | select case (trim(tokens(i)%value)) |
| 2801 | case ('if') |
| 2802 | if_depth = if_depth + 1 |
| 2803 | case ('fi') |
| 2804 | if_depth = if_depth - 1 |
| 2805 | case ('do') |
| 2806 | do_depth = do_depth + 1 |
| 2807 | case ('done') |
| 2808 | do_depth = do_depth - 1 |
| 2809 | case ('case') |
| 2810 | case_depth = case_depth + 1 |
| 2811 | case ('esac') |
| 2812 | case_depth = case_depth - 1 |
| 2813 | case ('{') |
| 2814 | brace_depth = brace_depth + 1 |
| 2815 | case ('}') |
| 2816 | brace_depth = brace_depth - 1 |
| 2817 | end select |
| 2818 | end do |
| 2819 | |
| 2820 | needs_more = (if_depth > 0 .or. do_depth > 0 .or. case_depth > 0 .or. brace_depth > 0) |
| 2821 | end function |
| 2822 | |
| 2823 | function remove_line_continuations(input) result(output) |
| 2824 | character(len=*), intent(in) :: input |
| 2825 | character(len=len(input)) :: output |
| 2826 | integer :: i, j |
| 2827 | |
| 2828 | output = '' |
| 2829 | i = 1 |
| 2830 | j = 1 |
| 2831 | |
| 2832 | do while (i <= len_trim(input)) |
| 2833 | ! Check for backslash followed by newline |
| 2834 | if (i < len_trim(input) .and. input(i:i) == char(92)) then |
| 2835 | if (input(i+1:i+1) == char(10)) then |
| 2836 | ! Skip both the backslash and newline |
| 2837 | i = i + 2 |
| 2838 | cycle |
| 2839 | end if |
| 2840 | end if |
| 2841 | |
| 2842 | ! Copy character to output |
| 2843 | output(j:j) = input(i:i) |
| 2844 | i = i + 1 |
| 2845 | j = j + 1 |
| 2846 | end do |
| 2847 | end function |
| 2848 | |
| 2849 | end module parser |