@@ -20,6 +20,7 @@ module grammar_parser |
| 20 | 20 | type(token_t) :: tokens(MAX_TOKENS) |
| 21 | 21 | integer :: num_tokens = 0 |
| 22 | 22 | integer :: pos = 1 |
| 23 | + integer :: current_line = 1 ! Track line number for LINENO |
| 23 | 24 | logical :: has_error = .false. |
| 24 | 25 | character(len=1024) :: error_msg = '' |
| 25 | 26 | character(len=:), allocatable :: raw_input ! For heredoc extraction |
@@ -134,6 +135,8 @@ contains |
| 134 | 135 | if (trim(tok%value) == ';') then |
| 135 | 136 | sep_type = LIST_SEP_SEQUENTIAL |
| 136 | 137 | call advance(state) |
| 138 | + ! Skip any newlines after semicolon (e.g., semicolon at end of line) |
| 139 | + call skip_newlines(state) |
| 137 | 140 | else if (trim(tok%value) == ';;') then |
| 138 | 141 | ! ;; is only valid in case statements, not here |
| 139 | 142 | write(error_unit, '(A)') 'sh: -c: line 1: syntax error near unexpected token `;;''' |
@@ -148,11 +151,14 @@ contains |
| 148 | 151 | else if (trim(tok%value) == '&') then |
| 149 | 152 | sep_type = LIST_SEP_BACKGROUND |
| 150 | 153 | call advance(state) |
| 154 | + ! Skip any newlines after ampersand (e.g., background at end of line) |
| 155 | + call skip_newlines(state) |
| 151 | 156 | else |
| 152 | 157 | exit |
| 153 | 158 | end if |
| 154 | 159 | else if (tok%token_type == TOKEN_NEWLINE) then |
| 155 | 160 | sep_type = LIST_SEP_SEQUENTIAL |
| 161 | + state%current_line = state%current_line + 1 ! Track this newline for LINENO |
| 156 | 162 | call advance(state) |
| 157 | 163 | ! Skip any additional newlines (e.g., from comment-only lines) |
| 158 | 164 | call skip_newlines(state) |
@@ -257,7 +263,7 @@ contains |
| 257 | 263 | recursive function parse_command_node(state) result(node) |
| 258 | 264 | type(parser_state_t), intent(inout) :: state |
| 259 | 265 | type(command_node_t), pointer :: node |
| 260 | | - type(token_t) :: tok |
| 266 | + type(token_t) :: tok, next_tok |
| 261 | 267 | logical :: is_compound |
| 262 | 268 | tok = current_token(state) |
| 263 | 269 | is_compound = .false. |
@@ -289,7 +295,16 @@ contains |
| 289 | 295 | node => parse_simple_cmd(state) |
| 290 | 296 | end select |
| 291 | 297 | else if (tok%token_type == TOKEN_OPERATOR .and. trim(tok%value) == '(') then |
| 292 | | - node => parse_subshell(state) |
| 298 | + ! Check if this is (( for arithmetic command vs ( for subshell |
| 299 | + ! Key: (( with no space = arithmetic, ( ( with space = nested subshell |
| 300 | + next_tok = peek_token(state%tokens, state%pos + 1) |
| 301 | + if (next_tok%token_type == TOKEN_OPERATOR .and. trim(next_tok%value) == '(' .and. & |
| 302 | + tok%end_pos + 1 == next_tok%start_pos) then |
| 303 | + ! Adjacent (( - treat as arithmetic |
| 304 | + node => parse_arithmetic_command(state) |
| 305 | + else |
| 306 | + node => parse_subshell(state) |
| 307 | + end if |
| 293 | 308 | is_compound = .true. |
| 294 | 309 | else |
| 295 | 310 | node => parse_simple_cmd(state) |
@@ -617,6 +632,7 @@ contains |
| 617 | 632 | end do |
| 618 | 633 | if (num_words > 0) then |
| 619 | 634 | node => create_simple_command(words, num_words) |
| 635 | + node%line = state%current_line ! Track line number for LINENO |
| 620 | 636 | if (associated(node%simple_cmd)) then |
| 621 | 637 | ! Store quoted and escaped flags |
| 622 | 638 | allocate(node%simple_cmd%word_was_quoted(num_words)) |
@@ -655,6 +671,7 @@ contains |
| 655 | 671 | else if (num_assignments > 0) then |
| 656 | 672 | ! Pure assignment(s) with no command - create a node with just assignments |
| 657 | 673 | node => create_simple_command(assignments, num_assignments) |
| 674 | + node%line = state%current_line ! Track line number for LINENO |
| 658 | 675 | if (associated(node%simple_cmd)) then |
| 659 | 676 | ! Mark these as assignments, not command words |
| 660 | 677 | node%simple_cmd%num_words = 0 |
@@ -673,6 +690,7 @@ contains |
| 673 | 690 | words(1) = ':' |
| 674 | 691 | num_words = 1 |
| 675 | 692 | node => create_simple_command(words, num_words) |
| 693 | + node%line = state%current_line ! Track line number for LINENO |
| 676 | 694 | if (associated(node%simple_cmd)) then |
| 677 | 695 | allocate(node%simple_cmd%word_was_quoted(1)) |
| 678 | 696 | node%simple_cmd%word_was_quoted(1) = .false. |
@@ -955,6 +973,15 @@ contains |
| 955 | 973 | call skip_newlines(state) |
| 956 | 974 | commands => parse_list(state) |
| 957 | 975 | call skip_newlines(state) |
| 976 | + ! POSIX: Empty subshell () is a syntax error |
| 977 | + if (.not. associated(commands)) then |
| 978 | + write(error_unit, '(A)') "sh: -c: line 1: syntax error near unexpected token `)'" |
| 979 | + if (allocated(state%raw_input)) then |
| 980 | + write(error_unit, '(A)') "sh: -c: `" // trim(state%raw_input) // "'" |
| 981 | + end if |
| 982 | + state%has_error = .true. |
| 983 | + return |
| 984 | + end if |
| 958 | 985 | if (.not. expect(state, ')')) return |
| 959 | 986 | node => create_subshell(commands) |
| 960 | 987 | end function |
@@ -965,7 +992,7 @@ contains |
| 965 | 992 | type(command_node_t), pointer :: node |
| 966 | 993 | type(token_t) :: tok |
| 967 | 994 | character(len=MAX_TOKEN_LEN) :: arith_expr, words(1) |
| 968 | | - integer :: paren_depth, expr_pos |
| 995 | + integer :: paren_depth, expr_pos, prev_end_pos |
| 969 | 996 | logical :: found_close |
| 970 | 997 | |
| 971 | 998 | nullify(node) |
@@ -980,6 +1007,7 @@ contains |
| 980 | 1007 | expr_pos = 3 |
| 981 | 1008 | paren_depth = 2 |
| 982 | 1009 | found_close = .false. |
| 1010 | + prev_end_pos = -1 ! Track previous token's end position |
| 983 | 1011 | |
| 984 | 1012 | do while (state%pos <= state%num_tokens) |
| 985 | 1013 | tok = current_token(state) |
@@ -990,6 +1018,7 @@ contains |
| 990 | 1018 | paren_depth = paren_depth - 1 |
| 991 | 1019 | arith_expr(expr_pos:expr_pos) = ')' |
| 992 | 1020 | expr_pos = expr_pos + 1 |
| 1021 | + prev_end_pos = tok%end_pos |
| 993 | 1022 | call advance(state) |
| 994 | 1023 | if (paren_depth == 0) then |
| 995 | 1024 | found_close = .true. |
@@ -999,13 +1028,23 @@ contains |
| 999 | 1028 | paren_depth = paren_depth + 1 |
| 1000 | 1029 | arith_expr(expr_pos:expr_pos) = '(' |
| 1001 | 1030 | expr_pos = expr_pos + 1 |
| 1031 | + prev_end_pos = tok%end_pos |
| 1002 | 1032 | call advance(state) |
| 1003 | 1033 | else |
| 1004 | 1034 | ! Add token value to expression |
| 1035 | + ! Only add a space if there was whitespace between this token and the previous one |
| 1036 | + ! in the original source (to preserve adjacent operators like && and ||) |
| 1037 | + if (prev_end_pos >= 0 .and. tok%start_pos > prev_end_pos + 1) then |
| 1038 | + if (expr_pos + 1 <= MAX_TOKEN_LEN) then |
| 1039 | + arith_expr(expr_pos:expr_pos) = ' ' |
| 1040 | + expr_pos = expr_pos + 1 |
| 1041 | + end if |
| 1042 | + end if |
| 1005 | 1043 | if (expr_pos + len_trim(tok%value) <= MAX_TOKEN_LEN) then |
| 1006 | 1044 | arith_expr(expr_pos:expr_pos+len_trim(tok%value)-1) = trim(tok%value) |
| 1007 | 1045 | expr_pos = expr_pos + len_trim(tok%value) |
| 1008 | 1046 | end if |
| 1047 | + prev_end_pos = tok%end_pos |
| 1009 | 1048 | call advance(state) |
| 1010 | 1049 | end if |
| 1011 | 1050 | end do |
@@ -1018,15 +1057,18 @@ contains |
| 1018 | 1057 | ! Create a simple command with the arithmetic expression as the first token |
| 1019 | 1058 | words(1) = arith_expr(1:expr_pos-1) |
| 1020 | 1059 | node => create_simple_command(words, 1) |
| 1060 | + node%line = state%current_line ! Track line number for LINENO |
| 1021 | 1061 | |
| 1022 | 1062 | ! Allocate metadata arrays to prevent segfaults in AST executor |
| 1063 | + ! Mark the arithmetic expression as "quoted" to prevent word splitting |
| 1064 | + ! (the expression is a single unit that should not be split on IFS) |
| 1023 | 1065 | if (associated(node) .and. associated(node%simple_cmd)) then |
| 1024 | 1066 | allocate(node%simple_cmd%word_was_quoted(1)) |
| 1025 | | - node%simple_cmd%word_was_quoted(1) = .false. |
| 1067 | + node%simple_cmd%word_was_quoted(1) = .true. ! Prevent word splitting |
| 1026 | 1068 | allocate(node%simple_cmd%word_was_escaped(1)) |
| 1027 | 1069 | node%simple_cmd%word_was_escaped(1) = .false. |
| 1028 | 1070 | allocate(node%simple_cmd%word_quote_type(1)) |
| 1029 | | - node%simple_cmd%word_quote_type(1) = QUOTE_NONE |
| 1071 | + node%simple_cmd%word_quote_type(1) = QUOTE_DOUBLE ! Treat like double-quoted |
| 1030 | 1072 | allocate(node%simple_cmd%word_lengths(1)) |
| 1031 | 1073 | node%simple_cmd%word_lengths(1) = expr_pos - 1 |
| 1032 | 1074 | end if |
@@ -1049,6 +1091,7 @@ contains |
| 1049 | 1091 | type(token_t) :: tok |
| 1050 | 1092 | tok = current_token(state) |
| 1051 | 1093 | do while (tok%token_type == TOKEN_NEWLINE) |
| 1094 | + state%current_line = state%current_line + 1 ! Track LINENO |
| 1052 | 1095 | call advance(state) |
| 1053 | 1096 | tok = current_token(state) |
| 1054 | 1097 | end do |