| 1 | // Rush shell grammar (Phase 4 - Control Flow) |
| 2 | |
| 3 | // Explicit whitespace handling: this grammar defines no WHITESPACE rule, so rules consume blanks themselves via ws |
| 4 | ws = _{ " " | "\t" } // silent: exactly one space or tab |
| 5 | line_comment = _{ "#" ~ (!NEWLINE ~ ANY)* } // silent: "#" up to (not including) the end of line; no other rule visible in this grammar references it |
| 6 | |
| 7 | // Top-level input: optional leading blanks/blank lines, the command line, then trailing separators and blanks up to EOI |
| 8 | trailing_sep = _{ (NEWLINE | ";")* } // silent: any (possibly empty) run of newlines and semicolons |
| 9 | input = { SOI ~ (ws | NEWLINE)* ~ command_line ~ ws* ~ trailing_sep ~ ws* ~ EOI } // leading newlines are skipped so input that starts with a blank line still parses |
| 10 | |
| 11 | // Command line: zero or more complete commands, each pair separated by `separator`, with an optional trailing separator |
| 12 | command_line = { (complete_command ~ (separator ~ complete_command)* ~ separator?)? } // the outer `?` lets an empty command line match |
| 13 | |
| 14 | // Complete command: a function definition, a compound command (if/while/for/select/case), or an and_or list |
| 15 | // Any of these can optionally be followed by a background marker (&) |
| 16 | // The !closing_keyword guard keeps fi/done/esac/elif/else/;; from being parsed as the start of an and_or list |
| 17 | complete_command = { |
| 18 | (function_definition // alternatives are tried top to bottom; the first one that matches is used |
| 19 | | if_statement |
| 20 | | while_statement |
| 21 | | for_statement |
| 22 | | select_statement |
| 23 | | case_statement |
| 24 | | !closing_keyword ~ and_or_list) |
| 25 | ~ background_marker? |
| 26 | } |
| 27 | |
| 28 | // Background execution marker: optional blanks followed by a single "&" |
| 29 | background_marker = { ws* ~ "&" } // non-silent, so its presence is visible in the parse tree |
| 30 | |
| 31 | // Words that terminate a control-flow block; ";;" terminates a case clause |
| 32 | // Each must be followed by a blank, newline, ";" or end of input (checked by keyword_boundary without consuming it) |
| 33 | closing_keyword = @{ ("else" | "elif" | "esac" | "done" | "fi" | ";;") ~ keyword_boundary } |
| 34 | |
| 35 | // And/Or list: one or more pipelines joined by && or ||, with optional blanks around each operator |
| 36 | and_or_list = { pipeline ~ (ws* ~ and_or_op ~ ws* ~ pipeline)* } |
| 37 | and_or_op = { "&&" | "||" } // non-silent, so the operator that matched is available in the parse tree |
| 38 | |
| 39 | // Pipeline: one or more pipeline elements joined by "|", with optional blanks around each pipe |
| 40 | // Can be optionally negated with a leading "!" that must be followed by at least one blank |
| 41 | pipeline = { pipeline_negation? ~ pipeline_element ~ (ws* ~ "|" ~ ws* ~ pipeline_element)* } |
| 42 | pipeline_negation = { "!" ~ ws+ } |
| 43 | |
| 44 | // An element in a pipeline is tried in this order: extended test, subshell, simple command |
| 45 | pipeline_element = { extended_test | subshell | simple_command } |
| 46 | |
| 47 | // Control flow statements |
| 48 | // Keywords must be followed by a blank, newline, ";" or EOI (not word characters); the check is a lookahead and consumes nothing |
| 49 | keyword_boundary = _{ &(ws | NEWLINE | ";" | EOI) } |
| 50 | |
| 51 | if_statement = { // if COND then BODY, any number of elif clauses, an optional else clause, then fi |
| 52 | "if" ~ keyword_boundary ~ separator ~ complete_command ~ separator ~ "then" ~ keyword_boundary ~ separator ~ command_list ~ separator? |
| 53 | ~ elif_clause* |
| 54 | ~ else_clause? |
| 55 | ~ "fi" ~ keyword_boundary |
| 56 | } |
| 57 | |
| 58 | elif_clause = { // same shape as the leading if/then part: COND is one complete_command, BODY is a command_list |
| 59 | "elif" ~ keyword_boundary ~ separator ~ complete_command ~ separator ~ "then" ~ keyword_boundary ~ separator ~ command_list ~ separator? |
| 60 | } |
| 61 | |
| 62 | else_clause = { // else BODY; the optional trailing separator consumes any separator that precedes the closing fi |
| 63 | "else" ~ keyword_boundary ~ separator ~ command_list ~ separator? |
| 64 | } |
| 65 | |
| 66 | while_statement = { // while COND do BODY done; COND is a single complete_command, BODY is a command_list |
| 67 | "while" ~ keyword_boundary ~ separator ~ complete_command ~ separator ~ "do" ~ keyword_boundary ~ separator ~ command_list ~ separator? ~ "done" ~ keyword_boundary |
| 68 | } |
| 69 | |
| 70 | for_statement = { // for NAME in [WORD ...] do BODY done; the word list may be empty but "in" is required |
| 71 | "for" ~ keyword_boundary ~ ws+ ~ var_name ~ ws+ ~ "in" ~ keyword_boundary ~ (ws+ ~ word)* ~ separator ~ "do" ~ keyword_boundary ~ separator ~ command_list ~ separator? ~ "done" ~ keyword_boundary |
| 72 | } |
| 73 | |
| 74 | select_statement = { // select NAME in [WORD ...] do BODY done; identical in shape to for_statement apart from the keyword |
| 75 | "select" ~ keyword_boundary ~ ws+ ~ var_name ~ ws+ ~ "in" ~ keyword_boundary ~ (ws+ ~ word)* ~ separator ~ "do" ~ keyword_boundary ~ separator ~ command_list ~ separator? ~ "done" ~ keyword_boundary |
| 76 | } |
| 77 | |
| 78 | case_statement = { // case WORD in, zero or more clauses, then esac |
| 79 | "case" ~ keyword_boundary ~ ws+ ~ word ~ ws+ ~ "in" ~ keyword_boundary ~ separator ~ case_clause* ~ "esac" ~ keyword_boundary |
| 80 | } |
| 81 | |
| 82 | case_clause = { // one or more patterns separated by "|", then ")", a non-empty command_list, and a mandatory ";;" |
| 83 | ws* ~ pattern ~ (ws* ~ "|" ~ ws* ~ pattern)* ~ ws* ~ ")" ~ separator ~ command_list ~ (NEWLINE | ws)* ~ ";;" ~ separator? |
| 84 | } |
| 85 | |
| 86 | pattern = { word } // a case pattern is parsed exactly like a word |
| 87 | |
| 88 | // Subshell: ( commands ), with an optional separator just inside each parenthesis |
| 89 | // Executes commands in a subprocess |
| 90 | subshell = { "(" ~ separator? ~ command_list ~ separator? ~ ")" } |
| 91 | |
| 92 | // Extended test: [[ expression ]] (at least one blank is required after "[[" and before "]]") |
| 93 | // Used for conditional expressions with pattern matching and regex support |
| 94 | extended_test = { "[[" ~ ws+ ~ cond_expr ~ ws+ ~ "]]" } |
| 95 | |
| 96 | // Conditional expression for [[ ]]; binding from loosest to tightest is ||, then &&, then a leading ! |
| 97 | cond_expr = { |
| 98 | cond_or |
| 99 | } |
| 100 | |
| 101 | cond_or = { cond_and ~ (ws* ~ "||" ~ ws* ~ cond_and)* } |
| 102 | cond_and = { cond_not ~ (ws* ~ "&&" ~ ws* ~ cond_not)* } |
| 103 | cond_not = { ("!" ~ ws*)? ~ cond_primary } // blanks after "!" are optional here |
| 104 | |
| 105 | cond_primary = { // ordered choice: the first alternative that matches is used |
| 106 | "(" ~ ws* ~ cond_expr ~ ws* ~ ")" // Grouping |
| 107 | | cond_unary // -z, -n, -f, -d, etc. |
| 108 | | cond_binary // string comparisons, regex |
| 109 | | word // Single word (true if non-empty) |
| 110 | } |
| 111 | |
| 112 | // Unary test: an operator, at least one blank, then the operand word |
| 113 | cond_unary = { cond_unary_op ~ ws+ ~ word } |
| 114 | cond_unary_op = @{ |
| 115 | "-z" | "-n" // String tests |
| 116 | | "-e" | "-f" | "-d" | "-r" | "-w" | "-x" | "-s" // File tests |
| 117 | | "-L" | "-h" | "-p" | "-S" | "-b" | "-c" // File type tests |
| 118 | } |
| 119 | |
| 120 | // Binary test: both operands are words, and at least one blank is required on each side of the operator |
| 121 | cond_binary = { word ~ ws+ ~ cond_binary_op ~ ws+ ~ word } |
| 122 | cond_binary_op = @{ |
| 123 | "=~" // Regex match (sets BASH_REMATCH) |
| 124 | | "==" | "=" | "!=" // String comparison (== supports patterns); "=~" and "==" are listed before "=" so the longer operator is tried first |
| 125 | | "<" | ">" // String ordering |
| 126 | | "-eq" | "-ne" | "-lt" | "-le" | "-gt" | "-ge" // Numeric comparison |
| 127 | | "-nt" | "-ot" | "-ef" // File comparison |
| 128 | } |
| 129 | |
| 130 | // Function definition: function name { body; }, function name() { body; } or name() { body; } |
| 131 | function_definition = { // the body is a command_list between "{" and "}", with a separator required after "{" |
| 132 | "function" ~ keyword_boundary ~ ws+ ~ function_name ~ (ws* ~ "(" ~ ws* ~ ")")? ~ separator? ~ "{" ~ separator ~ command_list ~ separator? ~ "}" // "()" is optional after the keyword form |
| 133 | | function_name ~ ws* ~ "(" ~ ws* ~ ")" ~ separator? ~ "{" ~ separator ~ command_list ~ separator? ~ "}" |
| 134 | } |
| 135 | |
| 136 | function_name = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* } // letter or underscore, then letters, digits or underscores |
| 137 | |
| 138 | // Command list: one or more complete commands, each pair separated by `separator` |
| 139 | command_list = { complete_command ~ (separator ~ complete_command)* } |
| 140 | |
| 141 | // Separator: one or more newline/semicolon separators (each with optional surrounding blanks), or else a run of blanks alone |
| 142 | // The !";" lookahead prevents a single ";" from matching the first half of ";;", which is used in case statements |
| 143 | separator = _{ (ws* ~ (NEWLINE | (";" ~ !";")) ~ ws*)+ | ws+ } |
| 144 | |
| 145 | // Simple command: assignments and/or words, with optional redirections |
| 146 | // Assignments must come before words (bash convention) |
| 147 | // Redirects can appear anywhere after the first word, and are tried before words so a leading fd number (as in 2>file) is captured |
| 148 | simple_command = { |
| 149 | assignment ~ (ws+ ~ assignment)* ~ (ws+ ~ word ~ (ws+ ~ (redirect | word))*)? ~ (ws* ~ redirect)* // assignments, optionally followed by a command word and its arguments |
| 150 | | word ~ (ws+ ~ (redirect | word))* ~ (ws* ~ redirect)* // command word with arguments and redirections |
| 151 | | redirect ~ (ws* ~ redirect)* // redirections only, no command word |
| 152 | } |
| 153 | |
| 154 | // Variable assignment: NAME=value, NAME[index]=value, or NAME= (empty value, because the word after "=" is optional) |
| 155 | assignment = { var_name ~ array_index? ~ "=" ~ word? } |
| 156 | array_index = { "[" ~ array_subscript ~ "]" } |
| 157 | array_subscript = @{ (ASCII_DIGIT+ | var_name) } // either a run of decimal digits or a bare variable name |
| 158 | var_name = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* } // letter or underscore, then letters, digits or underscores |
| 159 | |
| 160 | // Redirections: <file, >file, >>file, 2>file, 2>&1, &>file, <<EOF, <<<string, <(cmd), >(cmd) |
| 161 | redirect = { // ordered choice: longer or more specific operators are listed before the operators they start with |
| 162 | redirect_stderr_to_stdout // Must come before others to match 2>&1 |
| 163 | | redirect_all_output // &>file or &>>file |
| 164 | | redirect_herestring // <<<string (must come before heredoc) |
| 165 | | redirect_heredoc // <<EOF or <<-EOF |
| 166 | | process_subst_input // <(command) - must come before redirect_input |
| 167 | | process_subst_output // >(command) - must come before redirect_output |
| 168 | | redirect_output_append // >>file |
| 169 | | redirect_output // >file or N>file |
| 170 | | redirect_input // <file |
| 171 | } |
| 172 | |
| 173 | redirect_input = { "<" ~ ws* ~ word } // no fd number is accepted on input redirection |
| 174 | redirect_output = { fd_number? ~ ">" ~ ws* ~ word } |
| 175 | redirect_output_append = { fd_number? ~ ">>" ~ ws* ~ word } |
| 176 | redirect_stderr_to_stdout = { "2>&1" } // matches only this exact four-character text |
| 177 | redirect_all_output = { "&>" ~ ">"? ~ ws* ~ word } // &>file or &>>file |
| 178 | |
| 179 | // Heredoc: <<EOF or <<-EOF (content collected separately by parser) |
| 180 | redirect_heredoc = { "<<" ~ "-"? ~ ws* ~ heredoc_delimiter } // only the operator and delimiter are matched by this rule |
| 181 | // Delimiter can be quoted (literal) or unquoted (expand) |
| 182 | heredoc_delimiter = { quoted_string | bare_delimiter } |
| 183 | bare_delimiter = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* } |
| 184 | |
| 185 | // Herestring: <<<string |
| 186 | redirect_herestring = { "<<<" ~ ws* ~ word } |
| 187 | |
| 188 | // Process substitution: <(command) and >(command) |
| 189 | // Creates named pipes (FIFOs) that can be used as file arguments |
| 190 | process_subst_input = { "<(" ~ ws* ~ command_subst_content ~ ")" } // body uses the same balanced-parenthesis rule as $(...) |
| 191 | process_subst_output = { ">(" ~ ws* ~ command_subst_content ~ ")" } |
| 192 | |
| 193 | fd_number = @{ ASCII_DIGIT+ } // one or more decimal digits |
| 194 | |
| 195 | // Word: one or more word parts (can mix literal text, quoted strings, variables, command substitution) |
| 196 | // Parts are concatenated without whitespace (so "hello$VAR" is one word, "hello $VAR" is two) |
| 197 | word = { word_part+ } |
| 198 | |
| 199 | word_part = { // ordered choice: bare_word_part is the fallback once no quoted/expansion form matches |
| 200 | quoted_string |
| 201 | | var_expansion |
| 202 | | arithmetic_expansion |
| 203 | | command_substitution |
| 204 | | backtick_substitution |
| 205 | | array_literal |
| 206 | | extglob_pattern |
| 207 | | bare_word_part |
| 208 | } |
| 209 | |
| 210 | // Backtick command substitution: `command` |
| 211 | // Traditional syntax, supports nested backticks via escaping: `echo \`inner\`` |
| 212 | backtick_substitution = { "`" ~ backtick_content ~ "`" } |
| 213 | backtick_content = @{ ("\\" ~ ANY | !"`" ~ ANY)* } // a backslash always takes the next character with it, so \` does not end the content |
| 214 | |
| 215 | // Array literal: (elem1 elem2 elem3); the element list may be empty, and elements are separated by blanks only |
| 216 | array_literal = { "(" ~ ws* ~ (word ~ (ws+ ~ word)*)? ~ ws* ~ ")" } |
| 217 | |
| 218 | // Extended glob patterns: !(pat), ?(pat), *(pat), +(pat), @(pat) |
| 219 | // Note: These are treated as literal text for glob expansion later |
| 220 | extglob_pattern = @{ |
| 221 | ("!" | "?" | "*" | "+" | "@") ~ "(" ~ extglob_content ~ ")" |
| 222 | } |
| 223 | extglob_content = { (!(")" | NEWLINE) ~ ANY)* } // everything up to the first ")" on the same line; nested parentheses are not balanced |
| 224 | |
| 225 | // Variable expansion: $VAR or ${VAR} or ${VAR:-default} or ${#VAR} or ${VAR#pattern} etc. |
| 226 | var_expansion = { // ordered choice: the specific ${...} forms are listed before the general ${VAR...} form |
| 227 | "${" ~ "#" ~ var_name ~ "[" ~ ("@" | "*") ~ "]" ~ "}" // ${#arr[@]} - array length |
| 228 | | "${" ~ "!" ~ var_name ~ "[" ~ ("@" | "*") ~ "]" ~ "}" // ${!arr[@]} - array indices |
| 229 | | "${" ~ "!" ~ var_name ~ "}" // ${!VAR} - indirect expansion |
| 230 | | "${" ~ "#" ~ var_name ~ "}" // ${#VAR} - variable length |
| 231 | | "${" ~ var_name ~ "[" ~ array_subscript ~ "]" ~ "}" // ${arr[index]} - array element |
| 232 | | "${" ~ var_name ~ "[" ~ ("@" | "*") ~ "]" ~ "}" // ${arr[@]} or ${arr[*]} - all elements |
| 233 | | "${" ~ var_name ~ var_modifier? ~ "}" // ${VAR} or ${VAR...} |
| 234 | | "$" ~ special_var // $?, $#, $@, $*, $0, $1, etc. |
| 235 | | "$" ~ var_name // $VAR |
| 236 | } |
| 237 | |
| 238 | // Special shell variables: $?, $#, $@, $*, $0, $1, $2, ..., $$, $!, etc. |
| 239 | special_var = @{ |
| 240 | "?" // Exit status of last command |
| 241 | | "#" // Number of positional parameters |
| 242 | | "@" // All positional parameters (as separate words) |
| 243 | | "*" // All positional parameters (as single word) |
| 244 | | "$" // Process ID |
| 245 | | "!" // Background job PID |
| 246 | | "0" // Shell/script name; tried before ASCII_DIGIT+, so a leading "0" always matches on its own |
| 247 | | ASCII_DIGIT+ // Positional parameters: $1, $2, ...; this grammar takes every following digit, so $10 is one token |
| 248 | } |
| 249 | |
| 250 | var_modifier = { // ordered choice: each two-character operator is listed before the one-character operator it starts with |
| 251 | // Colon variants (check empty AND unset) |
| 252 | ":-" ~ word // ${VAR:-default} |
| 253 | | ":=" ~ word // ${VAR:=default} - assign default |
| 254 | | ":+" ~ word // ${VAR:+alternate} - use if set |
| 255 | | ":?" ~ word // ${VAR:?message} - error if unset |
| 256 | // Non-colon variants (check unset only) - must come after colon variants |
| 257 | | "-" ~ word // ${VAR-default} - use default only if unset |
| 258 | | "=" ~ word // ${VAR=default} - assign only if unset |
| 259 | | "+" ~ word // ${VAR+alternate} - use if set (even if empty) |
| 260 | | "?" ~ word // ${VAR?message} - error only if unset |
| 261 | | "##" ~ prefix_suffix_pattern // ${VAR##pattern} - remove longest prefix |
| 262 | | "#" ~ prefix_suffix_pattern // ${VAR#pattern} - remove shortest prefix |
| 263 | | "%%" ~ prefix_suffix_pattern // ${VAR%%pattern} - remove longest suffix |
| 264 | | "%" ~ prefix_suffix_pattern // ${VAR%pattern} - remove shortest suffix |
| 265 | | "//" ~ replacement_pattern ~ "/" ~ replacement_pattern // ${VAR//pattern/replacement} |
| 266 | | "/" ~ replacement_pattern ~ "/" ~ replacement_pattern // ${VAR/pattern/replacement} |
| 267 | | ":" ~ var_offset ~ (":" ~ var_length)? // ${VAR:offset} or ${VAR:offset:length} |
| 268 | | "^^" // ${VAR^^} - uppercase all |
| 269 | | "^" // ${VAR^} - uppercase first |
| 270 | | ",," // ${VAR,,} - lowercase all |
| 271 | | "," // ${VAR,} - lowercase first |
| 272 | | "@" ~ transform_op // ${VAR@Q}, ${VAR@E}, etc. - transformation |
| 273 | } |
| 274 | |
| 275 | // Transformation operators for ${VAR@op}: exactly one of the listed letters |
| 276 | transform_op = @{ "Q" | "E" | "P" | "A" | "K" | "a" | "u" | "L" | "U" } |
| 277 | |
| 278 | // Pattern for prefix/suffix removal (can contain /); runs up to the closing "}" or end of line |
| 279 | prefix_suffix_pattern = @{ (!("}" | NEWLINE) ~ ANY)+ } |
| 280 | // Pattern for replacement (cannot contain / as it's a separator) |
| 281 | replacement_pattern = @{ (!("/" | "}" | NEWLINE) ~ ANY)+ } // "+" requires at least one character, so an empty pattern or replacement (e.g. ${VAR/x/}) does not match |
| 282 | var_offset = @{ ("-"? ~ ASCII_DIGIT+) } // optional minus sign directly followed by digits |
| 283 | var_length = @{ ASCII_DIGIT+ } |
| 284 | |
| 285 | // Arithmetic expansion: $((expression)); parentheses inside the expression must be balanced, e.g. $((a*(b+c))) |
| 286 | arithmetic_expansion = { "$((" ~ arithmetic_expr_content ~ "))" } |
| 287 | arithmetic_expr_content = @{ ("(" ~ arithmetic_expr_content ~ ")" | !("(" | ")" | NEWLINE) ~ ANY)* } // recurses on "(" so a group ending right before the closing "))" is kept whole; atomic, so the nested calls add no extra tokens |
| 288 | |
| 289 | // Command substitution: $(command) |
| 290 | // Handles nested parentheses: $(echo $(echo inner)) |
| 291 | command_substitution = { "$(" ~ command_subst_content ~ ")" } |
| 292 | command_subst_content = { command_subst_part* } // may be empty |
| 293 | command_subst_part = { |
| 294 | nested_parens |
| 295 | | command_subst_text |
| 296 | } |
| 297 | nested_parens = { "(" ~ command_subst_content ~ ")" } // recursion keeps inner parentheses balanced |
| 298 | command_subst_text = { (!(")" | "(" | NEWLINE) ~ ANY)+ } // any run of characters other than "(", ")" and newline; quoting is not interpreted by this rule |
| 299 | |
| 300 | // Reserved keywords that cannot start a bare word; the lookahead requires a blank, newline, ";" or EOI after the keyword |
| 301 | keyword = { ("if" | "then" | "elif" | "else" | "fi" | "while" | "do" | "done" | "for" | "in" | "case" | "esac") ~ &(ws | NEWLINE | ";" | EOI) } |
| 302 | |
| 303 | // Bare word part: literal text (no special characters) |
| 304 | // Note: We exclude blanks, newline, #, quotes, backtick, $, |, &, <, >, ;, ), }, ( to handle comments, quoting, expansion, pipes, operators, redirects, separators, case patterns, var expansion, and array literals |
| 305 | // "=" is allowed in words (needed for test command), assignments use explicit var_name pattern |
| 306 | // "(" is excluded so that array literals like (a b c) can be recognized; an unescaped backtick is excluded so that foo`cmd` yields a backtick_substitution, while the pair \` is still taken as literal text |
| 307 | // Keywords are reserved and cannot be used as bare words (the !keyword check applies at the start of the part) |
| 308 | bare_word_part = @{ !keyword ~ ("\\`" | !(" " | "\t" | NEWLINE | "#" | "\"" | "'" | "`" | "$" | "|" | "&" | "<" | ">" | ";" | ")" | "}" | "(") ~ ANY)+ } |
| 309 | |
| 310 | // Quoted strings (single quotes prevent expansion, double quotes allow it) |
| 311 | quoted_string = { double_quoted | single_quoted } |
| 312 | |
| 313 | // Double quotes: allow variable expansion and command substitution inside; \" \\ \$ and \` are escapes that stay inside the string |
| 314 | double_quoted = { "\"" ~ double_quoted_content ~ "\"" } |
| 315 | double_quoted_content = { double_quoted_part* } // may be empty, so "" is a valid string |
| 316 | double_quoted_part = { // expansions are tried first; literal text is the fallback |
| 317 | var_expansion |
| 318 | | arithmetic_expansion |
| 319 | | command_substitution |
| 320 | | backtick_substitution |
| 321 | | double_quoted_text |
| 322 | } |
| 323 | double_quoted_text = @{ ("\\" ~ ("\"" | "\\" | "$" | "`") | !("\"" | "$" | "`") ~ ANY)+ } // escape pairs are matched verbatim (backslash included); any other backslash is an ordinary character |
| 324 | |
| 325 | // Single quotes: no expansion (literal); the content runs to the next "'" and cannot itself contain one |
| 326 | single_quoted = @{ "'" ~ (!"'" ~ ANY)* ~ "'" } |
| 327 | |
| 328 | NEWLINE = _{ "\r"? ~ "\n" } // silent: LF or CRLF; this definition replaces pest's built-in NEWLINE |
| 329 |