Text · 13825 bytes Raw Blame History
1 // Rush shell grammar (Phase 4 - Control Flow)
2
// Whitespace is matched explicitly by the rules that need it
// (nothing is inserted automatically between tokens).
// ws: a single horizontal blank (tab or space); silent.
ws = _{ "\t" | " " }
// line_comment: "#" and everything up to, but not including, the line end; silent.
// NOTE(review): no rule in the visible grammar references line_comment.
line_comment = _{ "#" ~ (!NEWLINE ~ ANY)* }
6
// Top-level input
// trailing_sep: any run of semicolons / newlines left after the last command; silent.
trailing_sep = _{ (";" | NEWLINE)* }
// input: anchored at SOI and EOI, so the whole text must be consumed.
input = {
    SOI
    ~ ws* ~ command_line
    ~ ws* ~ trailing_sep
    ~ ws* ~ EOI
}
10
// Command line: zero or more complete commands joined by `separator`,
// optionally followed by one trailing separator. May match the empty string.
command_line = {
    (
        complete_command
        ~ (separator ~ complete_command)*
        ~ separator?
    )?
}
13
// Complete command: one compound construct or an and/or list, optionally
// followed by a background marker (&).
// Compound constructs are tried first, in the order listed. The and/or list
// is only attempted when the input does not start with a closing keyword, so
// that words such as `fi` or `done` are left for the enclosing construct.
complete_command = {
    (
        function_definition
      | if_statement
      | while_statement
      | for_statement
      | select_statement
      | case_statement
      | (!closing_keyword ~ and_or_list)
    )
    ~ background_marker?
}
27
// Background execution marker: optional blanks followed by a single "&".
background_marker = {
    ws* ~ "&"
}
30
// Closing keywords that terminate (or continue) a control-flow block.
// ";;" ends a case clause. A keyword only counts when it is followed by a
// blank, newline, ";" or end of input (the keyword_boundary lookahead), so a
// longer word that merely starts with one of these is not affected.
closing_keyword = @{
    (";;" | "fi" | "done" | "esac" | "elif" | "else")
    ~ keyword_boundary
}
34
// And/Or list: one pipeline, then any number of (`&&` | `||`) pipeline pairs.
// Blanks around the operator are optional.
and_or_list = {
    pipeline
    ~ (ws* ~ and_or_op ~ ws* ~ pipeline)*
}
// The operator is its own rule so it appears as a token in the parse tree.
and_or_op = { "||" | "&&" }
38
// Pipeline: one or more pipeline elements joined by "|", optionally negated
// with a leading "!".
pipeline = {
    pipeline_negation?
    ~ pipeline_element
    ~ (ws* ~ "|" ~ ws* ~ pipeline_element)*
}
// The "!" must be followed by at least one blank to count as negation.
pipeline_negation = { "!" ~ ws+ }
43
// Pipeline element: tried in order - extended test ([[ ... ]]), subshell
// (( ... )), then simple command.
pipeline_element = {
    extended_test
  | subshell
  | simple_command
}
46
// Control flow statements
// keyword_boundary: zero-width check that the next input is a blank, a
// newline, ";" or end of input. Placed after a keyword literal it prevents
// the keyword from matching as the prefix of a longer word. Consumes nothing.
keyword_boundary = _{
    &(";" | ws | NEWLINE | EOI)
}
50
// if <command> then <commands> [elif ...]* [else ...]? fi
// The condition is a single complete_command; the body is a command_list.
if_statement = {
    "if" ~ keyword_boundary ~ separator ~ complete_command
    ~ separator ~ "then" ~ keyword_boundary ~ separator ~ command_list
    ~ separator?
    ~ elif_clause*
    ~ else_clause?
    ~ "fi" ~ keyword_boundary
}
57
// elif <command> then <commands>, with an optional trailing separator.
elif_clause = {
    "elif" ~ keyword_boundary ~ separator ~ complete_command
    ~ separator ~ "then" ~ keyword_boundary ~ separator ~ command_list
    ~ separator?
}
61
// else <commands>, with an optional trailing separator.
else_clause = {
    "else" ~ keyword_boundary
    ~ separator ~ command_list
    ~ separator?
}
65
// while <command> do <commands> done
// The condition is a single complete_command; the body is a command_list.
while_statement = {
    "while" ~ keyword_boundary ~ separator ~ complete_command
    ~ separator ~ "do" ~ keyword_boundary ~ separator ~ command_list
    ~ separator?
    ~ "done" ~ keyword_boundary
}
69
// for <name> in <word>* do <commands> done
// The word list may be empty; each word must be preceded by at least one blank.
for_statement = {
    "for" ~ keyword_boundary ~ ws+ ~ var_name
    ~ ws+ ~ "in" ~ keyword_boundary ~ (ws+ ~ word)*
    ~ separator ~ "do" ~ keyword_boundary ~ separator ~ command_list
    ~ separator?
    ~ "done" ~ keyword_boundary
}
73
// select <name> in <word>* do <commands> done
// Same shape as for_statement, with "select" as the leading keyword.
select_statement = {
    "select" ~ keyword_boundary ~ ws+ ~ var_name
    ~ ws+ ~ "in" ~ keyword_boundary ~ (ws+ ~ word)*
    ~ separator ~ "do" ~ keyword_boundary ~ separator ~ command_list
    ~ separator?
    ~ "done" ~ keyword_boundary
}
77
// case <word> in <clause>* esac
// A separator is required after "in"; the clause list may be empty.
case_statement = {
    "case" ~ keyword_boundary ~ ws+ ~ word
    ~ ws+ ~ "in" ~ keyword_boundary ~ separator
    ~ case_clause*
    ~ "esac" ~ keyword_boundary
}
81
// One case clause: pat [| pat]* ) <commands> ;;
// A separator is required between ")" and the commands, and the command
// list itself is required (word needs at least one part, so a clause with an
// empty body does not match).
case_clause = {
    ws* ~ pattern ~ (ws* ~ "|" ~ ws* ~ pattern)* ~ ws* ~ ")"
    ~ separator ~ command_list
    ~ (ws | NEWLINE)* ~ ";;"
    ~ separator?
}

// A case pattern is a single word, wrapped so it gets its own parse-tree node.
pattern = { word }
87
// Subshell: ( commands )
// A parenthesised command_list; separators just inside the parentheses are
// optional on both sides.
subshell = {
    "("
    ~ separator? ~ command_list ~ separator?
    ~ ")"
}
91
// Extended test: [[ expression ]]
// At least one blank is required after "[[" and before "]]".
extended_test = {
    "[[" ~ ws+
    ~ cond_expr
    ~ ws+ ~ "]]"
}
95
// Conditional expression used inside [[ ]].
// Precedence, loosest to tightest: || , && , ! .
cond_expr = { cond_or }

// a || b || ...
cond_or = {
    cond_and ~ (ws* ~ "||" ~ ws* ~ cond_and)*
}
// a && b && ...
cond_and = {
    cond_not ~ (ws* ~ "&&" ~ ws* ~ cond_not)*
}
// Optional "!" (blanks after it are optional) in front of a primary.
cond_not = {
    ("!" ~ ws*)? ~ cond_primary
}
104
// Primary conditional term. Alternatives are tried in this order, so a
// unary test wins over a binary one, and a bare word is the last resort.
cond_primary = {
    "(" ~ ws* ~ cond_expr ~ ws* ~ ")"   // parenthesised sub-expression
  | cond_unary                          // operator followed by one word
  | cond_binary                         // word, operator, word
  | word                                // lone word
}
111
// Unary test: <op> <word>, separated by at least one blank.
cond_unary = {
    cond_unary_op ~ ws+ ~ word
}
// Recognised unary operators (atomic, two characters each).
cond_unary_op = @{
    // string tests
    "-z" | "-n"
    // file tests
  | "-e" | "-f" | "-d" | "-r" | "-w" | "-x" | "-s"
    // file type tests
  | "-L" | "-h" | "-p" | "-S" | "-b" | "-c"
}
119
// Binary test: <word> <op> <word>, with at least one blank on each side of
// the operator.
cond_binary = {
    word ~ ws+ ~ cond_binary_op ~ ws+ ~ word
}
// Recognised binary operators (atomic). "=~" and "==" are listed before "="
// because "=" is a prefix of both and the choice is ordered.
cond_binary_op = @{
    // regex match
    "=~"
    // string comparison
  | "==" | "=" | "!="
    // string ordering
  | "<" | ">"
    // numeric comparison
  | "-eq" | "-ne" | "-lt" | "-le" | "-gt" | "-ge"
    // file comparison
  | "-nt" | "-ot" | "-ef"
}
129
// Function definition. Accepted spellings:
//   function name { body; }
//   function name() { body; }
//   name() { body; }
// In the keyword form the "()" after the name is optional (blanks are allowed
// before and inside the parentheses). The parentheses are plain literals, so
// every spelling yields the same children: function_name, then command_list.
// A separator is required after "{"; the one before "}" is optional.
function_definition = {
    "function" ~ keyword_boundary ~ ws+ ~ function_name
        ~ (ws* ~ "(" ~ ws* ~ ")")?
        ~ separator? ~ "{" ~ separator ~ command_list ~ separator? ~ "}"
  | function_name ~ ws* ~ "(" ~ ws* ~ ")"
        ~ separator? ~ "{" ~ separator ~ command_list ~ separator? ~ "}"
}

// Function name: a letter or underscore followed by letters, digits or
// underscores (atomic).
function_name = @{ (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")* }
137
// Command list: at least one complete command; further commands must each be
// preceded by a separator. No trailing separator is consumed here.
command_list = {
    complete_command
    ~ (separator ~ complete_command)*
}
140
// Separator (silent). Either
//   - one or more newline / ";" terminators, each optionally padded with
//     blanks, or
//   - a run of one or more blanks on its own.
// A ";" immediately followed by another ";" is refused so that ";;" stays
// available as the case-clause terminator.
separator = _{
    (ws* ~ (NEWLINE | (";" ~ !";")) ~ ws*)+
  | ws+
}
144
// Simple command. Three shapes, tried in order:
//   1. one or more assignments, optionally followed by a command word with
//      its arguments / redirections;
//   2. a command word with its arguments / redirections;
//   3. redirections only.
// Wherever an argument may appear, `redirect` is tried before `word` so a
// leading fd number (as in `2>file`) is taken by the redirection rather than
// read as an argument.
simple_command = {
    assignment ~ (ws+ ~ assignment)*
        ~ (ws+ ~ word ~ (ws+ ~ (redirect | word))*)?
        ~ (ws* ~ redirect)*
  | word ~ (ws+ ~ (redirect | word))*
        ~ (ws* ~ redirect)*
  | redirect ~ (ws* ~ redirect)*
}
153
// Variable assignment: NAME=value, NAME[index]=value, or NAME= (the value
// word is optional, which gives the empty assignment).
assignment = {
    var_name ~ array_index? ~ "=" ~ word?
}
// [subscript] directly after the variable name.
array_index = { "[" ~ array_subscript ~ "]" }
// Subscript: a run of digits, or a variable name (atomic).
array_subscript = @{ ASCII_DIGIT+ | var_name }
// Variable name: letter or underscore, then letters, digits or underscores (atomic).
var_name = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}
159
// Redirections: <file, >file, >>file, 2>file, 2>&1, &>file, <<EOF, <<<string,
// <(cmd), >(cmd).
// The choice is ordered, so a form whose text begins like another form must be
// listed before it.
redirect = {
    redirect_stderr_to_stdout   // "2>&1"; listed first so redirect_output does not claim the "2>"
  | redirect_all_output         // &>file, &>>file
  | redirect_herestring         // <<<string; ahead of heredoc, which starts with "<<"
  | redirect_heredoc            // <<EOF, <<-EOF
  | process_subst_input         // <(command); ahead of redirect_input
  | process_subst_output        // >(command); ahead of redirect_output
  | redirect_output_append      // >>file; ahead of redirect_output
  | redirect_output             // >file, N>file
  | redirect_input              // <file
}
172
// File redirections. Blanks between the operator and the target are optional.
// <file
redirect_input = { "<" ~ ws* ~ word }
// >file, optionally prefixed by an fd number (N>file)
redirect_output = { fd_number? ~ ">" ~ ws* ~ word }
// >>file, optionally prefixed by an fd number (N>>file)
redirect_output_append = { fd_number? ~ ">>" ~ ws* ~ word }
// Exactly the text "2>&1"; no other descriptor pair is matched by this rule.
redirect_stderr_to_stdout = { "2>&1" }
// &>file or &>>file
redirect_all_output = { ("&>>" | "&>") ~ ws* ~ word }
178
// Heredoc: <<EOF or <<-EOF. Only the operator and delimiter are matched here;
// per the original note, the body is collected separately by the parser.
redirect_heredoc = { ("<<-" | "<<") ~ ws* ~ heredoc_delimiter }
// The delimiter is either a quoted string or a bare identifier.
heredoc_delimiter = { quoted_string | bare_delimiter }
// Bare delimiter: letter or underscore, then letters, digits or underscores (atomic).
bare_delimiter = @{
    (ASCII_ALPHA | "_") ~ (ASCII_ALPHANUMERIC | "_")*
}

// Herestring: <<<string
redirect_herestring = { "<<<" ~ ws* ~ word }
187
// Process substitution: <(command) and >(command).
// The command text is matched with the same balanced-parenthesis content rule
// that $(...) uses; blanks right after the opening parenthesis are skipped.
process_subst_input = {
    "<(" ~ ws* ~ command_subst_content ~ ")"
}
process_subst_output = {
    ">(" ~ ws* ~ command_subst_content ~ ")"
}

// File-descriptor number in front of a redirection operator: one or more digits (atomic).
fd_number = @{ ASCII_DIGIT+ }
194
// Word: one or more adjacent word parts with no blank between them, so
// `hello$VAR` is one word while `hello $VAR` is two.
word = { word_part ~ word_part* }

// A single word part. Tried in order; plain literal text is the fallback.
word_part = {
    quoted_string
  | var_expansion
  | arithmetic_expansion
  | command_substitution
  | backtick_substitution
  | array_literal
  | extglob_pattern
  | bare_word_part
}
209
// Backtick command substitution: `command`
backtick_substitution = {
    "`" ~ backtick_content ~ "`"
}
// Content (atomic): a backslash consumes the character after it, so an escaped
// backtick (\`) does not end the substitution; anything else up to the next
// unescaped backtick is taken as-is.
backtick_content = @{
    ("\\" ~ ANY | !"`" ~ ANY)*
}
214
// Array literal: (elem1 elem2 elem3)
// Elements are words separated by at least one blank; "()" with no elements
// is accepted, and blanks just inside the parentheses are optional.
array_literal = {
    "(" ~ ws*
    ~ (word ~ (ws+ ~ word)*)?
    ~ ws* ~ ")"
}
217
// Extended glob patterns: !(pat), ?(pat), *(pat), +(pat), @(pat).
// Matched atomically as one piece of text; per the original note they are
// treated as literal text for glob expansion later.
extglob_pattern = @{
    ("!" | "?" | "*" | "+" | "@")
    ~ "(" ~ extglob_content ~ ")"
}
// Everything up to the first ")" on the same line (may be empty). Nested
// parentheses are not balanced here.
extglob_content = { (!")" ~ !NEWLINE ~ ANY)* }
224
// Variable expansion: $VAR, ${VAR}, ${VAR:-default}, ${#VAR}, ${VAR#pattern}, ...
// The braced forms share the "${" opener and are tried in the order listed;
// the more specific array forms come before the general ${VAR...} form.
var_expansion = {
    "${" ~ (
        "#" ~ var_name ~ "[" ~ ("@" | "*") ~ "]" ~ "}"    // ${#arr[@]}   array length
      | "!" ~ var_name ~ "[" ~ ("@" | "*") ~ "]" ~ "}"    // ${!arr[@]}   array indices
      | "!" ~ var_name ~ "}"                              // ${!VAR}      indirect expansion
      | "#" ~ var_name ~ "}"                              // ${#VAR}      variable length
      | var_name ~ "[" ~ array_subscript ~ "]" ~ "}"      // ${arr[i]}    array element
      | var_name ~ "[" ~ ("@" | "*") ~ "]" ~ "}"          // ${arr[@]}    all elements
      | var_name ~ var_modifier? ~ "}"                    // ${VAR}, ${VAR<modifier>}
    )
  | "$" ~ (
        special_var                                       // $?, $#, $@, $*, $0, $1, ...
      | var_name                                          // $VAR
    )
}
237
// Special shell variables following a "$" (atomic).
// "0" is listed ahead of the digit run, so a name starting with 0 matches
// just the single "0".
special_var = @{
    "?"             // exit status of last command
  | "#"             // number of positional parameters
  | "@"             // all positional parameters (as separate words)
  | "*"             // all positional parameters (as single word)
  | "$"             // process ID
  | "!"             // background job PID
  | "0"             // shell/script name
  | ASCII_DIGIT+    // positional parameters: 1, 2, ..., 10, ...
}
249
// Modifier inside ${VAR<modifier>}. The choice is ordered: two-character
// operators are listed before the one-character operators they start with.
// NOTE(review): the operand `word` needs at least one word part, so forms
// with an empty operand such as ${VAR:-} are not matched by this rule.
var_modifier = {
    // Colon variants (check empty AND unset)
    ":-" ~ word                                              // ${VAR:-default}
  | ":=" ~ word                                              // ${VAR:=default} - assign default
  | ":+" ~ word                                              // ${VAR:+alternate} - use if set
  | ":?" ~ word                                              // ${VAR:?message} - error if unset
    // Non-colon variants (check unset only) - must come after colon variants
  | "-" ~ word                                               // ${VAR-default} - use default only if unset
  | "=" ~ word                                               // ${VAR=default} - assign only if unset
  | "+" ~ word                                               // ${VAR+alternate} - use if set (even if empty)
  | "?" ~ word                                               // ${VAR?message} - error only if unset
  | "##" ~ prefix_suffix_pattern                             // ${VAR##pattern} - remove longest prefix
  | "#" ~ prefix_suffix_pattern                              // ${VAR#pattern} - remove shortest prefix
  | "%%" ~ prefix_suffix_pattern                             // ${VAR%%pattern} - remove longest suffix
  | "%" ~ prefix_suffix_pattern                              // ${VAR%pattern} - remove shortest suffix
  | "//" ~ replacement_pattern ~ "/" ~ replacement_pattern   // ${VAR//pattern/replacement}
  | "/" ~ replacement_pattern ~ "/" ~ replacement_pattern    // ${VAR/pattern/replacement}
    // Substring: ${VAR:offset} or ${VAR:offset:length}.
    // Blanks are allowed between ":" and the offset. ":-" is always claimed
    // by the default-value form above, so a negative offset has to be
    // written with a blank, as in ${VAR: -1}; without the ws* here the
    // optional "-" in var_offset could never be reached.
  | ":" ~ ws* ~ var_offset ~ (":" ~ var_length)?
  | "^^"                                                     // ${VAR^^} - uppercase all
  | "^"                                                      // ${VAR^} - uppercase first
  | ",,"                                                     // ${VAR,,} - lowercase all
  | ","                                                      // ${VAR,} - lowercase first
  | "@" ~ transform_op                                       // ${VAR@Q}, ${VAR@E}, etc. - transformation
}
274
// Single-letter transformation operators accepted after "@" in ${VAR@op} (atomic).
transform_op = @{
    "Q" | "E" | "P" | "A" | "K" | "a" | "u" | "L" | "U"
}

// Pattern for prefix/suffix removal: one or more characters up to "}" or the
// end of the line. "/" is allowed.
prefix_suffix_pattern = @{ (!"}" ~ !NEWLINE ~ ANY)+ }
// Pattern / replacement text for substitution: like the above, but also stops
// at "/", which separates pattern from replacement. Must be non-empty.
replacement_pattern = @{ (!"/" ~ !"}" ~ !NEWLINE ~ ANY)+ }
// Substring offset: digits with an optional leading minus sign.
var_offset = @{ "-"? ~ ASCII_DIGIT+ }
// Substring length: digits only.
var_length = @{ ASCII_DIGIT+ }
284
// Arithmetic expansion: $((expression))
arithmetic_expansion = {
    "$((" ~ arithmetic_expr_content ~ "))"
}
// Raw expression text (atomic): everything up to the first "))" on the same
// line; may be empty. The expression is not parsed by this grammar.
arithmetic_expr_content = @{
    (!"))" ~ !NEWLINE ~ ANY)*
}
288
// Command substitution: $(command)
// The content is matched as raw text with balanced parentheses, which is what
// lets nested forms such as $(echo $(echo inner)) close at the right ")".
command_substitution = {
    "$(" ~ command_subst_content ~ ")"
}
// Zero or more parts: either a balanced (...) group or a run of plain text.
command_subst_content = { command_subst_part* }
command_subst_part = {
    nested_parens
  | command_subst_text
}
// A parenthesised group; recurses into the content rule.
nested_parens = {
    "(" ~ command_subst_content ~ ")"
}
// One or more characters that are not a parenthesis or a line end.
command_subst_text = {
    (!("(" | ")" | NEWLINE) ~ ANY)+
}
299
// Reserved keywords. A keyword only counts when it is followed by a blank,
// newline, ";" or end of input.
// NOTE(review): the choice is ordered and "do" comes before "done". On the
// input "done" the "do" branch succeeds, the boundary lookahead then fails on
// "n", and the "done" branch is never retried - so "done" is not recognised by
// this rule. Moving "done" ahead of "do" would make bare_word_part start
// rejecting `done` as an ordinary word; confirm the intended behaviour before
// changing the order.
keyword = {
    (
        "if" | "then" | "elif" | "else" | "fi"
      | "while" | "do" | "done"
      | "for" | "in"
      | "case" | "esac"
    )
    ~ &(ws | NEWLINE | ";" | EOI)
}

// Bare word part: a run of literal characters (atomic) that does not begin
// with a reserved keyword.
// Characters that end the run: space, tab, newline, # " ' $ | & < > ; ) } (
//   - blanks / newline / ";" separate words and commands
//   - quotes and "$" start quoted strings and expansions
//   - "|", "&", "<", ">" are pipe, operator and redirection characters
//   - ")" closes case patterns and subshells, "}" closes ${...} and function bodies
//   - "(" is excluded so array literals like (a b c) can be recognised
// "=" is allowed (needed for the test command); assignments are recognised
// separately through the var_name-based `assignment` rule.
bare_word_part = @{
    !keyword
    ~ (
        !(
            " " | "\t" | NEWLINE
          | "#" | "\"" | "'" | "$"
          | "|" | "&" | "<" | ">" | ";"
          | ")" | "}" | "("
        )
        ~ ANY
      )+
}
309
// Quoted string: double-quoted (expansions are recognised inside) or
// single-quoted (contents taken literally).
quoted_string = {
    double_quoted
  | single_quoted
}
312
// Double quotes: variable expansion, arithmetic expansion and command
// substitution are recognised inside; everything else is literal text.
double_quoted = { "\"" ~ double_quoted_content ~ "\"" }
double_quoted_content = { double_quoted_part* }
// Expansions are tried first; literal text is the fallback.
double_quoted_part = {
    var_expansion
  | arithmetic_expansion
  | command_substitution
  | backtick_substitution
  | double_quoted_text
}
// Literal text inside double quotes (atomic). One or more of:
//   - a backslash together with the character after it, so \" does not end
//     the string and \$ / \` do not start an expansion (same escape handling
//     as backtick_content). The backslash is kept in the matched text.
//   - a "$" that cannot start an expansion, i.e. one not followed by "{", "(",
//     a letter, digit, "_" or one of ? # @ * $ !  (for example the trailing
//     "$" in "^foo$"). Such a "$" is ordinary text.
//   - any other character except the closing quote, "$" and "`".
double_quoted_text = @{
    (
        "\\" ~ ANY
      | "$" ~ !("{" | "(" | ASCII_ALPHANUMERIC | "_" | "?" | "#" | "@" | "*" | "$" | "!")
      | !("\"" | "$" | "`") ~ ANY
    )+
}
324
// Single quotes: everything up to the next "'" is taken literally (atomic).
// There is no escape mechanism, so the content cannot contain a single quote.
single_quoted = @{
    "'"
    ~ (!"'" ~ ANY)*
    ~ "'"
}
327
// Line terminator (silent): CRLF or a lone LF.
NEWLINE = _{ "\r\n" | "\n" }
329