Rust · 37218 bytes Raw Blame History
1 //! Pratt parser for Fortran expressions.
2 //!
3 //! 12 precedence levels matching the Fortran standard exactly.
4 //! Handles right-associative **, non-associative comparisons,
5 //! unary operators, function calls, array constructors, and
6 //! component access chains.
7
8 use super::{ParseError, Parser};
9 use crate::ast::expr::*;
10 use crate::ast::Spanned;
11 use crate::lexer::{Span, TokenKind};
12
/// Binding power for Pratt parsing.
///
/// Higher = tighter binding. Each level has a left and a right binding power:
/// - left-associative: right bp > left bp;
/// - right-associative: left bp > right bp;
/// - non-associative: left bp == right bp (the parser rejects chaining).
#[derive(Debug, Clone, Copy)]
pub(crate) struct Bp {
    /// Compared against the caller's `min_bp` to decide whether the operator
    /// may be consumed at the current nesting level.
    pub(crate) left: u8,
    /// Passed as `min_bp` when parsing the operator's right-hand operand.
    pub(crate) right: u8,
}
23
// Precedence levels (from Fortran standard, lowest to highest).
//
// Left-associative binary levels use an even left bp and the next odd number
// as right bp, which creates the half-levels needed for associativity.
// Exceptions: the non-associative comparison level uses the same value on
// both sides, and right-associative `**` swaps the pair (left > right).
// Unary operators carry a single value: the minimum binding power used when
// parsing their operand.
const BP_DEFINED_BINARY: Bp = Bp { left: 2, right: 3 }; // .myop. (binary)
const BP_EQV: Bp = Bp { left: 4, right: 5 }; // .eqv., .neqv.
const BP_OR: Bp = Bp { left: 6, right: 7 }; // .or.
const BP_AND: Bp = Bp { left: 8, right: 9 }; // .and.
const BP_NOT: u8 = 10; // .not. (unary, right)
const BP_COMPARISON: Bp = Bp {
    left: 12,
    right: 12,
}; // ==, /=, <, >, <=, >= (non-assoc)
const BP_CONCAT: Bp = Bp {
    left: 14,
    right: 15,
}; // //
const BP_ADD: Bp = Bp {
    left: 16,
    right: 17,
}; // +, - (binary)
const BP_UNARY_ADD: u8 = 18; // +, - (unary)
pub(crate) const BP_MUL: Bp = Bp {
    left: 20,
    right: 21,
}; // *, /
const BP_POW: Bp = Bp {
    left: 23,
    right: 22,
}; // ** (RIGHT-assoc: left > right)
const BP_DEFINED_UNARY: u8 = 24; // .myop. (unary)
54
55 impl<'a> Parser<'a> {
/// Parse an expression.
///
/// Entry point of the Pratt parser: delegates to `parse_expr_bp` with a
/// minimum binding power of 0, which is below every infix operator's left
/// binding power, so no operator is excluded at the top level.
pub fn parse_expr(&mut self) -> Result<SpannedExpr, ParseError> {
    self.parse_expr_bp(0)
}
60
61 /// Parse an expression with at least `min_bp` binding power (Pratt core).
62 pub fn parse_expr_bp(&mut self, min_bp: u8) -> Result<SpannedExpr, ParseError> {
63 const EXPR_NESTING_LIMIT: usize = 1024;
64
65 if self.expr_depth >= EXPR_NESTING_LIMIT {
66 return Err(self.error("expression nesting exceeds parser limit".into()));
67 }
68
69 self.expr_depth += 1;
70 let result = (|| {
71 // Parse prefix (atom or unary operator).
72 let mut left = self.parse_prefix()?;
73
74 // Loop: consume infix operators with sufficient binding power.
75 loop {
76 // Check for postfix operations: function call, component access.
77 left = self.parse_postfix(left)?;
78
79 // Check for infix operator.
80 let Some(bp) = self.infix_bp() else { break };
81 if bp.left < min_bp {
82 break;
83 }
84
85 // Non-associative operators: if left_bp == right_bp and we're at the
86 // same precedence level, reject chaining (e.g., a < b < c is illegal).
87 if bp.left == bp.right && bp.left == min_bp {
88 return Err(self.error(
89 "chained comparison operators are not allowed in Fortran (non-associative)"
90 .into(),
91 ));
92 }
93
94 let op_token = self.advance().clone();
95 let op = token_to_binary_op(&op_token)?;
96 let right = self.parse_expr_bp(bp.right)?;
97
98 let span = Span {
99 file_id: left.span.file_id,
100 start: left.span.start,
101 end: right.span.end,
102 };
103
104 left = Spanned::new(
105 Expr::BinaryOp {
106 op,
107 left: Box::new(left),
108 right: Box::new(right),
109 },
110 span,
111 );
112 }
113
114 Ok(left)
115 })();
116 self.expr_depth -= 1;
117 result
118 }
119
120 /// Parse a prefix expression (atom, unary operator, parenthesized expr).
121 fn parse_prefix(&mut self) -> Result<SpannedExpr, ParseError> {
122 let start = self.current_span();
123
124 match self.peek().clone() {
125 // Unary operators.
126 TokenKind::Plus => {
127 self.advance();
128 let operand = self.parse_expr_bp(BP_UNARY_ADD)?;
129 let span = span_from_to(start, operand.span);
130 Ok(Spanned::new(
131 Expr::UnaryOp {
132 op: UnaryOp::Plus,
133 operand: Box::new(operand),
134 },
135 span,
136 ))
137 }
138 TokenKind::Minus => {
139 self.advance();
140 let operand = self.parse_expr_bp(BP_UNARY_ADD)?;
141 let span = span_from_to(start, operand.span);
142 Ok(Spanned::new(
143 Expr::UnaryOp {
144 op: UnaryOp::Minus,
145 operand: Box::new(operand),
146 },
147 span,
148 ))
149 }
150 TokenKind::DotOp(ref name) if name == "not" => {
151 self.advance();
152 let operand = self.parse_expr_bp(BP_NOT)?;
153 let span = span_from_to(start, operand.span);
154 Ok(Spanned::new(
155 Expr::UnaryOp {
156 op: UnaryOp::Not,
157 operand: Box::new(operand),
158 },
159 span,
160 ))
161 }
162 TokenKind::DefinedOp(ref name) => {
163 let op_name = name.clone();
164 self.advance();
165 let operand = self.parse_expr_bp(BP_DEFINED_UNARY)?;
166 let span = span_from_to(start, operand.span);
167 Ok(Spanned::new(
168 Expr::UnaryOp {
169 op: UnaryOp::Defined(op_name),
170 operand: Box::new(operand),
171 },
172 span,
173 ))
174 }
175
176 // Parenthesized expression or array constructor (/ ... /).
177 TokenKind::LParen => {
178 self.advance();
179 // Check for (/ ... /) array constructor.
180 if self.eat(&TokenKind::Slash) {
181 return self.parse_array_constructor_slash(start);
182 }
183 let inner = self.parse_expr()?;
184 // Check for complex literal: (expr, expr)
185 if self.eat(&TokenKind::Comma) {
186 let imag = self.parse_expr()?;
187 self.expect(&TokenKind::RParen)?;
188 let span = span_from_to(start, self.prev_span());
189 return Ok(Spanned::new(
190 Expr::ComplexLiteral {
191 real: Box::new(inner),
192 imag: Box::new(imag),
193 },
194 span,
195 ));
196 }
197 self.expect(&TokenKind::RParen)?;
198 let span = span_from_to(start, self.prev_span());
199 Ok(Spanned::new(
200 Expr::ParenExpr {
201 inner: Box::new(inner),
202 },
203 span,
204 ))
205 }
206
207 // Array constructor [...]
208 TokenKind::LBracket => {
209 self.advance();
210 self.parse_array_constructor_bracket(start)
211 }
212
213 // Literals.
214 TokenKind::IntegerLiteral => self.parse_integer_literal(),
215 TokenKind::RealLiteral => self.parse_real_literal(),
216 TokenKind::StringLiteral => self.parse_string_literal(),
217 TokenKind::LogicalLiteral => self.parse_logical_literal(),
218 TokenKind::BozLiteral => self.parse_boz_literal(),
219
220 // Identifier (name, keyword used as name, potential function call).
221 TokenKind::Identifier => {
222 if self.peek_text().ends_with('_')
223 && self
224 .tokens
225 .get(self.pos + 1)
226 .is_some_and(|tok| tok.kind == TokenKind::StringLiteral)
227 {
228 self.parse_kind_prefixed_string_literal()
229 } else {
230 self.parse_name()
231 }
232 }
233
234 _ => Err(self.error(format!("expected expression, got {}", self.peek()))),
235 }
236 }
237
238 /// Parse postfix operations: function call (parens), component access (%).
239 fn parse_postfix(&mut self, mut expr: SpannedExpr) -> Result<SpannedExpr, ParseError> {
240 loop {
241 match self.peek() {
242 // Function call / array subscript: expr(...)
243 TokenKind::LParen => {
244 self.advance();
245 let args = self.parse_argument_list()?;
246 self.expect(&TokenKind::RParen)?;
247 let span = span_from_to(expr.span, self.prev_span());
248 expr = Spanned::new(
249 Expr::FunctionCall {
250 callee: Box::new(expr),
251 args,
252 },
253 span,
254 );
255 }
256 // Component access: expr%name
257 TokenKind::Percent => {
258 self.advance();
259 let name_tok = self.advance().clone();
260 if name_tok.kind != TokenKind::Identifier {
261 return Err(ParseError {
262 span: name_tok.span,
263 msg: format!("expected component name after %, got {}", name_tok.kind),
264 });
265 }
266 let span = span_from_to(expr.span, name_tok.span);
267 expr = Spanned::new(
268 Expr::ComponentAccess {
269 base: Box::new(expr),
270 component: name_tok.text,
271 },
272 span,
273 );
274 }
275 _ => break,
276 }
277 }
278 Ok(expr)
279 }
280
281 /// Get the binding power of the current token if it's an infix operator.
282 fn infix_bp(&self) -> Option<Bp> {
283 match self.peek() {
284 TokenKind::Power => Some(BP_POW),
285 TokenKind::Star => Some(BP_MUL),
286 TokenKind::Slash => Some(BP_MUL),
287 TokenKind::Plus => Some(BP_ADD),
288 TokenKind::Minus => Some(BP_ADD),
289 TokenKind::Concat => Some(BP_CONCAT),
290 TokenKind::Eq
291 | TokenKind::Ne
292 | TokenKind::Lt
293 | TokenKind::Le
294 | TokenKind::Gt
295 | TokenKind::Ge => Some(BP_COMPARISON),
296 TokenKind::DotOp(ref name) => match name.as_str() {
297 "eq" | "ne" | "lt" | "le" | "gt" | "ge" => Some(BP_COMPARISON),
298 "and" => Some(BP_AND),
299 "or" => Some(BP_OR),
300 "eqv" => Some(BP_EQV),
301 "neqv" => Some(BP_EQV),
302 _ => None,
303 },
304 TokenKind::DefinedOp(_) => Some(BP_DEFINED_BINARY),
305 _ => None,
306 }
307 }
308
309 // ---- Literal parsers ----
310
311 fn parse_integer_literal(&mut self) -> Result<SpannedExpr, ParseError> {
312 let tok = self.advance().clone();
313 let (text, kind) = split_kind_suffix(&tok.text);
314 Ok(Spanned::new(Expr::IntegerLiteral { text, kind }, tok.span))
315 }
316
317 fn parse_real_literal(&mut self) -> Result<SpannedExpr, ParseError> {
318 let tok = self.advance().clone();
319 let (text, kind) = split_kind_suffix(&tok.text);
320 Ok(Spanned::new(Expr::RealLiteral { text, kind }, tok.span))
321 }
322
323 fn parse_string_literal_with_kind(
324 &mut self,
325 kind: Option<String>,
326 prefix_span: Option<Span>,
327 ) -> Result<SpannedExpr, ParseError> {
328 let tok = self.advance().clone();
329 // Strip outer quotes for the value.
330 let value = if tok.text.len() >= 2 {
331 let inner = &tok.text[1..tok.text.len() - 1];
332 match tok.text.as_bytes().first().copied() {
333 Some(b'\'') => inner.replace("''", "'"),
334 Some(b'"') => inner.replace("\"\"", "\""),
335 _ => inner.to_string(),
336 }
337 } else {
338 tok.text.clone()
339 };
340 let span = prefix_span
341 .map(|prefix| span_from_to(prefix, tok.span))
342 .unwrap_or(tok.span);
343 Ok(Spanned::new(Expr::StringLiteral { value, kind }, span))
344 }
345
/// Parse a string literal that has no kind prefix.
fn parse_string_literal(&mut self) -> Result<SpannedExpr, ParseError> {
    self.parse_string_literal_with_kind(None, None)
}
349
350 fn parse_kind_prefixed_string_literal(&mut self) -> Result<SpannedExpr, ParseError> {
351 let kind_tok = self.advance().clone();
352 let Some(kind) = kind_tok
353 .text
354 .strip_suffix('_')
355 .filter(|text| !text.is_empty())
356 .map(|text| text.to_string())
357 else {
358 return Err(ParseError {
359 span: kind_tok.span,
360 msg: "expected string kind prefix before string literal".into(),
361 });
362 };
363 if self.peek() != &TokenKind::StringLiteral {
364 return Err(self.error(format!(
365 "expected string literal after kind prefix, got {}",
366 self.peek()
367 )));
368 }
369 self.parse_string_literal_with_kind(Some(kind), Some(kind_tok.span))
370 }
371
372 fn parse_logical_literal(&mut self) -> Result<SpannedExpr, ParseError> {
373 let tok = self.advance().clone();
374 let lower = tok.text.to_lowercase();
375 let value = lower.contains("true");
376 let kind = lower
377 .find("._")
378 .map(|pos| lower[pos + 2..].trim_end_matches('.').to_string());
379 Ok(Spanned::new(Expr::LogicalLiteral { value, kind }, tok.span))
380 }
381
382 fn parse_boz_literal(&mut self) -> Result<SpannedExpr, ParseError> {
383 let tok = self.advance().clone();
384 let base = match tok.text.as_bytes()[0] {
385 b'B' | b'b' => BozBase::Binary,
386 b'O' | b'o' => BozBase::Octal,
387 b'Z' | b'z' => BozBase::Hex,
388 _ => BozBase::Hex,
389 };
390 Ok(Spanned::new(
391 Expr::BozLiteral {
392 text: tok.text,
393 base,
394 },
395 tok.span,
396 ))
397 }
398
399 fn parse_name(&mut self) -> Result<SpannedExpr, ParseError> {
400 let tok = self.advance().clone();
401 Ok(Spanned::new(Expr::Name { name: tok.text }, tok.span))
402 }
403
404 // ---- Argument list ----
405
406 fn parse_argument_list(&mut self) -> Result<Vec<Argument>, ParseError> {
407 let mut args = Vec::new();
408 if self.peek() == &TokenKind::RParen {
409 return Ok(args);
410 }
411
412 loop {
413 let arg = self.parse_argument()?;
414 args.push(arg);
415 if !self.eat(&TokenKind::Comma) {
416 break;
417 }
418 }
419 Ok(args)
420 }
421
422 fn parse_argument(&mut self) -> Result<Argument, ParseError> {
423 // Check for keyword argument: name = expr.
424 if self.peek() == &TokenKind::Identifier {
425 let next_pos = self.pos + 1;
426 if next_pos < self.tokens.len() && self.tokens[next_pos].kind == TokenKind::Assign {
427 let name_tok = self.advance().clone();
428 self.advance(); // skip =
429 let value = self.parse_expr()?;
430 return Ok(Argument {
431 keyword: Some(name_tok.text),
432 value: SectionSubscript::Element(value),
433 });
434 }
435 }
436
437 // Leading colon → range with no start: :end or : or ::stride
438 if matches!(self.peek(), TokenKind::Colon | TokenKind::ColonColon) {
439 let sub = self.parse_range(None)?;
440 return Ok(Argument {
441 keyword: None,
442 value: sub,
443 });
444 }
445
446 // Parse an expression.
447 let expr = self.parse_expr()?;
448
449 // If followed by colon, it's a range: start:end[:stride]
450 if matches!(self.peek(), TokenKind::Colon | TokenKind::ColonColon) {
451 // Both `start:end[:stride]` and `start::stride` (empty
452 // end, common in `a(1::7)`) need to enter the range
453 // parser; without this, `::` after `start` was leaking
454 // through and tripping the closing-paren check.
455 let sub = self.parse_range(Some(expr))?;
456 return Ok(Argument {
457 keyword: None,
458 value: sub,
459 });
460 }
461
462 // Plain element.
463 Ok(Argument {
464 keyword: None,
465 value: SectionSubscript::Element(expr),
466 })
467 }
468
469 /// Parse a range subscript: [start]:end[:stride] or [start]: or :
470 /// `start` is already parsed if present.
471 fn parse_range(&mut self, start: Option<SpannedExpr>) -> Result<SectionSubscript, ParseError> {
472 // Handle :: (ColonColon token) as two colons — means start::stride with no end.
473 if self.eat(&TokenKind::ColonColon) {
474 let stride = if !matches!(
475 self.peek(),
476 TokenKind::Comma | TokenKind::RParen | TokenKind::RBracket
477 ) {
478 Some(self.parse_expr()?)
479 } else {
480 None
481 };
482 return Ok(SectionSubscript::Range {
483 start,
484 end: None,
485 stride,
486 });
487 }
488
489 self.expect(&TokenKind::Colon)?; // consume first colon
490
491 // Parse end (optional — absent if next is colon, comma, ), ], or ::).
492 let end = if !matches!(
493 self.peek(),
494 TokenKind::Colon
495 | TokenKind::ColonColon
496 | TokenKind::Comma
497 | TokenKind::RParen
498 | TokenKind::RBracket
499 ) {
500 Some(self.parse_expr()?)
501 } else {
502 None
503 };
504
505 // Parse stride (optional, after second colon).
506 let stride = if self.eat(&TokenKind::Colon) {
507 if !matches!(
508 self.peek(),
509 TokenKind::Comma | TokenKind::RParen | TokenKind::RBracket
510 ) {
511 Some(self.parse_expr()?)
512 } else {
513 None
514 }
515 } else {
516 None
517 };
518
519 Ok(SectionSubscript::Range { start, end, stride })
520 }
521
522 // ---- Array constructors ----
523
524 fn parse_array_constructor_bracket(&mut self, start: Span) -> Result<SpannedExpr, ParseError> {
525 // [type_spec :: ] value, value, ...
526 let type_spec = self.try_parse_ac_type_spec();
527
528 let mut values = Vec::new();
529 if self.peek() != &TokenKind::RBracket {
530 loop {
531 values.push(self.parse_ac_value()?);
532 if !self.eat(&TokenKind::Comma) {
533 break;
534 }
535 }
536 }
537 self.expect(&TokenKind::RBracket)?;
538 let span = span_from_to(start, self.prev_span());
539 Ok(Spanned::new(
540 Expr::ArrayConstructor { type_spec, values },
541 span,
542 ))
543 }
544
545 fn parse_array_constructor_slash(&mut self, start: Span) -> Result<SpannedExpr, ParseError> {
546 // Already consumed ( and /. Parse values until /).
547 // The closing /) is ambiguous with division. We handle this by
548 // checking if / is immediately followed by ) — if so, it's the closer.
549 // Division inside (/ /) (e.g., (/ a/b /) ) is allowed but the / before )
550 // is always the constructor close.
551 // Each value routes through parse_ac_value so implied-do
552 // constructors like `(/ (i, i=1,5) /)` are recognised — the
553 // previous path used parse_expr_bp, which couldn't parse the
554 // parenthesised implied-do form and errored on `=`.
555 let mut values = Vec::new();
556 loop {
557 if matches!(self.peek(), TokenKind::Slash) {
558 break;
559 }
560 values.push(self.parse_ac_value_bracketed(BP_MUL.right)?);
561 if !self.eat(&TokenKind::Comma) {
562 break;
563 }
564 }
565 self.expect(&TokenKind::Slash)?;
566 self.expect(&TokenKind::RParen)?;
567 let span = span_from_to(start, self.prev_span());
568 Ok(Spanned::new(
569 Expr::ArrayConstructor {
570 type_spec: None,
571 values,
572 },
573 span,
574 ))
575 }
576
577 /// Variant of parse_ac_value that honours a minimum binding
578 /// power. Needed inside the `(/ ... /)` form where plain
579 /// `parse_expr` would greedily consume the closing `/` as
580 /// integer division. Implied-do values nested inside a `(...)`
581 /// still parse their inner expressions at full precedence — the
582 /// minimum BP only applies at the top level of each AcValue.
583 fn parse_ac_value_bracketed(&mut self, min_bp: u8) -> Result<AcValue, ParseError> {
584 if self.peek() == &TokenKind::LParen {
585 let save_pos = self.pos;
586 if let Ok(implied) = self.try_parse_implied_do() {
587 return Ok(implied);
588 }
589 self.pos = save_pos;
590 }
591 let expr = self.parse_expr_bp(min_bp)?;
592 Ok(AcValue::Expr(expr))
593 }
594
595 fn try_parse_ac_type_spec(&mut self) -> Option<String> {
596 let save_pos = self.pos;
597 if let Some(ts_result) = self.try_parse_type_spec() {
598 if ts_result.is_ok() && self.peek() == &TokenKind::ColonColon {
599 let spec = render_token_slice(&self.tokens[save_pos..self.pos]);
600 self.advance(); // skip ::
601 return Some(spec);
602 }
603 self.pos = save_pos;
604 }
605 None
606 }
607
608 fn parse_ac_value(&mut self) -> Result<AcValue, ParseError> {
609 // Check for implied-do: (value-list, var=start,end[,step])
610 if self.peek() == &TokenKind::LParen {
611 // Save position — we might need to backtrack if it's not an implied-do.
612 let save_pos = self.pos;
613 if let Ok(implied) = self.try_parse_implied_do() {
614 return Ok(implied);
615 }
616 // Not an implied-do — restore and parse as expression.
617 self.pos = save_pos;
618 }
619
620 let expr = self.parse_expr()?;
621 Ok(AcValue::Expr(expr))
622 }
623
624 fn try_parse_implied_do(&mut self) -> Result<AcValue, ParseError> {
625 self.expect(&TokenKind::LParen)?;
626
627 // Parse value list (one or more expressions separated by commas).
628 let mut values = vec![self.parse_ac_value()?];
629 while self.eat(&TokenKind::Comma) {
630 // Check if this is the var=start part.
631 // Pattern: identifier = expr , expr [, expr]
632 if self.peek() == &TokenKind::Identifier {
633 let next_pos = self.pos + 1;
634 if next_pos < self.tokens.len() && self.tokens[next_pos].kind == TokenKind::Assign {
635 let var_tok = self.advance().clone();
636 self.advance(); // skip =
637 let start = self.parse_expr()?;
638 self.expect(&TokenKind::Comma)?;
639 let end = self.parse_expr()?;
640 let step = if self.eat(&TokenKind::Comma) {
641 Some(self.parse_expr()?)
642 } else {
643 None
644 };
645 self.expect(&TokenKind::RParen)?;
646 return Ok(AcValue::ImpliedDo(Box::new(ImpliedDoLoop {
647 values,
648 var: var_tok.text,
649 start,
650 end,
651 step,
652 })));
653 }
654 }
655 values.push(self.parse_ac_value()?);
656 }
657
658 // If we get here, it wasn't an implied-do — fail so caller backtracks.
659 Err(self.error("expected implied-do variable assignment".into()))
660 }
661
662 // ---- Helpers ----
663 }
664
665 // ---- Token to operator conversion ----
666
667 fn token_to_binary_op(tok: &crate::lexer::Token) -> Result<BinaryOp, ParseError> {
668 match &tok.kind {
669 TokenKind::Plus => Ok(BinaryOp::Add),
670 TokenKind::Minus => Ok(BinaryOp::Sub),
671 TokenKind::Star => Ok(BinaryOp::Mul),
672 TokenKind::Slash => Ok(BinaryOp::Div),
673 TokenKind::Power => Ok(BinaryOp::Pow),
674 TokenKind::Concat => Ok(BinaryOp::Concat),
675 TokenKind::Eq => Ok(BinaryOp::Eq),
676 TokenKind::Ne => Ok(BinaryOp::Ne),
677 TokenKind::Lt => Ok(BinaryOp::Lt),
678 TokenKind::Le => Ok(BinaryOp::Le),
679 TokenKind::Gt => Ok(BinaryOp::Gt),
680 TokenKind::Ge => Ok(BinaryOp::Ge),
681 TokenKind::DotOp(name) => match name.as_str() {
682 "eq" => Ok(BinaryOp::Eq),
683 "ne" => Ok(BinaryOp::Ne),
684 "lt" => Ok(BinaryOp::Lt),
685 "le" => Ok(BinaryOp::Le),
686 "gt" => Ok(BinaryOp::Gt),
687 "ge" => Ok(BinaryOp::Ge),
688 "and" => Ok(BinaryOp::And),
689 "or" => Ok(BinaryOp::Or),
690 "eqv" => Ok(BinaryOp::Eqv),
691 "neqv" => Ok(BinaryOp::Neqv),
692 _ => Err(ParseError {
693 span: tok.span,
694 msg: format!("unknown dot-operator .{}.", name),
695 }),
696 },
697 TokenKind::DefinedOp(name) => Ok(BinaryOp::Defined(name.clone())),
698 _ => Err(ParseError {
699 span: tok.span,
700 msg: format!("expected operator, got {}", tok.kind),
701 }),
702 }
703 }
704
/// Split a numeric literal into its value text and an optional kind suffix.
///
/// The split happens at the first `_`. When there is no `_`, or nothing
/// follows it, the whole text is returned unchanged with no kind.
fn split_kind_suffix(text: &str) -> (String, Option<String>) {
    match text.split_once('_') {
        Some((digits, kind)) if !kind.is_empty() => {
            (digits.to_string(), Some(kind.to_string()))
        }
        _ => (text.to_string(), None),
    }
}
718
719 fn render_token_slice(tokens: &[crate::lexer::Token]) -> String {
720 fn is_word_like(kind: &TokenKind) -> bool {
721 matches!(
722 kind,
723 TokenKind::Identifier
724 | TokenKind::IntegerLiteral
725 | TokenKind::RealLiteral
726 | TokenKind::StringLiteral
727 | TokenKind::LogicalLiteral
728 | TokenKind::BozLiteral
729 )
730 }
731
732 let mut out = String::new();
733 let mut prev_kind: Option<&TokenKind> = None;
734 for tok in tokens {
735 if let Some(prev) = prev_kind {
736 if is_word_like(prev) && is_word_like(&tok.kind) {
737 out.push(' ');
738 }
739 }
740 out.push_str(&tok.text);
741 prev_kind = Some(&tok.kind);
742 }
743 out
744 }
745
746 pub(crate) fn span_from_to(start: Span, end: Span) -> Span {
747 Span {
748 file_id: start.file_id,
749 start: start.start,
750 end: end.end,
751 }
752 }
753
// Unit tests for the expression parser. Most tests compare the
// S-expression rendering of a parsed source string with an expected string.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer::Lexer;

    /// Tokenize and parse `src` as one expression; panics if either the
    /// lexer or the parser reports an error.
    fn parse_expression(src: &str) -> SpannedExpr {
        let tokens = Lexer::tokenize(src, 0).unwrap();
        let mut parser = Parser::new(&tokens);
        parser.parse_expr().unwrap()
    }

    /// Parse `src` and render the resulting tree with `to_sexpr`.
    fn sexpr(src: &str) -> String {
        parse_expression(src).to_sexpr()
    }

    // ---- Literals ----

    #[test]
    fn integer() {
        assert_eq!(sexpr("42"), "42");
    }
    #[test]
    fn integer_kind() {
        // The expected rendering omits the `_8` kind suffix.
        assert_eq!(sexpr("42_8"), "42");
    }
    #[test]
    fn real() {
        assert_eq!(sexpr("3.14"), "3.14");
    }
    #[test]
    fn real_exp() {
        assert_eq!(sexpr("1.0e5"), "1.0e5");
    }
    #[test]
    fn real_double() {
        assert_eq!(sexpr("1.0d0"), "1.0d0");
    }
    #[test]
    fn string_single() {
        assert_eq!(sexpr("'hello'"), "'hello'");
    }
    #[test]
    fn string_double() {
        // Double-quoted input is expected to render with single quotes.
        assert_eq!(sexpr("\"hello\""), "'hello'");
    }
    #[test]
    fn string_kind_prefix_named_constant() {
        // `tfc_` is a kind prefix attached to the string literal "'".
        let expr = parse_expression("tfc_\"'\"");
        match expr.node {
            Expr::StringLiteral { value, kind } => {
                assert_eq!(value, "'");
                assert_eq!(kind.as_deref(), Some("tfc"));
            }
            other => panic!("expected string literal, got {:?}", other),
        }
    }
    #[test]
    fn logical_true() {
        assert_eq!(sexpr(".true."), ".true.");
    }
    #[test]
    fn logical_false() {
        assert_eq!(sexpr(".false."), ".false.");
    }
    #[test]
    fn boz() {
        assert_eq!(sexpr("B'1010'"), "B'1010'");
    }
    #[test]
    fn complex_literal() {
        assert_eq!(sexpr("(1.0, 2.0)"), "(1.0, 2.0)");
    }
    #[test]
    fn complex_literal_exprs() {
        assert_eq!(sexpr("(a + b, c * d)"), "((a + b), (c * d))");
    }
    #[test]
    fn name() {
        assert_eq!(sexpr("x"), "x");
    }

    // ---- Arithmetic precedence ----

    #[test]
    fn add_mul_precedence() {
        // a + b * c → (a + (b * c))
        assert_eq!(sexpr("a + b * c"), "(a + (b * c))");
    }

    #[test]
    fn mul_add_precedence() {
        // a * b + c → ((a * b) + c)
        assert_eq!(sexpr("a * b + c"), "((a * b) + c)");
    }

    #[test]
    fn power_right_associative() {
        // a ** b ** c → (a ** (b ** c))
        assert_eq!(sexpr("a ** b ** c"), "(a ** (b ** c))");
    }

    #[test]
    fn unary_minus_below_power() {
        // -a ** b → (- (a ** b))
        assert_eq!(sexpr("-a ** b"), "(- (a ** b))");
    }

    #[test]
    fn unary_minus_simple() {
        assert_eq!(sexpr("-x"), "(- x)");
    }

    #[test]
    fn unary_plus() {
        assert_eq!(sexpr("+x"), "(+ x)");
    }

    // ---- Comparison operators ----

    #[test]
    fn comparison_eq() {
        assert_eq!(sexpr("a == b"), "(a == b)");
    }

    #[test]
    fn comparison_dot_eq() {
        // The dotted spelling renders the same as the symbolic one.
        assert_eq!(sexpr("a .eq. b"), "(a == b)");
    }

    #[test]
    fn comparison_ne() {
        assert_eq!(sexpr("a /= b"), "(a /= b)");
    }

    #[test]
    fn comparison_chained_is_error() {
        // a < b < c is illegal Fortran (non-associative).
        let tokens = Lexer::tokenize("a < b < c", 0).unwrap();
        let mut parser = Parser::new(&tokens);
        let result = parser.parse_expr();
        assert!(result.is_err(), "chained comparisons should error");
    }

    // ---- Logical operators ----

    #[test]
    fn logical_and_or() {
        // a .and. b .or. c → ((a .and. b) .or. c)
        assert_eq!(sexpr("a .and. b .or. c"), "((a .and. b) .or. c)");
    }

    #[test]
    fn logical_not() {
        assert_eq!(sexpr(".not. x"), "(.not. x)");
    }

    #[test]
    fn logical_eqv() {
        assert_eq!(sexpr("a .eqv. b"), "(a .eqv. b)");
    }

    // ---- Concatenation ----

    #[test]
    fn concat() {
        assert_eq!(sexpr("a // b"), "(a // b)");
    }

    #[test]
    fn concat_left_assoc() {
        assert_eq!(sexpr("a // b // c"), "((a // b) // c)");
    }

    // ---- Parentheses ----

    #[test]
    fn parens_override_precedence() {
        // ParenExpr wraps inner, so (a + b) * c has an explicit paren node.
        assert_eq!(sexpr("(a + b) * c"), "(((a + b)) * c)");
    }

    // ---- Function calls ----

    #[test]
    fn function_call_no_args() {
        assert_eq!(sexpr("f()"), "f()");
    }

    #[test]
    fn function_call_one_arg() {
        assert_eq!(sexpr("sin(x)"), "sin(x)");
    }

    #[test]
    fn function_call_multiple_args() {
        assert_eq!(sexpr("max(a, b, c)"), "max(a, b, c)");
    }

    #[test]
    fn function_call_keyword_arg() {
        assert_eq!(sexpr("open(unit=10)"), "open(unit=10)");
    }

    // ---- Component access ----

    #[test]
    fn component_access() {
        assert_eq!(sexpr("x%field"), "x%field");
    }

    #[test]
    fn component_access_chain() {
        assert_eq!(sexpr("x%inner%deep"), "x%inner%deep");
    }

    #[test]
    fn component_access_with_call() {
        assert_eq!(sexpr("obj%method(a)"), "obj%method(a)");
    }

    // ---- Array constructors ----

    #[test]
    fn array_constructor_bracket() {
        assert_eq!(sexpr("[1, 2, 3]"), "[1, 2, 3]");
    }

    #[test]
    fn array_constructor_typed() {
        assert_eq!(sexpr("[integer :: 1, 2]"), "[integer :: 1, 2]");
    }

    #[test]
    fn array_constructor_typed_character_len() {
        assert_eq!(
            sexpr("[character(len=26) :: '%s', 'left']"),
            "[character(len=26) :: '%s', 'left']"
        );
    }

    #[test]
    fn implied_do_basic() {
        assert_eq!(sexpr("[(i, i=1,10)]"), "[(i, i=1, 10)]");
    }

    #[test]
    fn implied_do_with_step() {
        assert_eq!(sexpr("[(i, i=1,10,2)]"), "[(i, i=1, 10, 2)]");
    }

    #[test]
    fn implied_do_expression() {
        assert_eq!(sexpr("[(i*2, i=1,5)]"), "[((i * 2), i=1, 5)]");
    }

    // ---- Range subscripts ----

    #[test]
    fn range_start_end() {
        assert_eq!(sexpr("a(1:5)"), "a(1:5)");
    }

    #[test]
    fn range_start_end_stride() {
        assert_eq!(sexpr("a(1:10:2)"), "a(1:10:2)");
    }

    #[test]
    fn range_colon_only() {
        // a(:) — full range
        assert_eq!(sexpr("a(:)"), "a(:)");
    }

    #[test]
    fn range_no_start() {
        // a(:5) — range with no start
        assert_eq!(sexpr("a(:5)"), "a(:5)");
    }

    #[test]
    fn range_no_end() {
        // a(2:) — range with no end
        assert_eq!(sexpr("a(2:)"), "a(2:)");
    }

    #[test]
    fn range_stride_only() {
        // a(::2) — full range with stride
        assert_eq!(sexpr("a(::2)"), "a(::2)");
    }

    #[test]
    fn multi_dim_with_ranges() {
        assert_eq!(sexpr("a(1:5, :, 3)"), "a(1:5, :, 3)");
    }

    // ---- Complex expressions ----

    #[test]
    fn complex_arithmetic() {
        assert_eq!(
            sexpr("a + b * c ** d - e / f"),
            "((a + (b * (c ** d))) - (e / f))"
        );
    }

    #[test]
    fn mixed_comparison_and_logical() {
        assert_eq!(sexpr("x > 0 .and. y < 10"), "((x > 0) .and. (y < 10))");
    }

    #[test]
    fn chained_function_calls() {
        assert_eq!(sexpr("f(g(x))"), "f(g(x))");
    }

    #[test]
    fn array_element_in_expression() {
        assert_eq!(sexpr("a(i) + b(j)"), "(a(i) + b(j))");
    }

    // ======================================================================
    // Audit test gap coverage
    // ======================================================================

    // ---- (/ /) array constructor form ----
    #[test]
    fn array_constructor_slash_form() {
        // (/ ... /) form — the closing `/` before `)` must be taken as the
        // constructor terminator, not as a division operator. The expected
        // rendering is the same as for the bracket form.
        assert_eq!(sexpr("(/ 1, 2, 3 /)"), "[1, 2, 3]");
    }

    // ---- .not. vs .and. precedence ----
    #[test]
    fn not_binds_tighter_than_and() {
        // .not. a .and. b → ((.not. a) .and. b)
        assert_eq!(sexpr(".not. a .and. b"), "((.not. a) .and. b)");
    }

    #[test]
    fn not_binds_tighter_than_or() {
        assert_eq!(sexpr(".not. a .or. b"), "((.not. a) .or. b)");
    }

    // ---- .eqv./.neqv. precedence ----
    #[test]
    fn eqv_lower_than_or() {
        // a .or. b .eqv. c → ((a .or. b) .eqv. c)
        assert_eq!(sexpr("a .or. b .eqv. c"), "((a .or. b) .eqv. c)");
    }

    // ---- Defined operators ----
    #[test]
    fn defined_binary_op() {
        assert_eq!(sexpr("a .cross. b"), "(a .cross. b)");
    }

    #[test]
    fn defined_unary_op() {
        assert_eq!(sexpr(".inv. x"), "(.inv. x)");
    }

    #[test]
    fn defined_binary_lowest_precedence() {
        // defined binary is lowest — a + b .myop. c → ((a + b) .myop. c)
        assert_eq!(sexpr("a + b .myop. c"), "((a + b) .myop. c)");
    }

    // ---- BOZ variants ----
    #[test]
    fn boz_octal() {
        assert_eq!(sexpr("O'777'"), "O'777'");
    }
    #[test]
    fn boz_hex() {
        assert_eq!(sexpr("Z'FF'"), "Z'FF'");
    }

    // ---- Real literal edge cases ----
    #[test]
    fn real_leading_dot() {
        assert_eq!(sexpr(".5"), ".5");
    }
    #[test]
    fn real_trailing_dot() {
        assert_eq!(sexpr("5."), "5.");
    }

    // ---- Mixed postfix chains ----
    #[test]
    fn postfix_call_then_component() {
        assert_eq!(sexpr("a(i)%field"), "a(i)%field");
    }

    #[test]
    fn postfix_deep_chain() {
        assert_eq!(sexpr("a%b(i)%c"), "a%b(i)%c");
    }

    // ---- Error cases ----
    // These drive the parser directly because `parse_expression` unwraps
    // the result and would panic instead of exposing the error.
    #[test]
    fn error_unexpected_operator() {
        let tokens = Lexer::tokenize("+ *", 0).unwrap();
        let mut parser = Parser::new(&tokens);
        assert!(parser.parse_expr().is_err());
    }

    #[test]
    fn error_unclosed_paren() {
        let tokens = Lexer::tokenize("(a + b", 0).unwrap();
        let mut parser = Parser::new(&tokens);
        assert!(parser.parse_expr().is_err());
    }

    #[test]
    fn error_trailing_operator() {
        let tokens = Lexer::tokenize("a +", 0).unwrap();
        let mut parser = Parser::new(&tokens);
        assert!(parser.parse_expr().is_err());
    }
}
1176