Rust · 53136 bytes Raw Blame History
1 //! Program unit parser.
2 //!
3 //! Parses top-level Fortran compilation units: programs, modules,
4 //! subroutines, functions, and interface blocks.
5
6 use super::expr::span_from_to;
7 use super::{ParseError, Parser};
8 use crate::ast::decl::SpannedDecl;
9 use crate::ast::stmt::SpannedStmt;
10 use crate::ast::unit::*;
11 use crate::ast::Spanned;
12 use crate::lexer::TokenKind;
13
14 impl<'a> Parser<'a> {
15 /// Parse a complete Fortran source file — one or more program units.
16 pub fn parse_file(&mut self) -> Result<Vec<SpannedUnit>, ParseError> {
17 let mut units = Vec::new();
18 loop {
19 self.skip_newlines();
20 if self.peek() == &TokenKind::Eof {
21 break;
22 }
23 units.push(self.parse_program_unit()?);
24 }
25 Ok(units)
26 }
27
28 /// Parse a single program unit.
29 pub fn parse_program_unit(&mut self) -> Result<SpannedUnit, ParseError> {
30 self.skip_newlines();
31 let start = self.current_span();
32
33 // Prefixes and a single optional return-type spec may appear in
34 // any order before `function` / `subroutine` / `procedure`.
35 // Fortran 2008 R1226: prefix-spec ::= type-spec | declaration-prefix
36 // where declaration-prefix is one of pure/impure/elemental/
37 // recursive/non_recursive/module. Stdlib uses every order:
38 // pure module function foo
39 // elemental module logical function bar
40 // logical pure module function baz
41 // pure real(sp) module function qux
42 let mut prefixes: Vec<Prefix> = Vec::new();
43 let mut return_type: Option<crate::ast::decl::TypeSpec> = None;
44 loop {
45 let text = self.peek_text().to_lowercase();
46 match text.as_str() {
47 "pure" => {
48 self.advance();
49 prefixes.push(Prefix::Pure);
50 }
51 "impure" => {
52 self.advance();
53 prefixes.push(Prefix::Impure);
54 }
55 "elemental" => {
56 self.advance();
57 prefixes.push(Prefix::Elemental);
58 }
59 "recursive" => {
60 self.advance();
61 prefixes.push(Prefix::Recursive);
62 }
63 "non_recursive" => {
64 self.advance();
65 prefixes.push(Prefix::NonRecursive);
66 }
67 "module" => {
68 // `module` is a prefix iff the *eventual* keyword
69 // afterward is subroutine/function/procedure. The
70 // intervening tokens may be other prefixes or a
71 // type-spec; check the next token cheaply and treat
72 // it as a prefix when it can lead to those keywords.
73 let next = if self.pos + 1 < self.tokens.len() {
74 self.tokens[self.pos + 1].text.to_lowercase()
75 } else {
76 String::new()
77 };
78 let is_simple_prefix =
79 matches!(next.as_str(), "subroutine" | "function" | "procedure");
80 let is_followed_by_decl_prefix = matches!(
81 next.as_str(),
82 "pure" | "impure" | "elemental" | "recursive" | "non_recursive"
83 );
84 let is_type_then_function = matches!(
85 next.as_str(),
86 "integer"
87 | "real"
88 | "double"
89 | "complex"
90 | "logical"
91 | "character"
92 | "type"
93 | "class"
94 );
95 if is_simple_prefix || is_followed_by_decl_prefix || is_type_then_function {
96 self.advance();
97 prefixes.push(Prefix::Module);
98 } else {
99 break;
100 }
101 }
102 _ => {
103 if return_type.is_none() {
104 if let Some(ts_result) = self.try_parse_type_spec() {
105 return_type = Some(ts_result?);
106 continue;
107 }
108 }
109 break;
110 }
111 }
112 }
113
114 let text = self.peek_text().to_lowercase();
115 match text.as_str() {
116 "program" => self.parse_program(start),
117 "module" => self.parse_module(start),
118 "submodule" => self.parse_submodule(start),
119 "subroutine" => self.parse_subroutine(start, prefixes),
120 "function" => self.parse_function(start, prefixes, return_type),
121 // F2008 §12.6.2.5: separate module procedure body
122 // (module procedure NAME ... end procedure [NAME])
123 // — the procedure's signature is inherited from the
124 // parent module's interface block, so args/return type
125 // are not repeated here. Only valid when the `module`
126 // prefix was consumed above.
127 "procedure" if prefixes.iter().any(|p| matches!(p, Prefix::Module)) => {
128 self.parse_separate_module_procedure(start, prefixes)
129 }
130 "blockdata" | "block" => {
131 if text == "block"
132 && self.pos + 1 < self.tokens.len()
133 && self.tokens[self.pos + 1].text.eq_ignore_ascii_case("data")
134 {
135 self.parse_block_data(start)
136 } else if !prefixes.is_empty() || return_type.is_some() {
137 Err(self.error("expected 'subroutine' or 'function' after prefixes".into()))
138 } else {
139 Err(self.error(format!("expected program unit keyword, got '{}'", text)))
140 }
141 }
142 "interface" | "abstract" => self.parse_interface_block(start),
143 _ => {
144 if return_type.is_some() {
145 // Had a type spec — must be a function.
146 if self.peek_text().eq_ignore_ascii_case("function") {
147 self.parse_function(start, prefixes, return_type)
148 } else {
149 Err(self.error("expected 'function' after type specifier".into()))
150 }
151 } else {
152 // Implicit main program (no PROGRAM keyword).
153 self.parse_implicit_program(start)
154 }
155 }
156 }
157 }
158
159 fn parse_program(&mut self, start: crate::lexer::Span) -> Result<SpannedUnit, ParseError> {
160 self.advance(); // consume 'program'
161 let name = if self.peek() == &TokenKind::Identifier {
162 Some(self.advance().clone().text)
163 } else {
164 None
165 };
166 self.skip_newlines();
167
168 let (uses, imports, implicit, decls, body, ifaces) = self.parse_unit_body(&["program"])?;
169 let mut contains = self.parse_contains_section()?;
170 contains.extend(ifaces); // Interface blocks resolved by sema, ignored by lowering.
171 self.consume_end("program")?;
172
173 let span = span_from_to(start, self.prev_span());
174 Ok(Spanned::new(
175 ProgramUnit::Program {
176 name,
177 uses,
178 imports,
179 implicit,
180 decls,
181 body,
182 contains,
183 },
184 span,
185 ))
186 }
187
188 fn parse_implicit_program(
189 &mut self,
190 start: crate::lexer::Span,
191 ) -> Result<SpannedUnit, ParseError> {
192 // No PROGRAM keyword — implicit main program.
193 let (uses, imports, implicit, decls, body, ifaces) = self.parse_unit_body(&["program"])?;
194
195 // Consume the END [PROGRAM] if present — parse_unit_body breaks
196 // *before* consuming the terminator, so we must advance past it
197 // or parse_file will re-enter parse_program_unit at the same
198 // position forever.
199 self.skip_newlines();
200 if self.peek() != &TokenKind::Eof {
201 let _ = self.consume_end("program");
202 }
203
204 let span = span_from_to(start, self.prev_span());
205 Ok(Spanned::new(
206 ProgramUnit::Program {
207 name: None,
208 uses,
209 imports,
210 implicit,
211 decls,
212 body,
213 contains: ifaces,
214 },
215 span,
216 ))
217 }
218
219 fn parse_module(&mut self, start: crate::lexer::Span) -> Result<SpannedUnit, ParseError> {
220 self.advance(); // consume 'module'
221 let name = self.advance().clone().text;
222 self.skip_newlines();
223
224 let (uses, imports, implicit, decls, _body, ifaces) = self.parse_unit_body(&["module"])?;
225 let mut contains = self.parse_contains_section()?;
226 contains.extend(ifaces);
227 self.consume_end("module")?;
228
229 let span = span_from_to(start, self.prev_span());
230 Ok(Spanned::new(
231 ProgramUnit::Module {
232 name,
233 uses,
234 imports,
235 implicit,
236 decls,
237 contains,
238 },
239 span,
240 ))
241 }
242
243 fn parse_submodule(&mut self, start: crate::lexer::Span) -> Result<SpannedUnit, ParseError> {
244 self.advance(); // consume 'submodule'
245 self.expect(&TokenKind::LParen)?;
246 let parent = self.advance().clone().text;
247 let ancestor = if self.eat(&TokenKind::Colon) {
248 Some(self.advance().clone().text)
249 } else {
250 None
251 };
252 self.expect(&TokenKind::RParen)?;
253 let name = self.advance().clone().text;
254 self.skip_newlines();
255
256 let (uses, _imports, _implicit, decls, _body, ifaces) =
257 self.parse_unit_body(&["submodule"])?;
258 let mut contains = self.parse_contains_section()?;
259 // Carry interface blocks declared at the submodule's
260 // specification section into `contains` so sema sees them
261 // (without this, generic interfaces declared inside the
262 // submodule — e.g. stdlib_quadrature_simps's
263 // `interface simps38_weights` — are silently dropped).
264 contains.extend(ifaces);
265 self.consume_end("submodule")?;
266
267 let span = span_from_to(start, self.prev_span());
268 Ok(Spanned::new(
269 ProgramUnit::Submodule {
270 parent,
271 ancestor,
272 name,
273 uses,
274 decls,
275 contains,
276 },
277 span,
278 ))
279 }
280
281 fn parse_subroutine(
282 &mut self,
283 start: crate::lexer::Span,
284 prefix: Vec<Prefix>,
285 ) -> Result<SpannedUnit, ParseError> {
286 self.advance(); // consume 'subroutine'
287 let name = self.advance().clone().text;
288
289 let args = if self.eat(&TokenKind::LParen) {
290 let a = self.parse_dummy_arg_list()?;
291 self.expect(&TokenKind::RParen)?;
292 a
293 } else {
294 Vec::new()
295 };
296
297 let bind = self.try_parse_bind()?;
298 self.skip_newlines();
299
300 let (uses, imports, implicit, decls, body, ifaces) =
301 self.parse_unit_body(&["subroutine"])?;
302 let mut contains = self.parse_contains_section()?;
303 contains.extend(ifaces);
304 self.consume_end("subroutine")?;
305
306 let span = span_from_to(start, self.prev_span());
307 Ok(Spanned::new(
308 ProgramUnit::Subroutine {
309 name,
310 args,
311 bind,
312 prefix,
313 uses,
314 imports,
315 implicit,
316 decls,
317 body,
318 contains,
319 },
320 span,
321 ))
322 }
323
324 fn parse_function(
325 &mut self,
326 start: crate::lexer::Span,
327 prefix: Vec<Prefix>,
328 return_type: Option<crate::ast::decl::TypeSpec>,
329 ) -> Result<SpannedUnit, ParseError> {
330 self.advance(); // consume 'function'
331 let name = self.advance().clone().text;
332
333 self.expect(&TokenKind::LParen)?;
334 let args = self.parse_dummy_arg_list()?;
335 self.expect(&TokenKind::RParen)?;
336
337 // RESULT and BIND clauses may appear in either order
338 // (F2008 R1229). Scan for both repeatedly so either
339 // `result(r) bind(C)` or `bind(C) result(r)` parses.
340 let mut result: Option<String> = None;
341 let mut bind: Option<BindInfo> = None;
342 loop {
343 if result.is_none() && self.peek_text().eq_ignore_ascii_case("result") {
344 self.advance();
345 self.expect(&TokenKind::LParen)?;
346 let r = self.advance().clone().text;
347 self.expect(&TokenKind::RParen)?;
348 result = Some(r);
349 continue;
350 }
351 if bind.is_none() && self.peek_text().eq_ignore_ascii_case("bind") {
352 bind = self.try_parse_bind()?;
353 continue;
354 }
355 break;
356 }
357 self.skip_newlines();
358
359 let (uses, imports, implicit, decls, body, ifaces) = self.parse_unit_body(&["function"])?;
360 let mut contains = self.parse_contains_section()?;
361 contains.extend(ifaces);
362 self.consume_end("function")?;
363
364 let span = span_from_to(start, self.prev_span());
365 Ok(Spanned::new(
366 ProgramUnit::Function {
367 name,
368 args,
369 result,
370 return_type,
371 bind,
372 prefix,
373 uses,
374 imports,
375 implicit,
376 decls,
377 body,
378 contains,
379 },
380 span,
381 ))
382 }
383
384 /// Parse the F2008 separate module procedure body form:
385 /// `module procedure NAME` [ body ] `end [procedure [NAME]]`
386 /// The signature (args, return type, etc.) is inherited from the
387 /// matching `module subroutine`/`module function` interface in the
388 /// parent module — sema fills it in once both files are processed.
389 /// We always emit a Subroutine here; if the parent's interface was
390 /// actually a function, sema rewrites it (sema/resolve.rs).
391 fn parse_separate_module_procedure(
392 &mut self,
393 start: crate::lexer::Span,
394 prefix: Vec<Prefix>,
395 ) -> Result<SpannedUnit, ParseError> {
396 self.advance(); // consume 'procedure'
397 let name = self.advance().clone().text;
398 self.skip_newlines();
399
400 // Body is parsed normally; declarations may appear (e.g. local
401 // vars). The dummy arguments themselves are *not* redeclared
402 // here per F2008 §12.6.2.5 — sema injects them from the
403 // parent module's interface.
404 let (uses, imports, implicit, decls, body, ifaces) =
405 self.parse_unit_body(&["procedure"])?;
406 let mut contains = self.parse_contains_section()?;
407 contains.extend(ifaces);
408 self.consume_end("procedure")?;
409
410 let span = span_from_to(start, self.prev_span());
411 Ok(Spanned::new(
412 ProgramUnit::Subroutine {
413 name,
414 args: Vec::new(),
415 bind: None,
416 prefix,
417 uses,
418 imports,
419 implicit,
420 decls,
421 body,
422 contains,
423 },
424 span,
425 ))
426 }
427
428 fn parse_block_data(&mut self, start: crate::lexer::Span) -> Result<SpannedUnit, ParseError> {
429 self.advance(); // consume 'block'
430 self.advance(); // consume 'data'
431 let name = if self.peek() == &TokenKind::Identifier {
432 Some(self.advance().clone().text)
433 } else {
434 None
435 };
436 self.skip_newlines();
437
438 let (uses, _imports, _implicit, decls, _body, _ifaces) =
439 self.parse_unit_body(&["blockdata", "block"])?;
440 // End block data.
441 self.skip_newlines();
442 let text = self.peek_text().to_lowercase();
443 if text == "endblockdata" {
444 self.advance();
445 } else if text == "end" {
446 self.advance();
447 self.eat_ident("block");
448 self.eat_ident("data");
449 }
450
451 let span = span_from_to(start, self.prev_span());
452 Ok(Spanned::new(
453 ProgramUnit::BlockData { name, uses, decls },
454 span,
455 ))
456 }
457
458 fn parse_interface_block(
459 &mut self,
460 start: crate::lexer::Span,
461 ) -> Result<SpannedUnit, ParseError> {
462 let is_abstract = if self.peek_text().eq_ignore_ascii_case("abstract") {
463 self.advance();
464 true
465 } else {
466 false
467 };
468 self.advance(); // consume 'interface'
469
470 // Optional name or generic spec.
471 // Check generic specs BEFORE generic identifier — they lex as identifiers.
472 let kw_lc = self.peek_text().to_lowercase();
473 let is_generic_spec =
474 matches!(kw_lc.as_str(), "operator" | "assignment" | "read" | "write")
475 && self.pos + 1 < self.tokens.len()
476 && self.tokens[self.pos + 1].kind == TokenKind::LParen;
477 let name = if is_generic_spec {
478 let op_kw = self.advance().clone().text;
479 self.expect(&TokenKind::LParen)?;
480 // Consume balanced contents — operators can span multiple
481 // tokens (==, /=, //, .lt., etc.) and defined I/O uses
482 // `formatted` / `unformatted` identifiers.
483 let mut op = String::new();
484 let mut depth = 1;
485 while depth > 0 && self.peek() != &TokenKind::Eof {
486 match self.peek() {
487 TokenKind::LParen => {
488 op.push_str(self.advance().clone().text.as_str());
489 depth += 1;
490 }
491 TokenKind::RParen => {
492 if depth == 1 {
493 self.advance();
494 depth = 0;
495 } else {
496 op.push_str(self.advance().clone().text.as_str());
497 depth -= 1;
498 }
499 }
500 _ => {
501 op.push_str(self.advance().clone().text.as_str());
502 }
503 }
504 }
505 Some(format!("{}({})", op_kw, op))
506 } else if self.peek() == &TokenKind::Identifier {
507 Some(self.advance().clone().text)
508 } else {
509 None
510 };
511 self.skip_newlines();
512
513 let mut bodies = Vec::new();
514 loop {
515 self.skip_newlines();
516 let text = self.peek_text().to_lowercase();
517 if text == "endinterface" || text == "end" {
518 break;
519 }
520
521 if text == "module" {
522 let next = if self.pos + 1 < self.tokens.len() {
523 self.tokens[self.pos + 1].text.to_lowercase()
524 } else {
525 String::new()
526 };
527 if next == "procedure" {
528 self.advance(); // module
529 self.advance(); // procedure
530 self.eat(&TokenKind::ColonColon);
531 let mut names = Vec::new();
532 loop {
533 names.push(self.advance().clone().text);
534 if !self.eat(&TokenKind::Comma) {
535 break;
536 }
537 }
538 bodies.push(InterfaceBody::ModuleProcedure(names));
539 self.skip_newlines();
540 continue;
541 }
542 }
543
544 // F2003 R1207: bare `procedure :: NAME [, NAME...]` inside a
545 // generic interface dispatches to the named specifics with
546 // the same semantics as `module procedure NAME` here.
547 // Several stdlib generic interfaces (e.g. `interface arg`,
548 // `interface deg2rad`) use this form.
549 if text == "procedure" {
550 let next_kind = if self.pos + 1 < self.tokens.len() {
551 self.tokens[self.pos + 1].kind.clone()
552 } else {
553 TokenKind::Eof
554 };
555 // Disambiguate from `procedure(iface), attr :: name`
556 // (procedure-pointer / abstract-iface declaration) which
557 // takes a parenthesized interface name; that form is a
558 // subprogram declaration the regular path handles.
559 if next_kind == TokenKind::ColonColon
560 || next_kind == TokenKind::Identifier
561 || next_kind == TokenKind::Comma
562 {
563 self.advance(); // procedure
564 self.eat(&TokenKind::ColonColon);
565 let mut names = Vec::new();
566 loop {
567 names.push(self.advance().clone().text);
568 if !self.eat(&TokenKind::Comma) {
569 break;
570 }
571 }
572 bodies.push(InterfaceBody::ModuleProcedure(names));
573 self.skip_newlines();
574 continue;
575 }
576 }
577
578 // Try parsing as a subprogram.
579 let sub = self.parse_program_unit()?;
580 bodies.push(InterfaceBody::Subprogram(sub));
581 }
582
583 self.consume_end("interface")?;
584 let span = span_from_to(start, self.prev_span());
585 Ok(Spanned::new(
586 ProgramUnit::InterfaceBlock {
587 name,
588 is_abstract,
589 bodies,
590 },
591 span,
592 ))
593 }
594
595 // ---- Helpers ----
596
597 /// Parse the body of a program unit: uses, implicit, declarations, then executable statements.
598 #[allow(clippy::type_complexity)]
599 pub(crate) fn parse_unit_body(
600 &mut self,
601 terminators: &[&str],
602 ) -> Result<
603 (
604 Vec<SpannedDecl>,
605 Vec<ImportStmt>,
606 Vec<SpannedDecl>,
607 Vec<SpannedDecl>,
608 Vec<SpannedStmt>,
609 Vec<SpannedUnit>,
610 ),
611 ParseError,
612 > {
613 let mut uses = Vec::new();
614 let mut imports = Vec::new();
615 let mut implicit = Vec::new();
616 let mut decls = Vec::new();
617 let mut body = Vec::new();
618 let mut interfaces = Vec::new();
619
620 // Phase 1: USE statements.
621 loop {
622 self.skip_newlines();
623 if self.peek_text().eq_ignore_ascii_case("use") {
624 self.advance();
625 uses.push(self.parse_use_stmt()?);
626 } else {
627 break;
628 }
629 }
630
631 // Phase 1.5: IMPORT statements.
632 loop {
633 self.skip_newlines();
634 if self.peek_text().eq_ignore_ascii_case("import") {
635 self.advance();
636 imports.push(self.parse_import()?);
637 } else {
638 break;
639 }
640 }
641
642 // Phase 2: IMPLICIT statements.
643 loop {
644 self.skip_newlines();
645 if self.peek_text().eq_ignore_ascii_case("implicit") {
646 self.advance();
647 implicit.push(self.parse_implicit()?);
648 } else {
649 break;
650 }
651 }
652
653 // Phase 3: Declarations and executable statements.
654 // In practice, declarations and statements can be intermixed in modern Fortran.
655 // We'll parse everything as statements and let sema separate them.
656 loop {
657 self.skip_newlines();
658 if self.peek() == &TokenKind::Eof {
659 break;
660 }
661 let text = self.peek_text().to_lowercase();
662
663 // Check for end of unit.
664 if terminators.iter().any(|t| text == format!("end{}", t)) {
665 break;
666 }
667 if text == "end" {
668 let next = if self.pos + 1 < self.tokens.len() {
669 self.tokens[self.pos + 1].text.to_lowercase()
670 } else {
671 String::new()
672 };
673 if terminators.iter().any(|t| next == *t)
674 || next.is_empty()
675 || self.at_stmt_end_after(1)
676 {
677 break;
678 }
679 }
680 if text == "contains" {
681 break;
682 }
683
684 // Check for derived type definition: type name
685 // or type [, attrs] :: name.
686 if text == "type" {
687 let next_pos = self.pos + 1;
688 // type(name) is a declaration type-specifier, but bare
689 // type name starts a derived-type definition.
690 if self.tokens.get(next_pos).is_some_and(|t| {
691 matches!(
692 t.kind,
693 TokenKind::Identifier | TokenKind::Comma | TokenKind::ColonColon
694 )
695 }) {
696 self.advance(); // consume 'type'
697 decls.push(self.parse_derived_type_def()?);
698 continue;
699 }
700 }
701
702 // Check for interface block (specification construct).
703 // Interface blocks are valid in the specification section of any
704 // program unit. Parse and discard — type information is captured
705 // by semantic analysis, no IR generation needed.
706 if text == "interface" || text == "abstract" {
707 let istart = self.current_span();
708 let iface = self.parse_interface_block(istart)?;
709 interfaces.push(iface);
710 continue;
711 }
712
713 // PROCEDURE(interface_name) [, attrs] :: name [=> null()]
714 // Procedure pointer / procedure component declarations.
715 if text == "procedure" {
716 let next_pos = self.pos + 1;
717 if next_pos < self.tokens.len() && self.tokens[next_pos].kind == TokenKind::LParen {
718 let start = self.current_span();
719 self.advance(); // consume 'procedure'
720 self.advance(); // consume '('
721 let iface_name = if self.peek() == &TokenKind::Identifier {
722 self.advance().clone().text
723 } else {
724 String::new()
725 };
726 self.expect(&TokenKind::RParen)?;
727
728 // Parse attributes: regular declaration attrs like
729 // OPTIONAL/VALUE plus procedure-specific ones like POINTER.
730 let mut attrs = Vec::new();
731 while self.eat(&TokenKind::Comma) {
732 let attr_text = self.peek_text().to_lowercase();
733 if matches!(
734 attr_text.as_str(),
735 "nopass" | "pass" | "deferred" | "non_overridable"
736 ) {
737 self.advance();
738 continue;
739 }
740 if let Some(attr) = self.try_parse_attribute() {
741 attrs.push(attr?);
742 } else {
743 self.advance();
744 }
745 }
746
747 // :: separator
748 if self.peek() == &TokenKind::ColonColon {
749 self.advance();
750 }
751
752 // Comma-separated entity list. Each entity may
753 // carry its own optional `=> null()` initializer.
754 // Previously the parser stopped after the first
755 // name, dropping `g` in `procedure(...) :: f, g`
756 // and tripping the next-token check on the comma.
757 let mut entities = Vec::new();
758 loop {
759 let entity_name = if self.peek() == &TokenKind::Identifier {
760 self.advance().clone().text
761 } else {
762 String::new()
763 };
764
765 if self.eat(&TokenKind::Arrow)
766 && self.peek_text().eq_ignore_ascii_case("null")
767 {
768 self.advance();
769 if self.peek() == &TokenKind::LParen {
770 self.advance();
771 let _ = self.expect(&TokenKind::RParen);
772 }
773 }
774
775 entities.push(crate::ast::decl::EntityDecl {
776 name: entity_name,
777 array_spec: None,
778 init: None,
779 char_len: None,
780 ptr_init: None,
781 });
782
783 if !self.eat(&TokenKind::Comma) {
784 break;
785 }
786 }
787
788 // Emit as a variable declaration with Pointer attribute.
789 // The interface name is stored but the full procedure
790 // pointer call semantics are deferred.
791 let span = span_from_to(start, self.prev_span());
792 let mut all_attrs = attrs;
793 all_attrs.push(crate::ast::decl::Attribute::External);
794 decls.push(crate::ast::Spanned::new(
795 crate::ast::decl::Decl::TypeDecl {
796 type_spec: crate::ast::decl::TypeSpec::Type(iface_name),
797 attrs: all_attrs,
798 entities,
799 },
800 span,
801 ));
802 continue;
803 }
804 }
805
806 // Try as type declaration.
807 if let Some(ts_result) = self.try_parse_type_spec() {
808 let ts = ts_result?;
809 decls.push(self.parse_type_decl(ts)?);
810 continue;
811 }
812
813 // Standalone declaration statements that introduce no
814 // new type. Audit MAJOR-2: prior to this dispatch the
815 // PARAMETER/COMMON/DATA parsers existed but were never
816 // called, so `parameter (x = 42)` at statement-start was
817 // silently dropped and the program ran with x=0.
818 //
819 // Audit Maj-5: Fortran has no reserved words, so a
820 // legacy F77 program may use `parameter`, `common`, or
821 // `data` as a variable name. Disambiguate by peeking
822 // at the next token: the declaration form is always
823 // followed by `(` (for PARAMETER and DATA) or `/` (for
824 // COMMON); an expression-statement use as an LHS is
825 // followed by `=`. This is a single-token lookahead.
826 let next_tok = self.tokens.get(self.pos + 1).map(|t| t.kind.clone());
827 if text == "parameter" && next_tok.as_ref() == Some(&TokenKind::LParen) {
828 self.advance(); // consume 'parameter'
829 decls.push(self.parse_parameter_stmt()?);
830 continue;
831 }
832 if text == "common" && matches!(next_tok.as_ref(), Some(TokenKind::Slash)) {
833 self.advance(); // consume 'common'
834 decls.push(self.parse_common_block()?);
835 continue;
836 }
837 if text == "data" && next_tok.as_ref() == Some(&TokenKind::Identifier) {
838 self.advance(); // consume 'data'
839 decls.push(self.parse_data_stmt()?);
840 continue;
841 }
842 if text == "equivalence" && next_tok.as_ref() == Some(&TokenKind::LParen) {
843 self.advance(); // consume 'equivalence'
844 decls.push(self.parse_equivalence_stmt()?);
845 continue;
846 }
847 if text == "enum" && next_tok.as_ref() == Some(&TokenKind::Comma) {
848 decls.push(self.parse_enum_def()?);
849 continue;
850 }
851
852 // INTRINSIC / EXTERNAL :: name-list — informational
853 // declarations that mark functions as intrinsic or external.
854 // We consume and discard them; sema already knows which names
855 // are intrinsic.
856 if (text == "intrinsic" || text == "external")
857 && (next_tok.as_ref() == Some(&TokenKind::ColonColon)
858 || next_tok.as_ref() == Some(&TokenKind::Identifier))
859 {
860 self.advance(); // consume keyword
861 let _ = self.eat(&TokenKind::ColonColon);
862 // Eat the name list.
863 loop {
864 if self.peek() == &TokenKind::Identifier {
865 self.advance();
866 } else {
867 break;
868 }
869 if !self.eat(&TokenKind::Comma) {
870 break;
871 }
872 }
873 self.skip_newlines();
874 continue;
875 }
876
877 // SAVE statement (F2018 §8.6.14):
878 // bare `save` — saves all locals in this scope
879 // `save :: a, b` — saves listed entities
880 // `save a, b` — same, no `::`
881 // `save /cb/, x` — common-block and entity mix
882 // Disambiguate from a variable named `save` by requiring
883 // the next token to start a SAVE list (`::`, identifier,
884 // `/`) or end the statement.
885 if text == "save" {
886 let next_kind = self.tokens.get(self.pos + 1).map(|t| t.kind.clone());
887 let is_save_stmt = self.at_stmt_end_after(1)
888 || matches!(
889 next_kind,
890 Some(TokenKind::ColonColon)
891 | Some(TokenKind::Identifier)
892 | Some(TokenKind::Slash)
893 );
894 if is_save_stmt {
895 let start = self.current_span();
896 self.advance(); // consume 'save'
897 let _ = self.eat(&TokenKind::ColonColon);
898 let mut entities = Vec::new();
899 while !self.at_stmt_end() {
900 if self.peek() == &TokenKind::Slash {
901 // /common-block-name/ — consume bracketing slashes.
902 self.advance();
903 if self.peek() == &TokenKind::Identifier {
904 entities.push(self.advance().clone().text);
905 }
906 let _ = self.eat(&TokenKind::Slash);
907 } else if self.peek() == &TokenKind::Identifier {
908 entities.push(self.advance().clone().text);
909 } else {
910 break;
911 }
912 if !self.eat(&TokenKind::Comma) {
913 break;
914 }
915 }
916 self.skip_newlines();
917 let span = span_from_to(start, self.prev_span());
918 decls.push(crate::ast::Spanned::new(
919 crate::ast::decl::Decl::AttributeStmt {
920 attr: crate::ast::decl::Attribute::Save,
921 entities,
922 },
923 span,
924 ));
925 continue;
926 }
927 }
928
929 // PRIVATE / PUBLIC access statements.
930 if text == "private" || text == "public" {
931 let start = self.current_span();
932 let attr = if text == "private" {
933 crate::ast::decl::Attribute::Private
934 } else {
935 crate::ast::decl::Attribute::Public
936 };
937 if self.at_stmt_end_after(1) {
938 // Standalone: sets default access for the module.
939 self.advance();
940 let span = span_from_to(start, self.prev_span());
941 decls.push(crate::ast::Spanned::new(
942 crate::ast::decl::Decl::AccessDefault { access: attr },
943 span,
944 ));
945 continue;
946 }
947 // PUBLIC :: name-list or PRIVATE :: name-list
948 let next_pos = self.pos + 1;
949 let has_colons = next_pos < self.tokens.len()
950 && self.tokens[next_pos].kind == TokenKind::ColonColon;
951 let ident_pos = if has_colons { next_pos + 1 } else { next_pos };
952 if ident_pos < self.tokens.len()
953 && self.tokens[ident_pos].kind == TokenKind::Identifier
954 {
955 self.advance(); // consume PUBLIC/PRIVATE
956 if has_colons {
957 self.advance();
958 } // consume ::
959 let mut names = Vec::new();
960 while let Some(name) = self.parse_access_list_item()? {
961 names.push(name);
962 if !self.eat(&TokenKind::Comma) {
963 break;
964 }
965 }
966 if !names.is_empty() {
967 let span = span_from_to(start, self.prev_span());
968 decls.push(crate::ast::Spanned::new(
969 crate::ast::decl::Decl::AccessList {
970 access: attr,
971 names,
972 },
973 span,
974 ));
975 continue;
976 }
977 }
978 }
979
980 // Try as executable statement.
981 body.push(self.parse_stmt()?);
982 }
983
984 Ok((uses, imports, implicit, decls, body, interfaces))
985 }
986
987 fn parse_access_list_item(&mut self) -> Result<Option<String>, ParseError> {
988 if self.peek() != &TokenKind::Identifier {
989 return Ok(None);
990 }
991
992 let kw = self.peek_text().to_lowercase();
993 let is_generic_spec = matches!(kw.as_str(), "operator" | "assignment" | "read" | "write")
994 && self.pos + 1 < self.tokens.len()
995 && self.tokens[self.pos + 1].kind == TokenKind::LParen;
996
997 if !is_generic_spec {
998 return Ok(Some(self.advance().clone().text));
999 }
1000
1001 let generic_kw = self.advance().clone().text;
1002 self.expect(&TokenKind::LParen)?;
1003 // Consume the parenthesized contents until the matching ).
1004 // Operators can be `==`, `/=`, `//`, etc. — multi-token. Defined
1005 // I/O uses `formatted` / `unformatted` identifiers.
1006 let mut op = String::new();
1007 let mut depth = 1;
1008 while depth > 0 && self.peek() != &TokenKind::Eof {
1009 match self.peek() {
1010 TokenKind::LParen => {
1011 op.push_str(self.advance().clone().text.as_str());
1012 depth += 1;
1013 }
1014 TokenKind::RParen => {
1015 if depth == 1 {
1016 self.advance();
1017 depth = 0;
1018 } else {
1019 op.push_str(self.advance().clone().text.as_str());
1020 depth -= 1;
1021 }
1022 }
1023 _ => {
1024 op.push_str(self.advance().clone().text.as_str());
1025 }
1026 }
1027 }
1028 Ok(Some(format!("{}({})", generic_kw, op)))
1029 }
1030
1031 fn parse_contains_section(&mut self) -> Result<Vec<SpannedUnit>, ParseError> {
1032 self.skip_newlines();
1033 if !self.peek_text().eq_ignore_ascii_case("contains") {
1034 return Ok(Vec::new());
1035 }
1036 self.advance(); // consume 'contains'
1037 self.skip_newlines();
1038
1039 let mut units = Vec::new();
1040 loop {
1041 self.skip_newlines();
1042 if self.peek() == &TokenKind::Eof {
1043 break;
1044 }
1045 let text = self.peek_text().to_lowercase();
1046 // Only break on END that closes the parent unit — not on inner subprograms'
1047 // END keywords (those are consumed by parse_program_unit).
1048 // Combined forms like "endprogram", "endmodule" etc. close the parent.
1049 if text == "end" {
1050 let next = if self.pos + 1 < self.tokens.len() {
1051 self.tokens[self.pos + 1].text.to_lowercase()
1052 } else {
1053 String::new()
1054 };
1055 // Bare "end" or "end program/module/submodule" closes the parent.
1056 if next.is_empty()
1057 || self.at_stmt_end_after(1)
1058 || matches!(
1059 next.as_str(),
1060 "program" | "module" | "submodule" | "subroutine" | "function"
1061 )
1062 {
1063 break;
1064 }
1065 }
1066 if matches!(
1067 text.as_str(),
1068 "endprogram" | "endmodule" | "endsubmodule" | "endsubroutine" | "endfunction"
1069 ) {
1070 break;
1071 }
1072 units.push(self.parse_program_unit()?);
1073 }
1074 Ok(units)
1075 }
1076
1077 fn parse_dummy_arg_list(&mut self) -> Result<Vec<DummyArg>, ParseError> {
1078 let mut args = Vec::new();
1079 if self.peek() == &TokenKind::RParen {
1080 return Ok(args);
1081 }
1082 loop {
1083 if self.eat(&TokenKind::Star) {
1084 args.push(DummyArg::Star);
1085 } else {
1086 args.push(DummyArg::Name(self.advance().clone().text));
1087 }
1088 if !self.eat(&TokenKind::Comma) {
1089 break;
1090 }
1091 }
1092 Ok(args)
1093 }
1094
1095 /// Parse an IMPORT statement.
1096 pub fn parse_import(&mut self) -> Result<ImportStmt, ParseError> {
1097 // Already consumed 'import'.
1098 if self.eat(&TokenKind::Comma) {
1099 let text = self.peek_text().to_lowercase();
1100 match text.as_str() {
1101 "all" => {
1102 self.advance();
1103 return Ok(ImportStmt::All);
1104 }
1105 "none" => {
1106 self.advance();
1107 return Ok(ImportStmt::None);
1108 }
1109 "only" => {
1110 self.advance();
1111 self.expect(&TokenKind::Colon)?;
1112 let mut names = Vec::new();
1113 loop {
1114 names.push(self.advance().clone().text);
1115 if !self.eat(&TokenKind::Comma) {
1116 break;
1117 }
1118 }
1119 return Ok(ImportStmt::Only(names));
1120 }
1121 _ => {}
1122 }
1123 }
1124 // import :: name1, name2
1125 self.eat(&TokenKind::ColonColon);
1126 let mut names = Vec::new();
1127 if !self.at_stmt_end() {
1128 loop {
1129 names.push(self.advance().clone().text);
1130 if !self.eat(&TokenKind::Comma) {
1131 break;
1132 }
1133 }
1134 }
1135 Ok(ImportStmt::Default(names))
1136 }
1137
1138 /// Parse optional BIND(C [, NAME="..."]) clause.
1139 /// Returns `None` if no BIND, `Some(BindInfo)` if present.
1140 fn try_parse_bind(&mut self) -> Result<Option<BindInfo>, ParseError> {
1141 if !self.peek_text().eq_ignore_ascii_case("bind") {
1142 return Ok(None);
1143 }
1144 self.advance(); // bind
1145 self.expect(&TokenKind::LParen)?;
1146 self.advance(); // c
1147 let name = if self.eat(&TokenKind::Comma) {
1148 if self.peek_text().eq_ignore_ascii_case("name") {
1149 self.advance();
1150 self.expect(&TokenKind::Assign)?;
1151 Some(self.advance().clone().text)
1152 } else {
1153 None
1154 }
1155 } else {
1156 None
1157 };
1158 self.expect(&TokenKind::RParen)?;
1159 Ok(Some(BindInfo { name }))
1160 }
1161 }
1162
1163 #[cfg(test)]
1164 mod tests {
1165 use super::*;
1166 use crate::lexer::Lexer;
1167
1168 fn parse_units(src: &str) -> Vec<SpannedUnit> {
1169 let tokens = Lexer::tokenize(src, 0).unwrap();
1170 let mut parser = Parser::new(&tokens);
1171 parser.parse_file().unwrap()
1172 }
1173
1174 fn parse_unit(src: &str) -> SpannedUnit {
1175 let units = parse_units(src);
1176 assert_eq!(units.len(), 1, "expected 1 unit, got {}", units.len());
1177 units.into_iter().next().unwrap()
1178 }
1179
1180 // ---- PROGRAM ----
1181
1182 #[test]
1183 fn simple_program() {
1184 let u = parse_unit(
1185 "program hello\n implicit none\n integer :: x\n x = 42\nend program hello\n",
1186 );
1187 if let ProgramUnit::Program {
1188 name, decls, body, ..
1189 } = &u.node
1190 {
1191 assert_eq!(name.as_deref(), Some("hello"));
1192 assert!(!decls.is_empty());
1193 assert!(!body.is_empty());
1194 } else {
1195 panic!("not Program");
1196 }
1197 }
1198
1199 #[test]
1200 fn program_with_contains() {
1201 let u = parse_unit(
1202 "program main\n x = 1\ncontains\n subroutine sub()\n end subroutine\nend program\n",
1203 );
1204 if let ProgramUnit::Program { contains, .. } = &u.node {
1205 assert_eq!(contains.len(), 1);
1206 } else {
1207 panic!("not Program");
1208 }
1209 }
1210
1211 #[test]
1212 fn program_with_bare_end() {
1213 let u = parse_unit("program main\n integer :: x\n x = 1\nend\n");
1214 if let ProgramUnit::Program { name, body, .. } = &u.node {
1215 assert_eq!(name.as_deref(), Some("main"));
1216 assert_eq!(body.len(), 1);
1217 } else {
1218 panic!("not Program");
1219 }
1220 }
1221
1222 // ---- SUBROUTINE ----
1223
1224 #[test]
1225 fn simple_subroutine() {
1226 let u = parse_unit("subroutine foo(x, y)\n real :: x, y\nend subroutine\n");
1227 if let ProgramUnit::Subroutine { name, args, .. } = &u.node {
1228 assert_eq!(name, "foo");
1229 assert_eq!(args.len(), 2);
1230 } else {
1231 panic!("not Subroutine");
1232 }
1233 }
1234
1235 #[test]
1236 fn pure_elemental_subroutine() {
1237 let u = parse_unit(
1238 "pure elemental subroutine bar(x)\n real, intent(in) :: x\nend subroutine\n",
1239 );
1240 if let ProgramUnit::Subroutine { prefix, .. } = &u.node {
1241 assert!(prefix.contains(&Prefix::Pure));
1242 assert!(prefix.contains(&Prefix::Elemental));
1243 } else {
1244 panic!("not Subroutine");
1245 }
1246 }
1247
1248 // ---- FUNCTION ----
1249
1250 #[test]
1251 fn simple_function() {
1252 let u =
1253 parse_unit("function square(x) result(y)\n real :: x, y\n y = x * x\nend function\n");
1254 if let ProgramUnit::Function { name, result, .. } = &u.node {
1255 assert_eq!(name, "square");
1256 assert_eq!(result.as_deref(), Some("y"));
1257 } else {
1258 panic!("not Function");
1259 }
1260 }
1261
1262 #[test]
1263 fn typed_function() {
1264 let u =
1265 parse_unit("real function add(a, b)\n real :: a, b\n add = a + b\nend function\n");
1266 if let ProgramUnit::Function { return_type, .. } = &u.node {
1267 assert!(return_type.is_some());
1268 } else {
1269 panic!("not Function");
1270 }
1271 }
1272
1273 #[test]
1274 fn recursive_function() {
1275 let u = parse_unit("recursive function fact(n) result(f)\n integer :: n, f\n if (n <= 1) then\n f = 1\n else\n f = n * fact(n - 1)\n end if\nend function\n");
1276 if let ProgramUnit::Function { prefix, .. } = &u.node {
1277 assert!(prefix.contains(&Prefix::Recursive));
1278 } else {
1279 panic!("not Function");
1280 }
1281 }
1282
1283 // ---- MODULE ----
1284
1285 #[test]
1286 fn simple_module() {
1287 let u = parse_unit("module my_mod\n implicit none\n integer :: x\ncontains\n subroutine sub()\n end subroutine\nend module\n");
1288 if let ProgramUnit::Module { name, contains, .. } = &u.node {
1289 assert_eq!(name, "my_mod");
1290 assert_eq!(contains.len(), 1);
1291 } else {
1292 panic!("not Module");
1293 }
1294 }
1295
1296 #[test]
1297 fn module_with_use() {
1298 let u = parse_unit("module b\n use a\n implicit none\nend module\n");
1299 if let ProgramUnit::Module { uses, .. } = &u.node {
1300 assert_eq!(uses.len(), 1);
1301 } else {
1302 panic!("not Module");
1303 }
1304 }
1305
1306 // ---- INTERFACE ----
1307
1308 #[test]
1309 fn interface_explicit() {
1310 let u = parse_unit(
1311 "interface\n subroutine ext(x)\n real :: x\n end subroutine\nend interface\n",
1312 );
1313 if let ProgramUnit::InterfaceBlock { bodies, .. } = &u.node {
1314 assert_eq!(bodies.len(), 1);
1315 } else {
1316 panic!("not InterfaceBlock");
1317 }
1318 }
1319
1320 #[test]
1321 fn interface_generic() {
1322 let u = parse_unit("interface sort\n module procedure sort_int\n module procedure sort_real\nend interface\n");
1323 if let ProgramUnit::InterfaceBlock { name, bodies, .. } = &u.node {
1324 assert_eq!(name.as_deref(), Some("sort"));
1325 assert_eq!(bodies.len(), 2);
1326 } else {
1327 panic!("not InterfaceBlock");
1328 }
1329 }
1330
1331 #[test]
1332 fn interface_operator_end_spec() {
1333 let u = parse_unit(
1334 "interface operator(+)\n module procedure add_int\nend interface operator(+)\n",
1335 );
1336 if let ProgramUnit::InterfaceBlock { name, bodies, .. } = &u.node {
1337 assert_eq!(name.as_deref(), Some("operator(+)"));
1338 assert_eq!(bodies.len(), 1);
1339 } else {
1340 panic!("not InterfaceBlock");
1341 }
1342 }
1343
1344 #[test]
1345 fn module_access_list_accepts_generic_specs() {
1346 let u = parse_unit(
1347 "module m\n implicit none\n private\n public :: assignment(=), operator(+), box_t\n type :: box_t\n integer :: value\n end type\nend module\n",
1348 );
1349 if let ProgramUnit::Module { decls, .. } = &u.node {
1350 let access = decls
1351 .iter()
1352 .find_map(|decl| match &decl.node {
1353 crate::ast::decl::Decl::AccessList { names, .. } => Some(names.clone()),
1354 _ => None,
1355 })
1356 .expect("expected access list");
1357 assert_eq!(
1358 access,
1359 vec![
1360 "assignment(=)".to_string(),
1361 "operator(+)".to_string(),
1362 "box_t".to_string()
1363 ]
1364 );
1365 } else {
1366 panic!("not Module");
1367 }
1368 }
1369
1370 #[test]
1371 fn module_accepts_derived_type_def_without_colon_colon() {
1372 let u = parse_unit(
1373 "module m\n implicit none\n type node_ptr\n integer :: value\n end type node_ptr\nend module\n",
1374 );
1375 if let ProgramUnit::Module { decls, .. } = &u.node {
1376 assert!(decls
1377 .iter()
1378 .any(|decl| matches!(decl.node, crate::ast::decl::Decl::DerivedTypeDef { .. })));
1379 } else {
1380 panic!("not Module");
1381 }
1382 }
1383
1384 // ---- MULTI-UNIT FILES ----
1385
1386 #[test]
1387 fn multi_unit_file() {
1388 let units = parse_units("module m1\nend module\n\nmodule m2\n use m1\nend module\n\nprogram main\n use m2\nend program\n");
1389 assert_eq!(units.len(), 3);
1390 assert!(matches!(units[0].node, ProgramUnit::Module { .. }));
1391 assert!(matches!(units[1].node, ProgramUnit::Module { .. }));
1392 assert!(matches!(units[2].node, ProgramUnit::Program { .. }));
1393 }
1394
1395 // ---- BIND(C) ----
1396
1397 #[test]
1398 fn subroutine_bind_c() {
1399 let u = parse_unit("subroutine cfunc(x) bind(c)\n real :: x\nend subroutine\n");
1400 if let ProgramUnit::Subroutine { bind, .. } = &u.node {
1401 assert!(bind.is_some(), "should have BindInfo");
1402 assert!(bind.as_ref().unwrap().name.is_none(), "no name= specified");
1403 } else {
1404 panic!("not Subroutine");
1405 }
1406 }
1407
1408 #[test]
1409 fn subroutine_bind_c_with_name() {
1410 let u =
1411 parse_unit("subroutine foo(x) bind(c, name='c_foo')\n real :: x\nend subroutine\n");
1412 if let ProgramUnit::Subroutine { bind, .. } = &u.node {
1413 assert!(bind.is_some());
1414 assert_eq!(bind.as_ref().unwrap().name.as_deref(), Some("'c_foo'"));
1415 } else {
1416 panic!("not Subroutine");
1417 }
1418 }
1419 }
1420