| 1 | //! Fixed-form (F77) Fortran lexer. |
| 2 | //! |
| 3 | //! Two-pass approach: |
| 4 | //! 1. Preprocess lines: identify comments, extract labels, join continuations, |
| 5 | //! strip columns 73+, handle tab-form extension. |
| 6 | //! 2. Tokenize each logical statement body, handling whitespace insensitivity |
| 7 | //! and Hollerith constants. |
| 8 | //! |
| 9 | //! Produces the same Token types as the free-form lexer. |
| 10 | |
| 11 | use super::{is_keyword, is_known_dot_op, LexError, Position, Span, Token, TokenKind}; |
| 12 | |
| 13 | /// Tokenize fixed-form Fortran source. |
| 14 | pub fn tokenize_fixed(src: &str, file_id: u32) -> Result<Vec<Token>, LexError> { |
| 15 | let statements = preprocess_lines(src, file_id); |
| 16 | let mut tokens = Vec::new(); |
| 17 | |
| 18 | for stmt in &statements { |
| 19 | match stmt { |
| 20 | FixedLine::Comment { text, span } => { |
| 21 | tokens.push(Token { |
| 22 | kind: TokenKind::Comment, |
| 23 | text: text.clone(), |
| 24 | span: *span, |
| 25 | }); |
| 26 | tokens.push(Token { |
| 27 | kind: TokenKind::Newline, |
| 28 | text: "\n".into(), |
| 29 | span: *span, |
| 30 | }); |
| 31 | } |
| 32 | FixedLine::Statement { |
| 33 | label, |
| 34 | body, |
| 35 | start_line, |
| 36 | file_id: fid, |
| 37 | } => { |
| 38 | // Emit label as integer literal if present. |
| 39 | if let Some(label_text) = label { |
| 40 | let label_trimmed = label_text.trim(); |
| 41 | if !label_trimmed.is_empty() { |
| 42 | tokens.push(Token { |
| 43 | kind: TokenKind::IntegerLiteral, |
| 44 | text: label_trimmed.to_string(), |
| 45 | span: Span { |
| 46 | file_id: *fid, |
| 47 | start: Position { |
| 48 | line: *start_line, |
| 49 | col: 1, |
| 50 | }, |
| 51 | end: Position { |
| 52 | line: *start_line, |
| 53 | col: 6, |
| 54 | }, |
| 55 | }, |
| 56 | }); |
| 57 | } |
| 58 | } |
| 59 | |
| 60 | // Tokenize the body with the whitespace-insensitive scanner. |
| 61 | let body_tokens = tokenize_body(body, *fid, *start_line)?; |
| 62 | tokens.extend(body_tokens); |
| 63 | |
| 64 | tokens.push(Token { |
| 65 | kind: TokenKind::Newline, |
| 66 | text: "\n".into(), |
| 67 | span: Span { |
| 68 | file_id: *fid, |
| 69 | start: Position { |
| 70 | line: *start_line, |
| 71 | col: 1, |
| 72 | }, |
| 73 | end: Position { |
| 74 | line: *start_line, |
| 75 | col: 1, |
| 76 | }, |
| 77 | }, |
| 78 | }); |
| 79 | } |
| 80 | FixedLine::Blank { span } => { |
| 81 | tokens.push(Token { |
| 82 | kind: TokenKind::Newline, |
| 83 | text: "\n".into(), |
| 84 | span: *span, |
| 85 | }); |
| 86 | } |
| 87 | } |
| 88 | } |
| 89 | |
| 90 | tokens.push(Token { |
| 91 | kind: TokenKind::Eof, |
| 92 | text: String::new(), |
| 93 | span: Span { |
| 94 | file_id, |
| 95 | start: Position { |
| 96 | line: src.lines().count() as u32 + 1, |
| 97 | col: 1, |
| 98 | }, |
| 99 | end: Position { |
| 100 | line: src.lines().count() as u32 + 1, |
| 101 | col: 1, |
| 102 | }, |
| 103 | }, |
| 104 | }); |
| 105 | |
| 106 | Ok(tokens) |
| 107 | } |
| 108 | |
/// Build an "unexpected character" message for the character at byte
/// offset `pos` in `text`.
///
/// Decodes the full character (UTF-8 aware) instead of casting a single
/// byte, so multi-byte characters are reported intact; falls back to `'?'`
/// when `pos` is out of range or not on a character boundary.
fn unexpected_char_message(text: &str, pos: usize, context: &str) -> String {
    let ch = text
        .get(pos..)
        .and_then(|rest| rest.chars().next())
        .unwrap_or('?');
    format!("{context}: '{ch}'")
}
| 113 | |
| 114 | // ---- Whitespace-insensitive body tokenizer ---- |
| 115 | |
/// Tokenize a fixed-form statement body with whitespace insensitivity.
///
/// Three-phase approach:
/// 1. Protect Hollerith constants (nH...) by converting to string literals before stripping
/// 2. Strip all whitespace outside string literals
/// 3. Tokenize with keyword-splitting: longest keyword prefix match at letter runs
///
/// Span columns are computed as `stripped offset + 7` (column 7 is where a
/// fixed-form body begins), so once whitespace has been stripped they are
/// approximate with respect to the original source line.
fn tokenize_body(body: &str, file_id: u32, line: u32) -> Result<Vec<Token>, LexError> {
    // Phase 1: Convert Hollerith constants to string literals (preserves their spaces).
    let hollerith_protected = protect_hollerith(body);
    // Phase 2: Strip whitespace outside string literals.
    let stripped = strip_whitespace_outside_strings(&hollerith_protected);
    let bytes = stripped.as_bytes();
    let mut tokens = Vec::new();
    let mut pos = 0;

    // Phase 3: dispatch on the first byte of each lexeme. Branch order
    // matters: the BOZ check must run before the identifier scan (else
    // `B'101'` would lex as identifier `B`), and the dot branch must
    // distinguish `.5` (number) from `.EQ.` (operator).
    while pos < bytes.len() {
        let col = (pos as u32) + 7;
        let start = Position { line, col };
        let ch = bytes[pos];

        // Comment (! to end): consumes the remainder of the statement.
        if ch == b'!' {
            tokens.push(Token {
                kind: TokenKind::Comment,
                text: stripped[pos..].to_string(),
                span: Span {
                    file_id,
                    start,
                    end: Position {
                        line,
                        col: col + (bytes.len() - pos) as u32,
                    },
                },
            });
            break;
        }

        // String literal.
        if ch == b'\'' || ch == b'"' {
            let (tok, consumed) = lex_fixed_string(&stripped, pos, file_id, line)?;
            tokens.push(tok);
            pos += consumed;
            continue;
        }

        // Dot-operator or real starting with dot (`.5`).
        if ch == b'.' {
            if pos + 1 < bytes.len() && bytes[pos + 1].is_ascii_digit() {
                let (tok, consumed) = lex_fixed_number(&stripped, pos, file_id, line);
                tokens.push(tok);
                pos += consumed;
            } else {
                let (tok, consumed) = lex_fixed_dot_op(&stripped, pos, file_id, line)?;
                tokens.push(tok);
                pos += consumed;
            }
            continue;
        }

        // Number (integer or real).
        if ch.is_ascii_digit() {
            let (tok, consumed) = lex_fixed_number(&stripped, pos, file_id, line);
            tokens.push(tok);
            pos += consumed;
            continue;
        }

        // BOZ literal: B/O/Z followed by quote (checked before identifiers).
        if matches!(ch, b'B' | b'b' | b'O' | b'o' | b'Z' | b'z')
            && pos + 1 < bytes.len()
            && matches!(bytes[pos + 1], b'\'' | b'"')
        {
            let (tok, consumed) = lex_fixed_boz(&stripped, pos, file_id, line)?;
            tokens.push(tok);
            pos += consumed;
            continue;
        }

        // Letter — keyword or identifier with fixed-form prefix splitting.
        // Prior tokens are passed so the splitter can judge the context.
        if ch.is_ascii_alphabetic() || ch == b'_' {
            let (tok, consumed) =
                lex_fixed_ident_or_keyword(&stripped, pos, file_id, line, &tokens);
            tokens.push(tok);
            pos += consumed;
            continue;
        }

        // Operators and punctuation (errors on anything unrecognized).
        let (tok, consumed) = lex_fixed_punct(&stripped, pos, file_id, line)?;
        tokens.push(tok);
        pos += consumed;
    }

    Ok(tokens)
}
| 211 | |
/// Convert Hollerith constants (nH...) to quoted string literals BEFORE whitespace stripping.
/// This preserves spaces inside Hollerith content: `6H HELLO` → `' HELLO'`.
///
/// Existing string literals are copied verbatim (a doubled quote is the
/// escape form), so an `nH` sequence inside one is never rewritten. The
/// Hollerith count is taken in bytes; if it would overrun the line — or
/// would split a multi-byte UTF-8 character — the scanned digits are
/// emitted unchanged instead of panicking. All copying is done with string
/// slices so multi-byte characters survive intact (a per-byte `as char`
/// copy would mangle them).
fn protect_hollerith(body: &str) -> String {
    let bytes = body.as_bytes();
    let mut result = String::with_capacity(body.len());
    let mut i = 0;

    while i < bytes.len() {
        // Inside a string literal: copy the whole literal verbatim.
        if bytes[i] == b'\'' || bytes[i] == b'"' {
            let quote = bytes[i];
            let lit_start = i;
            i += 1;
            while i < bytes.len() {
                if bytes[i] == quote {
                    i += 1;
                    if i < bytes.len() && bytes[i] == quote {
                        i += 1; // doubled quote: still inside the literal
                    } else {
                        break; // closing quote consumed
                    }
                } else {
                    i += 1;
                }
            }
            result.push_str(&body[lit_start..i]);
            continue;
        }

        // Hollerith candidate: a digit run followed by H/h, not preceded by
        // an identifier character (so `X1H` stays part of an identifier).
        if bytes[i].is_ascii_digit() {
            let preceded_by_alnum =
                i > 0 && (bytes[i - 1].is_ascii_alphanumeric() || bytes[i - 1] == b'_');
            if !preceded_by_alnum {
                let digit_start = i;
                while i < bytes.len() && bytes[i].is_ascii_digit() {
                    i += 1;
                }
                if i < bytes.len() && matches!(bytes[i], b'H' | b'h') {
                    if let Ok(count) = body[digit_start..i].parse::<usize>() {
                        i += 1; // consume the H
                        // `checked_add` + `get` guard overflow, overrun, and
                        // mid-character slices; on failure fall through and
                        // restore the scanned text unchanged.
                        let content = i
                            .checked_add(count)
                            .and_then(|end| body.get(i..end));
                        if let Some(content) = content {
                            // Replace nH<chars> with '<chars>'.
                            result.push('\'');
                            result.push_str(content);
                            result.push('\'');
                            i += count;
                            continue;
                        }
                    }
                }
                // Not a Hollerith — put the scanned digits (and a consumed
                // H, if any) back.
                result.push_str(&body[digit_start..i]);
                continue;
            }
        }

        // Default: copy one full character (not one byte) so multi-byte
        // UTF-8 sequences survive intact.
        let ch_len = body[i..].chars().next().map_or(1, |c| c.len_utf8());
        result.push_str(&body[i..i + ch_len]);
        i += ch_len;
    }
    result
}
| 275 | |
/// Strip spaces and tabs from body text, preserving content inside string
/// literals (doubled quotes are the escape form and stay verbatim).
///
/// Copies string literals and ordinary characters as whole slices, so
/// multi-byte UTF-8 characters survive intact (a per-byte `as char` copy
/// would mangle them).
fn strip_whitespace_outside_strings(body: &str) -> String {
    let bytes = body.as_bytes();
    let mut result = String::with_capacity(body.len());
    let mut i = 0;
    while i < bytes.len() {
        match bytes[i] {
            // String literal: copy the whole thing verbatim.
            q @ (b'\'' | b'"') => {
                let lit_start = i;
                i += 1;
                while i < bytes.len() {
                    if bytes[i] == q {
                        i += 1;
                        if i < bytes.len() && bytes[i] == q {
                            i += 1; // doubled quote: still inside the literal
                        } else {
                            break; // closing quote consumed
                        }
                    } else {
                        i += 1;
                    }
                }
                result.push_str(&body[lit_start..i]);
            }
            // Whitespace outside a literal is dropped.
            b' ' | b'\t' => i += 1,
            // Copy one full character so UTF-8 sequences stay intact.
            _ => {
                let ch_len = body[i..].chars().next().map_or(1, |c| c.len_utf8());
                result.push_str(&body[i..i + ch_len]);
                i += ch_len;
            }
        }
    }
    result
}
| 309 | |
| 310 | /// Lex a string literal in whitespace-stripped body. |
| 311 | fn lex_fixed_string( |
| 312 | text: &str, |
| 313 | pos: usize, |
| 314 | file_id: u32, |
| 315 | line: u32, |
| 316 | ) -> Result<(Token, usize), LexError> { |
| 317 | let bytes = text.as_bytes(); |
| 318 | let quote = bytes[pos]; |
| 319 | let mut end = pos + 1; |
| 320 | let mut tok_text = String::new(); |
| 321 | tok_text.push(quote as char); |
| 322 | |
| 323 | let mut closed = false; |
| 324 | while end < bytes.len() { |
| 325 | tok_text.push(bytes[end] as char); |
| 326 | if bytes[end] == quote { |
| 327 | end += 1; |
| 328 | if end < bytes.len() && bytes[end] == quote { |
| 329 | tok_text.push(bytes[end] as char); |
| 330 | end += 1; |
| 331 | } else { |
| 332 | closed = true; |
| 333 | break; |
| 334 | } |
| 335 | } else { |
| 336 | end += 1; |
| 337 | } |
| 338 | } |
| 339 | |
| 340 | if !closed { |
| 341 | let col = (pos as u32) + 7; |
| 342 | return Err(LexError { |
| 343 | span: Span { |
| 344 | file_id, |
| 345 | start: Position { line, col }, |
| 346 | end: Position { line, col }, |
| 347 | }, |
| 348 | msg: "unterminated string literal in fixed-form body".into(), |
| 349 | }); |
| 350 | } |
| 351 | |
| 352 | let col = (pos as u32) + 7; |
| 353 | Ok(( |
| 354 | Token { |
| 355 | kind: TokenKind::StringLiteral, |
| 356 | text: tok_text, |
| 357 | span: Span { |
| 358 | file_id, |
| 359 | start: Position { line, col }, |
| 360 | end: Position { |
| 361 | line, |
| 362 | col: col + (end - pos) as u32, |
| 363 | }, |
| 364 | }, |
| 365 | }, |
| 366 | end - pos, |
| 367 | )) |
| 368 | } |
| 369 | |
| 370 | /// Lex a dot-operator (.AND., .EQ., .TRUE., .myop.) in whitespace-stripped body. |
| 371 | fn lex_fixed_dot_op( |
| 372 | text: &str, |
| 373 | pos: usize, |
| 374 | file_id: u32, |
| 375 | line: u32, |
| 376 | ) -> Result<(Token, usize), LexError> { |
| 377 | let bytes = text.as_bytes(); |
| 378 | let mut end = pos + 1; // skip first dot |
| 379 | let mut name = String::new(); |
| 380 | |
| 381 | while end < bytes.len() && (bytes[end].is_ascii_alphabetic() || bytes[end] == b'_') { |
| 382 | name.push(bytes[end] as char); |
| 383 | end += 1; |
| 384 | } |
| 385 | |
| 386 | if end < bytes.len() && bytes[end] == b'.' { |
| 387 | end += 1; // closing dot |
| 388 | } |
| 389 | |
| 390 | let lower = name.to_lowercase(); |
| 391 | let col = (pos as u32) + 7; |
| 392 | let tok_text = format!(".{}.", name); |
| 393 | let span = Span { |
| 394 | file_id, |
| 395 | start: Position { line, col }, |
| 396 | end: Position { |
| 397 | line, |
| 398 | col: col + (end - pos) as u32, |
| 399 | }, |
| 400 | }; |
| 401 | |
| 402 | if lower == "true" || lower == "false" { |
| 403 | // Check for kind suffix. |
| 404 | let mut full_text = tok_text; |
| 405 | if end < bytes.len() && bytes[end] == b'_' { |
| 406 | full_text.push('_'); |
| 407 | end += 1; |
| 408 | while end < bytes.len() && (bytes[end].is_ascii_alphanumeric() || bytes[end] == b'_') { |
| 409 | full_text.push(bytes[end] as char); |
| 410 | end += 1; |
| 411 | } |
| 412 | } |
| 413 | return Ok(( |
| 414 | Token { |
| 415 | kind: TokenKind::LogicalLiteral, |
| 416 | text: full_text, |
| 417 | span, |
| 418 | }, |
| 419 | end - pos, |
| 420 | )); |
| 421 | } |
| 422 | |
| 423 | let kind = if is_known_dot_op(&lower) { |
| 424 | TokenKind::DotOp(lower) |
| 425 | } else { |
| 426 | TokenKind::DefinedOp(name.to_lowercase()) |
| 427 | }; |
| 428 | |
| 429 | Ok(( |
| 430 | Token { |
| 431 | kind, |
| 432 | text: tok_text, |
| 433 | span, |
| 434 | }, |
| 435 | end - pos, |
| 436 | )) |
| 437 | } |
| 438 | |
/// Lex a number (integer or real) in whitespace-stripped body.
///
/// Disambiguation heuristics (order-sensitive):
/// - a `.` after digits joins the number only when it cannot start a
///   dot-operator, so `5.EQ.` lexes as `5` then `.eq.` while `5.` and
///   `5.e3` stay real literals;
/// - `e`/`d` begins an exponent only when followed by a digit or a signed
///   digit, so `10DO` lexes as `10` + identifier run `DO`;
/// - a trailing `_kind` suffix is absorbed into the literal text.
fn lex_fixed_number(text: &str, pos: usize, file_id: u32, line: u32) -> (Token, usize) {
    let bytes = text.as_bytes();
    let mut end = pos;
    let mut is_real = false;
    let mut tok_text = String::new();

    // Leading digits.
    while end < bytes.len() && bytes[end].is_ascii_digit() {
        tok_text.push(bytes[end] as char);
        end += 1;
    }

    // Decimal point — but not if followed by letter (dot-op like .EQ.).
    if end < bytes.len() && bytes[end] == b'.' {
        let after_dot = if end + 1 < bytes.len() {
            bytes[end + 1]
        } else {
            0 // sentinel: treat end-of-text as "nothing follows"
        };
        let dot_is_numeric = after_dot.is_ascii_digit()
            || tok_text.is_empty() // leading dot — caller saw `.<digit>`
            || {
                // Check for exponent: .e5 vs .eq.
                if matches!(after_dot, b'e' | b'E' | b'd' | b'D') {
                    let after_ed = if end + 2 < bytes.len() { bytes[end + 2] } else { 0 };
                    matches!(after_ed, b'0'..=b'9' | b'+' | b'-')
                } else {
                    !after_dot.is_ascii_alphabetic() // 5. followed by op/end
                }
            };

        if dot_is_numeric {
            is_real = true;
            tok_text.push(bytes[end] as char);
            end += 1;
            // Fractional digits.
            while end < bytes.len() && bytes[end].is_ascii_digit() {
                tok_text.push(bytes[end] as char);
                end += 1;
            }
        }
    }

    // Exponent — only consume e/d if followed by digit or +/- then digit.
    // This prevents `10DO` from being lexed as real `10D` + identifier `O`.
    if end < bytes.len() && matches!(bytes[end], b'e' | b'E' | b'd' | b'D') {
        let after_ed = if end + 1 < bytes.len() {
            bytes[end + 1]
        } else {
            0
        };
        let has_exponent_digits = after_ed.is_ascii_digit()
            || (matches!(after_ed, b'+' | b'-')
                && end + 2 < bytes.len()
                && bytes[end + 2].is_ascii_digit());

        if has_exponent_digits {
            is_real = true;
            tok_text.push(bytes[end] as char); // e/E/d/D
            end += 1;
            if end < bytes.len() && matches!(bytes[end], b'+' | b'-') {
                tok_text.push(bytes[end] as char);
                end += 1;
            }
            while end < bytes.len() && bytes[end].is_ascii_digit() {
                tok_text.push(bytes[end] as char);
                end += 1;
            }
        }
    }

    // Kind suffix (`_4`, `_ikind`): absorbed into the literal.
    if end < bytes.len() && bytes[end] == b'_' {
        tok_text.push(bytes[end] as char);
        end += 1;
        while end < bytes.len() && (bytes[end].is_ascii_alphanumeric() || bytes[end] == b'_') {
            tok_text.push(bytes[end] as char);
            end += 1;
        }
    }

    let col = (pos as u32) + 7;
    let kind = if is_real {
        TokenKind::RealLiteral
    } else {
        TokenKind::IntegerLiteral
    };
    (
        Token {
            kind,
            text: tok_text,
            span: Span {
                file_id,
                start: Position { line, col },
                end: Position {
                    line,
                    col: col + (end - pos) as u32,
                },
            },
        },
        end - pos,
    )
}
| 542 | |
| 543 | /// Lex an identifier or keyword in whitespace-stripped fixed-form body. |
| 544 | /// |
| 545 | /// Fixed-form removes spaces from the statement body, so common source like |
| 546 | /// `PROGRAM HELLO` and `INTEGER I, N` reaches us as `PROGRAMHELLO` and |
| 547 | /// `INTEGERI,N`. The parser does not have enough context to recover those |
| 548 | /// boundaries reliably from a single opaque identifier token, so the fixed-form |
| 549 | /// lexer splits a small set of keyword prefixes when we are at a statement |
| 550 | /// boundary or another keyword-following context. |
| 551 | /// |
| 552 | /// The DO/assignment ambiguity still needs special handling before the generic |
| 553 | /// prefix splitter because `DO10I=1,10` is a loop while `DO10I=1.10` is an |
| 554 | /// assignment. |
| 555 | fn lex_fixed_ident_or_keyword( |
| 556 | text: &str, |
| 557 | pos: usize, |
| 558 | file_id: u32, |
| 559 | line: u32, |
| 560 | prior_tokens: &[Token], |
| 561 | ) -> (Token, usize) { |
| 562 | let bytes = text.as_bytes(); |
| 563 | let mut run_end = pos; |
| 564 | while run_end < bytes.len() |
| 565 | && (bytes[run_end].is_ascii_alphanumeric() || bytes[run_end] == b'_') |
| 566 | { |
| 567 | run_end += 1; |
| 568 | } |
| 569 | let run = &text[pos..run_end]; |
| 570 | let run_lower = run.to_lowercase(); |
| 571 | |
| 572 | // DO/assignment ambiguity: if the run starts with "do" followed by digits, |
| 573 | // check if this is a DO loop (has comma after =) or an assignment. |
| 574 | if run_lower.starts_with("do") |
| 575 | && run.len() > 2 |
| 576 | && run.as_bytes()[2].is_ascii_digit() |
| 577 | && is_do_loop_context(text, pos + 2) |
| 578 | { |
| 579 | // IS a DO loop — emit just "DO" (2 chars). Subsequent calls |
| 580 | // will pick up the label (digits) and variable (letters) separately. |
| 581 | let col = (pos as u32) + 7; |
| 582 | return ( |
| 583 | Token { |
| 584 | kind: TokenKind::Identifier, |
| 585 | text: run[..2].to_string(), |
| 586 | span: Span { |
| 587 | file_id, |
| 588 | start: Position { line, col }, |
| 589 | end: Position { line, col: col + 2 }, |
| 590 | }, |
| 591 | }, |
| 592 | 2, |
| 593 | ); |
| 594 | } |
| 595 | |
| 596 | if let Some(prefix_len) = split_fixed_keyword_prefix(text, pos, run, prior_tokens) { |
| 597 | return make_ident_token(&run[..prefix_len], pos, file_id, line); |
| 598 | } |
| 599 | |
| 600 | // Emit the entire alphanumeric run as one identifier. |
| 601 | make_ident_token(run, pos, file_id, line) |
| 602 | } |
| 603 | |
| 604 | fn split_fixed_keyword_prefix( |
| 605 | text: &str, |
| 606 | pos: usize, |
| 607 | run: &str, |
| 608 | prior_tokens: &[Token], |
| 609 | ) -> Option<usize> { |
| 610 | if !allow_fixed_keyword_split(prior_tokens) || run.len() <= 4 { |
| 611 | return None; |
| 612 | } |
| 613 | |
| 614 | let trailing = text.as_bytes().get(pos + run.len()).copied(); |
| 615 | if matches!(trailing, Some(b'=') | Some(b'%')) { |
| 616 | return None; |
| 617 | } |
| 618 | |
| 619 | for prefix_len in (4..run.len()).rev() { |
| 620 | let prefix = &run[..prefix_len]; |
| 621 | let prefix_lower = prefix.to_ascii_lowercase(); |
| 622 | let suffix = &run[prefix_len..]; |
| 623 | let suffix_first = suffix.as_bytes()[0]; |
| 624 | |
| 625 | let is_fixed_keyword = prefix_lower == "endtype" || is_keyword(prefix).is_some(); |
| 626 | if !is_fixed_keyword { |
| 627 | continue; |
| 628 | } |
| 629 | |
| 630 | if suffix_first.is_ascii_digit() && !matches!(prefix_lower.as_str(), "goto" | "call") { |
| 631 | continue; |
| 632 | } |
| 633 | |
| 634 | return Some(prefix_len); |
| 635 | } |
| 636 | |
| 637 | None |
| 638 | } |
| 639 | |
| 640 | fn allow_fixed_keyword_split(prior_tokens: &[Token]) -> bool { |
| 641 | let Some(prev) = prior_tokens.last() else { |
| 642 | return true; |
| 643 | }; |
| 644 | |
| 645 | match prev.kind { |
| 646 | TokenKind::Comma | TokenKind::ColonColon => true, |
| 647 | TokenKind::Identifier => matches!( |
| 648 | prev.text.to_ascii_lowercase().as_str(), |
| 649 | "integer" |
| 650 | | "real" |
| 651 | | "doubleprecision" |
| 652 | | "doublecomplex" |
| 653 | | "complex" |
| 654 | | "character" |
| 655 | | "logical" |
| 656 | | "type" |
| 657 | | "class" |
| 658 | | "implicit" |
| 659 | | "program" |
| 660 | | "module" |
| 661 | | "submodule" |
| 662 | | "subroutine" |
| 663 | | "function" |
| 664 | | "entry" |
| 665 | | "call" |
| 666 | | "pure" |
| 667 | | "impure" |
| 668 | | "elemental" |
| 669 | | "recursive" |
| 670 | | "end" |
| 671 | | "endtype" |
| 672 | ), |
| 673 | _ => false, |
| 674 | } |
| 675 | } |
| 676 | |
| 677 | fn make_ident_token(text: &str, pos: usize, file_id: u32, line: u32) -> (Token, usize) { |
| 678 | let col = (pos as u32) + 7; |
| 679 | ( |
| 680 | Token { |
| 681 | kind: TokenKind::Identifier, |
| 682 | text: text.to_string(), |
| 683 | span: Span { |
| 684 | file_id, |
| 685 | start: Position { line, col }, |
| 686 | end: Position { |
| 687 | line, |
| 688 | col: col + text.len() as u32, |
| 689 | }, |
| 690 | }, |
| 691 | }, |
| 692 | text.len(), |
| 693 | ) |
| 694 | } |
| 695 | |
| 696 | /// Check if the rest of the statement after DO+digits looks like a DO loop. |
| 697 | /// A DO loop has: DO [label] variable = start , end [, step] |
| 698 | /// An assignment has: DO[label][var] = expr (no top-level comma after =). |
| 699 | fn is_do_loop_context(text: &str, after_do: usize) -> bool { |
| 700 | let bytes = text.as_bytes(); |
| 701 | |
| 702 | // Find '=' that is not inside strings or parens. |
| 703 | let eq_pos = find_top_level_char(bytes, after_do, b'='); |
| 704 | let eq_pos = match eq_pos { |
| 705 | Some(p) => p, |
| 706 | None => return false, |
| 707 | }; |
| 708 | |
| 709 | // Make sure '=' is not '==' (comparison). |
| 710 | if eq_pos + 1 < bytes.len() && bytes[eq_pos + 1] == b'=' { |
| 711 | return false; |
| 712 | } |
| 713 | |
| 714 | // Check for a top-level comma after the '='. |
| 715 | find_top_level_char(bytes, eq_pos + 1, b',').is_some() |
| 716 | } |
| 717 | |
/// Find the first occurrence of `target` at the top level: outside every
/// string literal and outside all parentheses. Scanning starts at `start`.
fn find_top_level_char(bytes: &[u8], start: usize, target: u8) -> Option<usize> {
    let mut depth = 0i32;
    let mut i = start;

    while let Some(&b) = bytes.get(i) {
        // Skip over a string literal, honoring doubled-quote escapes.
        if b == b'\'' || b == b'"' {
            i += 1;
            loop {
                match bytes.get(i) {
                    None => break,
                    Some(&c) if c == b => {
                        i += 1;
                        if bytes.get(i) == Some(&b) {
                            i += 1; // doubled quote: still inside
                        } else {
                            break; // closing quote
                        }
                    }
                    Some(_) => i += 1,
                }
            }
            continue;
        }

        match b {
            b'(' => depth += 1,
            b')' => depth -= 1,
            _ if b == target && depth == 0 => return Some(i),
            _ => {}
        }
        i += 1;
    }
    None
}
| 759 | |
| 760 | /// Lex a BOZ literal in fixed-form body. |
| 761 | fn lex_fixed_boz( |
| 762 | text: &str, |
| 763 | pos: usize, |
| 764 | file_id: u32, |
| 765 | line: u32, |
| 766 | ) -> Result<(Token, usize), LexError> { |
| 767 | let bytes = text.as_bytes(); |
| 768 | let mut end = pos; |
| 769 | let mut tok_text = String::new(); |
| 770 | |
| 771 | tok_text.push(bytes[end] as char); // B/O/Z |
| 772 | end += 1; |
| 773 | let quote = bytes[end]; |
| 774 | tok_text.push(quote as char); // opening quote |
| 775 | end += 1; |
| 776 | |
| 777 | while end < bytes.len() && bytes[end] != quote { |
| 778 | tok_text.push(bytes[end] as char); |
| 779 | end += 1; |
| 780 | } |
| 781 | if end >= bytes.len() { |
| 782 | return Err(LexError { |
| 783 | span: Span { |
| 784 | file_id, |
| 785 | start: Position { |
| 786 | line, |
| 787 | col: (pos as u32) + 7, |
| 788 | }, |
| 789 | end: Position { |
| 790 | line, |
| 791 | col: (pos as u32) + 7, |
| 792 | }, |
| 793 | }, |
| 794 | msg: "unterminated BOZ literal".into(), |
| 795 | }); |
| 796 | } |
| 797 | tok_text.push(bytes[end] as char); // closing quote |
| 798 | end += 1; |
| 799 | |
| 800 | let col = (pos as u32) + 7; |
| 801 | Ok(( |
| 802 | Token { |
| 803 | kind: TokenKind::BozLiteral, |
| 804 | text: tok_text, |
| 805 | span: Span { |
| 806 | file_id, |
| 807 | start: Position { line, col }, |
| 808 | end: Position { |
| 809 | line, |
| 810 | col: col + (end - pos) as u32, |
| 811 | }, |
| 812 | }, |
| 813 | }, |
| 814 | end - pos, |
| 815 | )) |
| 816 | } |
| 817 | |
| 818 | /// Lex an operator or punctuation in whitespace-stripped body. |
| 819 | fn lex_fixed_punct( |
| 820 | text: &str, |
| 821 | pos: usize, |
| 822 | file_id: u32, |
| 823 | line: u32, |
| 824 | ) -> Result<(Token, usize), LexError> { |
| 825 | let bytes = text.as_bytes(); |
| 826 | let ch = bytes[pos]; |
| 827 | let next = if pos + 1 < bytes.len() { |
| 828 | bytes[pos + 1] |
| 829 | } else { |
| 830 | 0 |
| 831 | }; |
| 832 | let col = (pos as u32) + 7; |
| 833 | let start = Position { line, col }; |
| 834 | |
| 835 | let (kind, tok_text, consumed) = match ch { |
| 836 | b'+' => (TokenKind::Plus, "+", 1), |
| 837 | b'-' => (TokenKind::Minus, "-", 1), |
| 838 | b'*' if next == b'*' => (TokenKind::Power, "**", 2), |
| 839 | b'*' => (TokenKind::Star, "*", 1), |
| 840 | b'/' if next == b'/' => (TokenKind::Concat, "//", 2), |
| 841 | b'/' if next == b'=' => (TokenKind::Ne, "/=", 2), |
| 842 | b'/' => (TokenKind::Slash, "/", 1), |
| 843 | b'=' if next == b'=' => (TokenKind::Eq, "==", 2), |
| 844 | b'=' if next == b'>' => (TokenKind::Arrow, "=>", 2), |
| 845 | b'=' => (TokenKind::Assign, "=", 1), |
| 846 | b'<' if next == b'=' => (TokenKind::Le, "<=", 2), |
| 847 | b'<' => (TokenKind::Lt, "<", 1), |
| 848 | b'>' if next == b'=' => (TokenKind::Ge, ">=", 2), |
| 849 | b'>' => (TokenKind::Gt, ">", 1), |
| 850 | b'(' => (TokenKind::LParen, "(", 1), |
| 851 | b')' => (TokenKind::RParen, ")", 1), |
| 852 | b'[' => (TokenKind::LBracket, "[", 1), |
| 853 | b']' => (TokenKind::RBracket, "]", 1), |
| 854 | b',' => (TokenKind::Comma, ",", 1), |
| 855 | b':' if next == b':' => (TokenKind::ColonColon, "::", 2), |
| 856 | b':' => (TokenKind::Colon, ":", 1), |
| 857 | b';' => (TokenKind::Semicolon, ";", 1), |
| 858 | b'%' => (TokenKind::Percent, "%", 1), |
| 859 | b'&' => (TokenKind::Ampersand, "&", 1), |
| 860 | _ => { |
| 861 | return Err(LexError { |
| 862 | span: Span { |
| 863 | file_id, |
| 864 | start, |
| 865 | end: start, |
| 866 | }, |
| 867 | msg: unexpected_char_message(text, pos, "unexpected character in fixed-form body"), |
| 868 | }); |
| 869 | } |
| 870 | }; |
| 871 | |
| 872 | Ok(( |
| 873 | Token { |
| 874 | kind, |
| 875 | text: tok_text.into(), |
| 876 | span: Span { |
| 877 | file_id, |
| 878 | start, |
| 879 | end: Position { |
| 880 | line, |
| 881 | col: col + consumed as u32, |
| 882 | }, |
| 883 | }, |
| 884 | }, |
| 885 | consumed, |
| 886 | )) |
| 887 | } |
| 888 | |
| 889 | // ---- Line preprocessing ---- |
| 890 | |
/// One logical line produced by fixed-form preprocessing.
enum FixedLine {
    /// A full-line comment (`C`, `c`, `*`, or `!` in column 1), verbatim.
    Comment {
        text: String,
        span: Span,
    },
    /// A statement: the optional label from columns 1-5 and the body text
    /// of the initial line with all continuation lines joined on.
    Statement {
        label: Option<String>,
        body: String,
        start_line: u32, // line number of the statement's initial line
        file_id: u32,
    },
    /// A blank source line.
    Blank {
        span: Span,
    },
}
| 906 | |
/// Preprocess fixed-form lines: identify comments, extract labels, join
/// continuations, strip columns 73+, handle tab-form.
///
/// Emits one `FixedLine` per logical line. Comment and blank lines that
/// appear between a statement and its continuations are handled inside the
/// inner loop so the continuation chain is not broken; `start_line` always
/// refers to the statement's initial physical line.
fn preprocess_lines(src: &str, file_id: u32) -> Vec<FixedLine> {
    let lines: Vec<&str> = src.lines().collect();
    let mut result = Vec::new();
    let mut i = 0;

    while i < lines.len() {
        let line = lines[i];
        let line_num = (i + 1) as u32;

        // Blank line.
        if line.trim().is_empty() {
            result.push(FixedLine::Blank {
                span: Span {
                    file_id,
                    start: Position {
                        line: line_num,
                        col: 1,
                    },
                    end: Position {
                        line: line_num,
                        col: 1,
                    },
                },
            });
            i += 1;
            continue;
        }

        let first_byte = line.as_bytes().first().copied().unwrap_or(0);

        // Comment line: C, c, *, or ! in column 1.
        if matches!(first_byte, b'C' | b'c' | b'*' | b'!') {
            result.push(FixedLine::Comment {
                text: line.to_string(),
                span: Span {
                    file_id,
                    start: Position {
                        line: line_num,
                        col: 1,
                    },
                    end: Position {
                        line: line_num,
                        col: line.len() as u32,
                    },
                },
            });
            i += 1;
            continue;
        }

        // Extract columns from this line (label cols 1-5, body from col 7).
        let (label, body) = extract_fixed_columns(line);

        // Collect continuation lines into `full_body`.
        let start_line = line_num;
        let mut full_body = body;
        i += 1;

        while i < lines.len() {
            let next = lines[i];

            // Blank lines between continuations: skip them only if the line
            // after the blank is actually a continuation. Otherwise, the blank
            // terminates the statement and should be emitted by the outer loop.
            if next.trim().is_empty() {
                // Peek ahead: is the line after this blank a continuation?
                let lookahead = i + 1;
                if lookahead < lines.len() && is_continuation_line(lines[lookahead]) {
                    i += 1;
                    continue;
                }
                break; // blank line ends the statement
            }

            let next_first = next.as_bytes().first().copied().unwrap_or(0);
            // Comment lines between continuations: skip them.
            if matches!(next_first, b'C' | b'c' | b'*' | b'!') {
                // Emit the comment but don't break the continuation.
                result.push(FixedLine::Comment {
                    text: next.to_string(),
                    span: Span {
                        file_id,
                        start: Position {
                            line: (i + 1) as u32,
                            col: 1,
                        },
                        end: Position {
                            line: (i + 1) as u32,
                            col: next.len() as u32,
                        },
                    },
                });
                i += 1;
                continue;
            }

            // Check column 6 for continuation marker; append its body.
            if is_continuation_line(next) {
                let (_, cont_body) = extract_fixed_columns(next);
                full_body.push_str(&cont_body);
                i += 1;
            } else {
                break;
            }
        }

        // An all-blank label (columns 1-5 empty) becomes `None`.
        result.push(FixedLine::Statement {
            label: if label.trim().is_empty() {
                None
            } else {
                Some(label)
            },
            body: full_body,
            start_line,
            file_id,
        });
    }

    result
}
| 1029 | |
| 1030 | /// Check if a line is a continuation line (non-space, non-zero in column 6). |
| 1031 | fn is_continuation_line(line: &str) -> bool { |
| 1032 | let bytes = line.as_bytes(); |
| 1033 | |
| 1034 | // Tab-form: tab followed by digit 1-9 is continuation. |
| 1035 | if bytes.first() == Some(&b'\t') { |
| 1036 | if let Some(&d) = bytes.get(1) { |
| 1037 | return (b'1'..=b'9').contains(&d); |
| 1038 | } |
| 1039 | } |
| 1040 | |
| 1041 | // Standard: column 6 (0-indexed: byte 5) is non-space, non-zero. |
| 1042 | if bytes.len() >= 6 { |
| 1043 | let col6 = bytes[5]; |
| 1044 | return col6 != b' ' && col6 != b'0' && col6 != b'\t'; |
| 1045 | } |
| 1046 | |
| 1047 | false |
| 1048 | } |
| 1049 | |
| 1050 | /// Extract label (columns 1-5) and body (columns 7-72) from a fixed-form line. |
| 1051 | /// Handles tab-form extension. |
| 1052 | fn extract_fixed_columns(line: &str) -> (String, String) { |
| 1053 | let bytes = line.as_bytes(); |
| 1054 | |
| 1055 | // Tab-form: if first character is a tab, everything after is body (or continuation). |
| 1056 | if bytes.first() == Some(&b'\t') { |
| 1057 | // Tab followed by digit 1-9: continuation (body starts after the digit). |
| 1058 | if let Some(&d) = bytes.get(1) { |
| 1059 | if (b'1'..=b'9').contains(&d) { |
| 1060 | let body = if bytes.len() > 2 { |
| 1061 | String::from_utf8_lossy(&bytes[2..]).to_string() |
| 1062 | } else { |
| 1063 | String::new() |
| 1064 | }; |
| 1065 | return (String::new(), body); |
| 1066 | } |
| 1067 | } |
| 1068 | // Tab followed by anything else: body starts at position after tab. |
| 1069 | let body = if bytes.len() > 1 { |
| 1070 | String::from_utf8_lossy(&bytes[1..]).to_string() |
| 1071 | } else { |
| 1072 | String::new() |
| 1073 | }; |
| 1074 | return (String::new(), body); |
| 1075 | } |
| 1076 | |
| 1077 | // Standard fixed-form: columns 1-5 label, column 6 continuation marker, 7-72 body. |
| 1078 | let label = if bytes.len() >= 5 { |
| 1079 | String::from_utf8_lossy(&bytes[0..5]).to_string() |
| 1080 | } else { |
| 1081 | String::from_utf8_lossy(bytes).to_string() |
| 1082 | }; |
| 1083 | |
| 1084 | let body_start = 6.min(bytes.len()); |
| 1085 | let body_end = 72.min(bytes.len()); // columns 73+ are ignored |
| 1086 | let body = if body_start < bytes.len() { |
| 1087 | String::from_utf8_lossy(&bytes[body_start..body_end]).to_string() |
| 1088 | } else { |
| 1089 | String::new() |
| 1090 | }; |
| 1091 | |
| 1092 | (label, body) |
| 1093 | } |
| 1094 | |
| 1095 | // ---- Hollerith constants ---- |
| 1096 | |
#[cfg(test)]
mod tests {
    use super::*;
    use crate::lexer::TokenKind;

    /// Tokenize fixed-form source with a dummy file id, panicking on lex errors.
    fn fixed_toks(src: &str) -> Vec<Token> {
        tokenize_fixed(src, 0).unwrap()
    }

    /// Token kinds only, with Eof/Newline noise filtered out.
    fn fixed_kinds(src: &str) -> Vec<TokenKind> {
        fixed_toks(src)
            .into_iter()
            .map(|t| t.kind)
            .filter(|k| !matches!(k, TokenKind::Eof | TokenKind::Newline))
            .collect()
    }

    /// Token texts only, with Eof/Newline/Comment filtered out.
    fn fixed_texts(src: &str) -> Vec<String> {
        fixed_toks(src)
            .into_iter()
            .filter(|t| {
                !matches!(
                    t.kind,
                    TokenKind::Eof | TokenKind::Newline | TokenKind::Comment
                )
            })
            .map(|t| t.text)
            .collect()
    }

    // ---- Comment detection ----
    // Any of C/c/*/! in column 1 marks a full-line comment.

    #[test]
    fn comment_c_uppercase() {
        let k = fixed_kinds("C This is a comment\n");
        assert_eq!(k, vec![TokenKind::Comment]);
    }

    #[test]
    fn comment_c_lowercase() {
        let k = fixed_kinds("c This is a comment\n");
        assert_eq!(k, vec![TokenKind::Comment]);
    }

    #[test]
    fn comment_star() {
        let k = fixed_kinds("* This is a comment\n");
        assert_eq!(k, vec![TokenKind::Comment]);
    }

    #[test]
    fn comment_bang() {
        let k = fixed_kinds("! This is a comment\n");
        assert_eq!(k, vec![TokenKind::Comment]);
    }

    // ---- Statement labels ----

    #[test]
    fn statement_with_label() {
        // "   10 CONTINUE" — label 10 in columns 1-5, CONTINUE in 7+
        let texts = fixed_texts("   10 CONTINUE\n");
        assert!(texts.contains(&"10".to_string()), "got: {:?}", texts);
        assert!(texts.contains(&"CONTINUE".to_string()), "got: {:?}", texts);
    }

    #[test]
    fn statement_without_label() {
        // No label means the first token should be the identifier X, not a label number.
        let toks = fixed_toks("      X = 42\n");
        let first_meaningful = toks
            .iter()
            .find(|t| {
                !matches!(
                    t.kind,
                    TokenKind::Newline | TokenKind::Eof | TokenKind::Comment
                )
            })
            .unwrap();
        assert_eq!(first_meaningful.kind, TokenKind::Identifier);
        assert_eq!(first_meaningful.text, "X");
    }

    // ---- Column 73+ ignored ----

    #[test]
    fn columns_past_72_ignored() {
        // Columns 73+ should be stripped. Place code in 7-72, junk in 73+.
        // "      X = 42" is 12 chars; +60 spaces puts JUNK at column 73.
        let line = format!("      X = 42{}\n", " ".repeat(60) + "JUNK");
        // Body should be "X = 42" + spaces, NOT including JUNK.
        let texts = fixed_texts(&line);
        assert!(texts.contains(&"X".to_string()));
        assert!(
            !texts.iter().any(|t| t.contains("JUNK")),
            "got: {:?}",
            texts
        );
    }

    // ---- Continuation lines ----

    #[test]
    fn continuation_in_column_6() {
        // '+' in column 6 of the second line continues the first statement.
        let src = "      X = 1 +\n     + 2\n";
        let kinds = fixed_kinds(src);
        assert!(kinds.contains(&TokenKind::Plus));
        // Should have both integer literals.
        let int_count = kinds
            .iter()
            .filter(|k| **k == TokenKind::IntegerLiteral)
            .count();
        assert_eq!(int_count, 2, "expected 2 integer literals, got {:?}", kinds);
    }

    #[test]
    fn continuation_dollar_sign() {
        // Any non-space, non-zero character in column 6 is continuation.
        let src = "      X = 1 +\n     $ 2\n";
        let kinds = fixed_kinds(src);
        let int_count = kinds
            .iter()
            .filter(|k| **k == TokenKind::IntegerLiteral)
            .count();
        assert_eq!(int_count, 2);
    }

    // ---- Tab-form extension ----

    #[test]
    fn tab_form_statement() {
        // Leading tab replaces the column layout: body starts after the tab.
        let src = "\tX = 42\n";
        let texts = fixed_texts(src);
        assert!(texts.contains(&"X".to_string()));
        assert!(texts.contains(&"42".to_string()));
    }

    #[test]
    fn tab_form_continuation() {
        // Tab followed by digit 1-9 is continuation.
        let src = "\tX = 1 +\n\t1 2\n";
        let kinds = fixed_kinds(src);
        let int_count = kinds
            .iter()
            .filter(|k| **k == TokenKind::IntegerLiteral)
            .count();
        assert_eq!(int_count, 2, "got: {:?}", kinds);
    }

    // ---- Simple programs ----

    #[test]
    fn simple_fixed_form_program() {
        // End-to-end smoke test over a tiny but complete program: comment
        // line, labeled statement, DO loop, and program unit delimiters.
        let src = "\
C Hello World
      PROGRAM HELLO
      INTEGER I
      DO 10 I = 1, 10
      WRITE(*,*) I
   10 CONTINUE
      STOP
      END
";
        let tokens = tokenize_fixed(src, 0).unwrap();
        let ident_count = tokens
            .iter()
            .filter(|t| t.kind == TokenKind::Identifier)
            .count();
        assert!(
            ident_count >= 8,
            "expected 8+ identifiers, got {}",
            ident_count
        );

        // Should have a label "10".
        assert!(tokens
            .iter()
            .any(|t| t.kind == TokenKind::IntegerLiteral && t.text == "10"));
    }

    // ---- Mode detection ----

    #[test]
    fn detect_free_form() {
        // Modern extensions (.f90 and later) select free-form lexing.
        use super::super::detect_source_form;
        assert_eq!(
            detect_source_form("test.f90"),
            super::super::SourceForm::FreeForm
        );
        assert_eq!(
            detect_source_form("test.f95"),
            super::super::SourceForm::FreeForm
        );
        assert_eq!(
            detect_source_form("test.f03"),
            super::super::SourceForm::FreeForm
        );
        assert_eq!(
            detect_source_form("test.f08"),
            super::super::SourceForm::FreeForm
        );
        assert_eq!(
            detect_source_form("test.f18"),
            super::super::SourceForm::FreeForm
        );
    }

    #[test]
    fn detect_fixed_form() {
        // Legacy extensions (.f/.for/.ftn) select fixed-form lexing.
        use super::super::detect_source_form;
        assert_eq!(
            detect_source_form("test.f"),
            super::super::SourceForm::FixedForm
        );
        assert_eq!(
            detect_source_form("test.for"),
            super::super::SourceForm::FixedForm
        );
        assert_eq!(
            detect_source_form("test.ftn"),
            super::super::SourceForm::FixedForm
        );
    }

    // ---- Unified token stream ----

    #[test]
    fn fixed_and_free_produce_same_tokens() {
        // The two lexers must agree on equivalent input: same statements,
        // same TokenKind sequence.
        let free_src = "integer :: x\nx = 42\n";
        let fixed_src = "      integer :: x\n      x = 42\n";

        let free_kinds: Vec<_> = super::super::Lexer::tokenize(free_src, 0)
            .unwrap()
            .into_iter()
            .map(|t| t.kind)
            .filter(|k| !matches!(k, TokenKind::Eof | TokenKind::Newline))
            .collect();

        let fixed_kinds = fixed_kinds(fixed_src);

        assert_eq!(
            free_kinds, fixed_kinds,
            "free-form and fixed-form produced different tokens:\n  free: {:?}\n  fixed: {:?}",
            free_kinds, fixed_kinds
        );
    }

    // ---- Blank lines ----

    #[test]
    fn blank_lines_handled() {
        // A blank line between two statements terminates the first; both
        // identifiers must still come through.
        let src = "      X = 1\n\n      Y = 2\n";
        let kinds = fixed_kinds(src);
        assert!(
            kinds
                .iter()
                .filter(|k| **k == TokenKind::Identifier)
                .count()
                >= 2
        );
    }

    // ---- Hollerith ----

    #[test]
    fn hollerith_protect_converts_to_string() {
        // nH<chars> is rewritten to a quoted string literal.
        assert_eq!(protect_hollerith("3HABC"), "'ABC'");
        assert_eq!(protect_hollerith("6HFOOBAR"), "'FOOBAR'");
    }

    #[test]
    fn hollerith_with_spaces_preserved() {
        // 6H HELLO has a leading space — must be preserved.
        assert_eq!(protect_hollerith("6H HELLO"), "' HELLO'");
    }

    #[test]
    fn hollerith_not_after_letter() {
        // X3HABC — the 3H is preceded by a letter, so it's NOT a Hollerith.
        assert_eq!(protect_hollerith("X3HABC"), "X3HABC");
    }

    #[test]
    fn hollerith_after_operator() {
        // =3HABC — preceded by =, not a letter, so IS a Hollerith.
        assert_eq!(protect_hollerith("=3HABC"), "='ABC'");
    }

    // ---- Real fixed-form files from refs ----
    // These tests are best-effort: they silently pass when the reference
    // checkouts are not present.

    #[test]
    fn tokenize_flang_fixed_form_test() {
        let path = concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../.refs/llvm/flang/test/Driver/Inputs/fixed-form-test.f"
        );
        if !std::path::Path::new(path).exists() {
            return;
        }
        let src = std::fs::read_to_string(path).unwrap();
        let tokens = tokenize_fixed(&src, 0);
        assert!(tokens.is_ok(), "failed: {:?}", tokens.err());
    }

    #[test]
    fn tokenize_gcc_nested_forall() {
        let path = concat!(
            env!("CARGO_MANIFEST_DIR"),
            "/../.refs/gcc/gcc/testsuite/gfortran.dg/nested_forall_1.f"
        );
        if !std::path::Path::new(path).exists() {
            return;
        }
        let src = std::fs::read_to_string(path).unwrap();
        let tokens = tokenize_fixed(&src, 0);
        assert!(tokens.is_ok(), "failed: {:?}", tokens.err());
        let toks = tokens.unwrap();
        assert!(toks.len() > 50, "expected 50+ tokens, got {}", toks.len());
    }

    // ======================================================================
    // Whitespace insensitivity tests — the core challenge of fixed-form
    // ======================================================================

    #[test]
    fn whitespace_stripped_goto() {
        // GO TO 100 collapses to GOTO100 in fixed-form source.
        let texts = fixed_texts("      GOTO100\n");
        assert_eq!(texts, vec!["GOTO", "100"], "got: {:?}", texts);
    }

    #[test]
    fn whitespace_stripped_integer_decl() {
        // INTEGER I collapses to INTEGERI and must still parse as a declaration.
        let texts = fixed_texts("      INTEGERI\n");
        assert_eq!(texts, vec!["INTEGER", "I"], "got: {:?}", texts);
    }

    #[test]
    fn whitespace_stripped_doubleprecision() {
        // DOUBLE PRECISION X collapses to DOUBLEPRECISIONX.
        let texts = fixed_texts("      DOUBLEPRECISIONX\n");
        assert_eq!(texts, vec!["DOUBLEPRECISION", "X"], "got: {:?}", texts);
    }

    #[test]
    fn whitespace_stripped_program_name() {
        // PROGRAM keyword must be split off the program name.
        let texts = fixed_texts("      PROGRAMHELLO\n");
        assert_eq!(texts, vec!["PROGRAM", "HELLO"], "got: {:?}", texts);
    }

    #[test]
    fn whitespace_stripped_typed_function() {
        // Two keywords plus a name plus an argument list, all run together.
        let texts = fixed_texts("      INTEGERFUNCTIONF(X)\n");
        assert_eq!(texts, vec!["INTEGER", "FUNCTION", "F", "(", "X", ")"]);
    }

    #[test]
    fn index_not_broken() {
        // INDEX must NOT be split into IN+DEX — this was the showstopper bug.
        let _kinds = fixed_kinds("      X=INDEX(A,'B')\n");
        let texts = fixed_texts("      X=INDEX(A,'B')\n");
        assert!(
            texts.contains(&"INDEX".to_string()),
            "INDEX was incorrectly split, got: {:?}",
            texts
        );
    }

    #[test]
    fn include_not_broken() {
        // INCLUDE must not become IN+CLUDE.
        let texts = fixed_texts("      INCLUDEVAR=1\n");
        assert_eq!(texts[0], "INCLUDEVAR", "got: {:?}", texts);
    }

    #[test]
    fn if_ident_not_broken() {
        // IFLAG must not become IF+LAG.
        let texts = fixed_texts("      IFLAG=1\n");
        assert_eq!(texts[0], "IFLAG", "got: {:?}", texts);
    }

    #[test]
    fn whitespace_stripped_assignment() {
        // X=42 → identifier, =, integer
        let kinds = fixed_kinds("      X=42\n");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Identifier,
                TokenKind::Assign,
                TokenKind::IntegerLiteral,
            ]
        );
    }

    #[test]
    fn whitespace_stripped_expression() {
        // A+B*C → identifier, +, identifier, *, identifier
        let kinds = fixed_kinds("      A+B*C\n");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Identifier,
                TokenKind::Plus,
                TokenKind::Identifier,
                TokenKind::Star,
                TokenKind::Identifier,
            ]
        );
    }

    #[test]
    fn whitespace_stripped_with_parens() {
        // X=REAL(I) → identifier, =, identifier, (, identifier, )
        let kinds = fixed_kinds("      X=REAL(I)\n");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Identifier,
                TokenKind::Assign,
                TokenKind::Identifier,
                TokenKind::LParen,
                TokenKind::Identifier,
                TokenKind::RParen,
            ]
        );
    }

    #[test]
    fn whitespace_stripped_dot_op() {
        // A.AND.B → identifier, .and., identifier
        let kinds = fixed_kinds("      A.AND.B\n");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Identifier,
                TokenKind::DotOp("and".into()),
                TokenKind::Identifier,
            ]
        );
    }

    #[test]
    fn whitespace_stripped_real_literal() {
        // X=1.0D0 → identifier, =, real (the dot belongs to the literal,
        // not a dot-operator).
        let kinds = fixed_kinds("      X=1.0D0\n");
        assert_eq!(
            kinds,
            vec![
                TokenKind::Identifier,
                TokenKind::Assign,
                TokenKind::RealLiteral,
            ]
        );
    }

    #[test]
    fn whitespace_stripped_comparison() {
        // I.EQ.1 → the .EQ. must lex as a dot-operator even with no spaces.
        let kinds = fixed_kinds("      IF(I.EQ.1)STOP\n");
        assert!(
            kinds.contains(&TokenKind::DotOp("eq".into())),
            "got: {:?}",
            kinds
        );
    }

    #[test]
    fn whitespace_stripped_string_preserved() {
        // Whitespace INSIDE strings must be preserved.
        let kinds = fixed_kinds("      X='HELLO WORLD'\n");
        assert!(kinds.contains(&TokenKind::StringLiteral));
        let texts = fixed_texts("      X='HELLO WORLD'\n");
        assert!(
            texts.iter().any(|t| t.contains("HELLO WORLD")),
            "got: {:?}",
            texts
        );
    }

    // ---- Continuation over blank lines ----

    #[test]
    fn continuation_over_blank_line() {
        // A blank line followed by a continuation line does not end the
        // statement.
        let src = "      X = 1 +\n\n     + 2\n";
        let kinds = fixed_kinds(src);
        let int_count = kinds
            .iter()
            .filter(|k| **k == TokenKind::IntegerLiteral)
            .count();
        assert_eq!(
            int_count, 2,
            "blank line should not break continuation, got: {:?}",
            kinds
        );
    }

    // ---- DO/assignment ambiguity ----
    // Classic fixed-form ambiguity: "DO10I=1,10" is a loop, "DO10I=1.10"
    // is an assignment to the variable DO10I. The comma disambiguates.

    #[test]
    fn do_loop_with_comma() {
        // DO10I=1,10 → DO loop: DO + 10 + I + = + 1 + , + 10
        let kinds = fixed_kinds("      DO10I=1,10\n");
        assert!(
            kinds.contains(&TokenKind::Comma),
            "DO loop must have comma, got: {:?}",
            kinds
        );
        let texts = fixed_texts("      DO10I=1,10\n");
        assert_eq!(
            texts[0], "DO",
            "first token should be DO keyword, got: {:?}",
            texts
        );
    }

    #[test]
    fn do_assignment_no_comma() {
        // DO10I=1.10 → assignment: DO10I + = + 1.10 (no comma → not a loop)
        let kinds = fixed_kinds("      DO10I=1.10\n");
        assert!(
            !kinds.contains(&TokenKind::Comma),
            "assignment should have no comma, got: {:?}",
            kinds
        );
        let texts = fixed_texts("      DO10I=1.10\n");
        assert_eq!(
            texts[0], "DO10I",
            "should be single identifier, got: {:?}",
            texts
        );
    }

    #[test]
    fn do_assignment_no_comma_integer() {
        // DO10I=1 → assignment (no comma)
        let kinds = fixed_kinds("      DO10I=1\n");
        assert!(!kinds.contains(&TokenKind::Comma));
        let texts = fixed_texts("      DO10I=1\n");
        assert_eq!(texts[0], "DO10I");
    }

    // ---- BOZ in fixed-form ----

    #[test]
    fn boz_in_fixed_form() {
        // Binary BOZ literal: B'1010'.
        let kinds = fixed_kinds("      X=B'1010'\n");
        assert!(kinds.contains(&TokenKind::BozLiteral), "got: {:?}", kinds);
    }

    #[test]
    fn boz_hex_in_fixed_form() {
        // Hexadecimal BOZ literal: Z'FF'.
        let kinds = fixed_kinds("      X=Z'FF'\n");
        assert!(kinds.contains(&TokenKind::BozLiteral), "got: {:?}", kinds);
    }

    // ---- Hollerith integration ----

    #[test]
    fn hollerith_in_source() {
        // 3HABC in a statement should produce a string literal.
        let kinds = fixed_kinds("      X=3HABC\n");
        assert!(
            kinds.contains(&TokenKind::StringLiteral),
            "got: {:?}",
            kinds
        );
        let texts = fixed_texts("      X=3HABC\n");
        assert!(
            texts.iter().any(|t| t.contains("ABC")),
            "Hollerith content missing, got: {:?}",
            texts
        );
    }

    #[test]
    fn hollerith_with_spaces_in_source() {
        // 6H HELLO preserves the space.
        let texts = fixed_texts("      X=6H HELLO\n");
        assert!(
            texts.iter().any(|t| t.contains(" HELLO")),
            "space lost, got: {:?}",
            texts
        );
    }

    #[test]
    fn hollerith_zero_length() {
        // 0H should produce empty string literal.
        assert_eq!(protect_hollerith("=0H+"), "=''+");
    }

    // ---- String in fixed-form ----

    #[test]
    fn string_literal_in_fixed_form() {
        // Doubled quote inside a string is the F77 escape for a quote.
        let kinds = fixed_kinds("      X = 'IT''S'\n");
        assert!(kinds.contains(&TokenKind::StringLiteral));
        let texts = fixed_texts("      X = 'IT''S'\n");
        assert!(
            texts.iter().any(|t| t.contains("IT''S")),
            "got: {:?}",
            texts
        );
    }

    #[test]
    fn unterminated_string_error() {
        let result = tokenize_fixed("      X = 'UNTERMINATED\n", 0);
        assert!(result.is_err(), "should error on unterminated string");
    }

    #[test]
    fn doublecomplex_keyword() {
        // Squashed keyword form needed by whitespace-insensitive scanning.
        use crate::lexer::is_keyword;
        assert!(is_keyword("doublecomplex").is_some());
        assert!(is_keyword("DOUBLECOMPLEX").is_some());
    }

    #[test]
    fn continue_keyword() {
        // Keyword lookup must be case-insensitive.
        use crate::lexer::is_keyword;
        assert!(is_keyword("continue").is_some());
        assert!(is_keyword("CONTINUE").is_some());
    }
}
| 1724 |