fortsh Public

Watch 0 Fork 0 Star 0
Fortran · 56455 bytes Raw Blame History
  
        1
        ! =====================================
      
        2
        ! Lexer Module - Phase 1 of Grammar-Aware Parser
      
        3
        ! =====================================
      
        4
        ! Tokenizes shell input into meaningful units
      
        5
        ! Part of the parser rewrite project
      
        6
        !
      
        7
        ! Status: PHASE 1 - Full implementation
      
        8
        ! Author: Parser Rewrite Team
      
        9
        ! Created: 2025-11-05
      
        10
        
        11
        module lexer
      
        12
          use iso_fortran_env
      
        13
          use shell_types
      
        14
          use shell_types, only: QUOTE_NONE, QUOTE_SINGLE, QUOTE_DOUBLE
      
        15
          implicit none
      
        16
          private
      
        17
        
        18
          ! Public interface
      
        19
          public :: tokenize
      
        20
          public :: next_token
      
        21
          public :: peek_token
      
        22
          public :: is_keyword
      
        23
          public :: is_operator
      
        24
        
        25
          ! Lexer state enumeration
      
        26
          ! One code per lexer state; the values are distinct small integers.
          integer, parameter :: LEX_NORMAL                 = 1, &
                                LEX_IN_SINGLE_QUOTE        = 2, &
                                LEX_IN_DOUBLE_QUOTE        = 3, &
                                LEX_IN_WORD                = 4, &
                                LEX_IN_OPERATOR            = 5, &
                                LEX_IN_DOLLAR_SINGLE_QUOTE = 6

          ! Module-level flag for [[ ]] test expressions: while it is set the
          ! lexer treats && || < > as test operators instead of shell operators.
          ! It starts out .false.; the explicit SAVE only spells out what the
          ! initializer already implies for a module variable.
          logical, save :: in_double_bracket_context = .false.
      
        36
        
        37
        contains
      
        38
        
        39
          ! =====================================
      
        40
          ! Character Classification Helpers
      
        41
          ! =====================================
      
        42
        
        43
          ! Report whether CH is one of the blank characters the lexer treats
          ! as whitespace: space, horizontal tab (9) or carriage return (13).
          ! Line feed (10) is not in the set, so it is never reported here.
          !
          !   ch     - the single character to classify
          !   is_ws  - .true. when CH is space, tab or CR; .false. otherwise
          pure function is_whitespace(ch) result(is_ws)
            character(len=1), intent(in) :: ch
            logical :: is_ws

            character(len=1), parameter :: blanks(3) = [' ', char(9), char(13)]

            is_ws = any(ch == blanks)
          end function is_whitespace
      
        48
        
        49
          ! Report whether CH can begin a shell operator token, i.e. whether it
          ! is one of the seven characters  | & ; < > ( )
          !
          !   ch     - the single character to classify
          !   is_op  - .true. when CH is one of the characters listed above
          pure function is_operator_start(ch) result(is_op)
            character(len=1), intent(in) :: ch
            logical :: is_op

            ! CH is exactly one character long, so a non-zero position in the
            ! literal below means CH equals one of its characters.
            is_op = index('|&;<>()', ch) /= 0
          end function is_operator_start
      
        55
        
        56
          ! Report whether CH may appear inside an unquoted word.
          !
          ! A character is rejected when is_whitespace or is_operator_start
          ! accepts it, or when it is a line feed (10), '#', a double quote,
          ! a single quote or a backslash.  Everything else is a word character.
          !
          !   ch     - the single character to classify
          !   is_wc  - .true. when CH is a word character
          pure function is_word_char(ch) result(is_wc)
            character(len=1), intent(in) :: ch
            logical :: is_wc

            if (is_whitespace(ch) .or. is_operator_start(ch)) then
              is_wc = .false.
            else
              select case (ch)
              case (char(10), '#', '"', "'", '\')
                is_wc = .false.
              case default
                is_wc = .true.
              end select
            end if
          end function is_word_char
      
        64
        
        65
          ! =====================================
      
        66
          ! Operator Recognition
      
        67
          ! =====================================
      
        68
        
        69
          ! Report whether STR is exactly one of the recognised shell operators.
          !
          ! Recognised spellings:
          !   logical       &&  ||
          !   pipe / bg     |   &
          !   separators    ;   ;;
          !   redirections  <  >  >>  <>  >&  <&  >|  <<  <<-  <<<
          !   grouping      (   )   {   }
          !
          !   str    - candidate text; trailing blanks are ignored, leading
          !            blanks are significant
          !   is_op  - .true. when STR matches one of the spellings above
          function is_operator(str) result(is_op)
            character(len=*), intent(in) :: str
            logical :: is_op

            character(len=3), parameter :: operators(*) = [character(len=3) :: &
                '&&', '||',                                                  &
                '|',  '&',                                                   &
                ';',  ';;',                                                  &
                '<',  '>',  '>>', '<>', '>&', '<&', '>|', '<<', '<<-', '<<<', &
                '(',  ')',  '{',  '}']

            ! Character equality pads the shorter operand with blanks, so this
            ! ignores trailing blanks in STR just as comparing trim(str) would.
            is_op = any(str == operators)
          end function is_operator
      
        93
        
        94
          ! =====================================
      
        95
          ! is_keyword - Check if word is a shell keyword
      
        96
          ! =====================================
      
        97
          ! Report whether WORD is exactly one of the shell reserved words.
          !
          ! Recognised words:
          !   control flow  if then else elif fi / for in do done /
          !                 while until / case esac
          !   other         function select time coproc
          !   grouping      {  }
          !   negation      !   (context-dependent)
          !
          !   word   - candidate text; trailing blanks are ignored, leading
          !            blanks and letter case are significant
          !   is_kw  - .true. when WORD matches one of the words above
          function is_keyword(word) result(is_kw)
            character(len=*), intent(in) :: word
            logical :: is_kw

            character(len=8), parameter :: keywords(*) = [character(len=8) :: &
                'if', 'then', 'else', 'elif', 'fi',                           &
                'for', 'in', 'do', 'done',                                    &
                'while', 'until',                                             &
                'case', 'esac',                                               &
                'function', 'select', 'time', 'coproc',                       &
                '{', '}',                                                     &
                '!']

            ! Character equality pads the shorter operand with blanks, so this
            ! ignores trailing blanks in WORD just as comparing trim(word) would.
            is_kw = any(word == keywords)
          end function is_keyword
      
        122
        
        123
          ! =====================================
      
        124
          ! tokenize - Main entry point for lexical analysis
      
        125
          ! =====================================
      
        126
          subroutine tokenize(input, tokens, num_tokens)
      
        127
            character(len=*), intent(in) :: input
      
        128
            type(token_t), intent(out) :: tokens(:)
      
        129
            integer, intent(out) :: num_tokens
      
        130
        
        131
            integer :: pos, input_len, state, token_start
      
        132
            character(len=1) :: ch, next_ch
      
        133
            character(len=MAX_TOKEN_LEN) :: current_token
      
        134
            integer :: token_len, paren_depth
      
        135
            logical :: in_escape, continuing_word, token_has_quoted_part
      
        136
        
        137
            num_tokens = 0
      
        138
            pos = 1
      
        139
            input_len = len_trim(input)
      
        140
            state = LEX_NORMAL
      
        141
            token_start = 1
      
        142
            current_token = ''
      
        143
            token_len = 0
      
        144
            in_escape = .false.
      
        145
            paren_depth = 0
      
        146
            continuing_word = .false.
      
        147
            token_has_quoted_part = .false.
      
        148
            in_double_bracket_context = .false.
      
        149
        
        150
            do while (pos <= input_len .and. num_tokens < size(tokens))
      
        151
              ch = input(pos:pos)
      
        152
        
        153
              ! Get next character for lookahead (if available)
      
        154
              if (pos < input_len) then
      
        155
                next_ch = input(pos+1:pos+1)
      
        156
              else
      
        157
                next_ch = ' '
      
        158
              end if
      
        159
        
        160
              select case(state)
      
        161
        
        162
              ! ============ NORMAL STATE ============
      
        163
              case(LEX_NORMAL)
      
        164
        
        165
                ! Skip whitespace
      
        166
                if (is_whitespace(ch)) then
      
        167
                  pos = pos + 1
      
        168
                  cycle
      
        169
                end if
      
        170
        
        171
                ! Newline - significant token
      
        172
                if (ch == char(10)) then
      
        173
                  call add_token(tokens, num_tokens, TOKEN_NEWLINE, char(10), pos, pos, .false.)
      
        174
                  pos = pos + 1
      
        175
                  cycle
      
        176
                end if
      
        177
        
        178
                ! Comments: # to end of line
      
        179
                if (ch == '#') then
      
        180
                  ! Skip until newline or end of input
      
        181
                  do while (pos <= input_len .and. input(pos:pos) /= char(10))
      
        182
                    pos = pos + 1
      
        183
                  end do
      
        184
                  cycle
      
        185
                end if
      
        186
        
        187
                ! Single quote: literal string
      
        188
                if (ch == "'") then
      
        189
                  state = LEX_IN_SINGLE_QUOTE
      
        190
                  ! Only reset token if we're NOT continuing a word
      
        191
                  if (.not. continuing_word) then
      
        192
                    token_start = pos
      
        193
                    token_len = 0
      
        194
                    current_token = ''
      
        195
                  end if
      
        196
                  pos = pos + 1
      
        197
                  cycle
      
        198
                end if
      
        199
        
        200
                ! Double quote: expandable string
      
        201
                if (ch == '"') then
      
        202
                  state = LEX_IN_DOUBLE_QUOTE
      
        203
                  ! Only reset token if we're NOT continuing a word
      
        204
                  if (.not. continuing_word) then
      
        205
                    token_start = pos
      
        206
                    token_len = 0
      
        207
                    current_token = ''
      
        208
                  end if
      
        209
                  pos = pos + 1
      
        210
                  cycle
      
        211
                end if
      
        212
        
        213
                ! Backslash escape
      
        214
                if (ch == '\') then
      
        215
                  if (pos < input_len) then
      
        216
                    ! Start a word token with the escaped character
      
        217
                    state = LEX_IN_WORD
      
        218
                    token_start = pos
      
        219
                    in_escape = .true.  ! Mark this token as escaped
      
        220
                    ! For characters that would trigger expansion ($, `, etc), preserve backslash
      
        221
                    ! so the expansion phase knows not to expand them
      
        222
                    if (next_ch == '$' .or. next_ch == '`') then
      
        223
                      token_len = 2
      
        224
                      current_token(1:2) = '\' // next_ch
      
        225
                    else
      
        226
                      token_len = 1
      
        227
                      current_token = next_ch
      
        228
                    end if
      
        229
                    pos = pos + 2  ! Skip backslash and next char
      
        230
                    cycle
      
        231
                  end if
      
        232
                end if
      
        233
        
        234
                ! Multi-character operators
      
        235
                ! Inside [[ ]], treat & | < > ( ) as word characters (test operators)
      
        236
                if (in_double_bracket_context .and. &
      
        237
                    (ch == '&' .or. ch == '|' .or. ch == '<' .or. ch == '>' .or. &
      
        238
                     ch == '(' .or. ch == ')')) then
      
        239
                  state = LEX_IN_WORD
      
        240
                  token_start = pos
      
        241
                  token_len = 1
      
        242
                  current_token = ch
      
        243
                  pos = pos + 1
      
        244
                  cycle
      
        245
                end if
      
        246
                if (is_operator_start(ch)) then
      
        247
                  state = LEX_IN_OPERATOR
      
        248
                  token_start = pos
      
        249
                  token_len = 1
      
        250
                  current_token = ch
      
        251
                  pos = pos + 1
      
        252
                  cycle
      
        253
                end if
      
        254
        
        255
                ! Check for $'...' ANSI-C quoting
      
        256
                if (ch == '$' .and. pos < input_len .and. next_ch == "'") then
      
        257
                  state = LEX_IN_DOLLAR_SINGLE_QUOTE
      
        258
                  token_start = pos
      
        259
                  token_len = 0
      
        260
                  current_token = ''
      
        261
                  continuing_word = .false.
      
        262
                  token_has_quoted_part = .true.
      
        263
                  pos = pos + 2  ! Skip $'
      
        264
                  cycle
      
        265
                end if
      
        266
        
        267
                ! Check for $( or $(( - these should be kept in word tokens for expansion
      
        268
                if (ch == '$' .and. pos < input_len .and. next_ch == '(') then
      
        269
                  ! This is command substitution or arithmetic - include in word
      
        270
                  state = LEX_IN_WORD
      
        271
                  token_start = pos
      
        272
                  token_len = 2
      
        273
                  current_token = '$('
      
        274
                  paren_depth = 1  ! Track that we're inside $(
      
        275
                  pos = pos + 2
      
        276
                  cycle
      
        277
                end if
      
        278
        
        279
                ! Check for ${ - parameter expansion should be kept in word tokens
      
        280
                if (ch == '$' .and. pos < input_len .and. next_ch == '{') then
      
        281
                  ! This is parameter expansion - include in word
      
        282
                  state = LEX_IN_WORD
      
        283
                  token_start = pos
      
        284
                  token_len = 2
      
        285
                  current_token = '${'
      
        286
                  paren_depth = 1  ! Track that we're inside ${
      
        287
                  pos = pos + 2
      
        288
                  cycle
      
        289
                end if
      
        290
        
        291
                ! Check for $' - ANSI-C quoting
      
        292
                if (ch == '$' .and. pos < input_len .and. next_ch == "'") then
      
        293
                  state = LEX_IN_WORD
      
        294
                  token_start = pos
      
        295
                  token_len = 0
      
        296
                  current_token = ''
      
        297
                  token_has_quoted_part = .true.
      
        298
                  pos = pos + 2  ! Skip $'
      
        299
                  call process_ansi_c_quote(input, pos, input_len, current_token, token_len)
      
        300
                  cycle
      
        301
                end if
      
        302
        
        303
                ! Assignment detection: VAR=value
      
        304
                ! (This is complex - we'll detect it as WORD and let parser handle it)
      
        305
        
        306
                ! Start of word
      
        307
                state = LEX_IN_WORD
      
        308
                token_start = pos
      
        309
                token_len = 1
      
        310
                current_token = ch
      
        311
                pos = pos + 1
      
        312
        
        313
              ! ============ SINGLE QUOTE STATE ============
      
        314
              case(LEX_IN_SINGLE_QUOTE)
      
        315
                if (ch == "'") then
      
        316
                  ! End of single-quoted string
      
        317
                  ! Add sentinel char(3) to mark end of single-quoted literal
      
        318
                  if (token_len < MAX_TOKEN_LEN) then
      
        319
                    token_len = token_len + 1
      
        320
                    current_token(token_len:token_len) = char(3)
      
        321
                  end if
      
        322
                  pos = pos + 1  ! Move past closing quote
      
        323
                  ! Check if next character continues the word (adjacent quote, word char, or escape)
      
        324
                  if (pos <= input_len) then
      
        325
                    next_ch = input(pos:pos)
      
        326
                    if (next_ch == "'" .or. next_ch == '"') then
      
        327
                      ! Adjacent quote follows - continue building this token
      
        328
                      state = LEX_IN_WORD
      
        329
                      continuing_word = .false.
      
        330
                      cycle
      
        331
                    else if (next_ch == '\') then
      
        332
                      ! Backslash escape follows - continue building this token
      
        333
                      state = LEX_IN_WORD
      
        334
                      continuing_word = .false.
      
        335
                      cycle
      
        336
                    else if (is_word_char(next_ch)) then
      
        337
                      ! Word character follows - continue building this token
      
        338
                      state = LEX_IN_WORD
      
        339
                      continuing_word = .false.
      
        340
                      cycle
      
        341
                    end if
      
        342
                  end if
      
        343
                  ! No adjacent quote or word char - finalize token
      
        344
                  if (continuing_word) then
      
        345
                    ! We're building a multi-part word - go back to LEX_IN_WORD
      
        346
                    state = LEX_IN_WORD
      
        347
                    continuing_word = .false.
      
        348
                  else
      
        349
                    ! Standalone quoted string - emit token
      
        350
                    call add_token(tokens, num_tokens, TOKEN_WORD, current_token(1:token_len), &
      
        351
                                   token_start, pos-1, .true., quote_type=QUOTE_SINGLE)
      
        352
                    state = LEX_NORMAL
      
        353
                  end if
      
        354
                else
      
        355
                  ! Add character to token (everything is literal)
      
        356
                  if (token_len < MAX_TOKEN_LEN) then
      
        357
                    token_len = token_len + 1
      
        358
                    current_token(token_len:token_len) = ch
      
        359
                  end if
      
        360
                  pos = pos + 1
      
        361
                end if
      
        362
        
        363
              ! ============ DOLLAR SINGLE QUOTE STATE ($'...') ============
      
        364
              case(LEX_IN_DOLLAR_SINGLE_QUOTE)
      
        365
                if (ch == "'") then
      
        366
                  ! End of $'...' string — add sentinels to mark as quoted
      
        367
                  if (token_len < MAX_TOKEN_LEN) then
      
        368
                    token_len = token_len + 1
      
        369
                    current_token(token_len:token_len) = char(3)  ! end sentinel
      
        370
                  end if
      
        371
                  pos = pos + 1
      
        372
                  ! Check if next character continues the word
      
        373
                  if (pos <= input_len) then
      
        374
                    next_ch = input(pos:pos)
      
        375
                    if (next_ch == "'" .or. next_ch == '"' .or. next_ch == '\' .or. &
      
        376
                        is_word_char(next_ch)) then
      
        377
                      state = LEX_IN_WORD
      
        378
                      continuing_word = .false.
      
        379
                      cycle
      
        380
                    end if
      
        381
                  end if
      
        382
                  if (continuing_word) then
      
        383
                    state = LEX_IN_WORD
      
        384
                    continuing_word = .false.
      
        385
                  else
      
        386
                    call add_token(tokens, num_tokens, TOKEN_WORD, current_token(1:token_len), &
      
        387
                                   token_start, pos-1, .true., quote_type=QUOTE_SINGLE)
      
        388
                    state = LEX_NORMAL
      
        389
                  end if
      
        390
                else if (ch == '\' .and. pos < input_len) then
      
        391
                  ! Escape sequences in $'...'
      
        392
                  next_ch = input(pos+1:pos+1)
      
        393
                  if (token_len < MAX_TOKEN_LEN) then
      
        394
                    select case(next_ch)
      
        395
                    case('a')
      
        396
                      token_len = token_len + 1
      
        397
                      current_token(token_len:token_len) = char(7)   ! bell
      
        398
                    case('b')
      
        399
                      token_len = token_len + 1
      
        400
                      current_token(token_len:token_len) = char(8)   ! backspace
      
        401
                    case('e', 'E')
      
        402
                      token_len = token_len + 1
      
        403
                      current_token(token_len:token_len) = char(27)  ! escape
      
        404
                    case('f')
      
        405
                      token_len = token_len + 1
      
        406
                      current_token(token_len:token_len) = char(12)  ! form feed
      
        407
                    case('n')
      
        408
                      token_len = token_len + 1
      
        409
                      current_token(token_len:token_len) = char(10)  ! newline
      
        410
                    case('r')
      
        411
                      token_len = token_len + 1
      
        412
                      current_token(token_len:token_len) = char(13)  ! carriage return
      
        413
                    case('t')
      
        414
                      token_len = token_len + 1
      
        415
                      current_token(token_len:token_len) = char(9)   ! tab
      
        416
                    case('v')
      
        417
                      token_len = token_len + 1
      
        418
                      current_token(token_len:token_len) = char(11)  ! vertical tab
      
        419
                    case('\')
      
        420
                      token_len = token_len + 1
      
        421
                      current_token(token_len:token_len) = '\'
      
        422
                    case("'")
      
        423
                      token_len = token_len + 1
      
        424
                      current_token(token_len:token_len) = "'"
      
        425
                    case('"')
      
        426
                      token_len = token_len + 1
      
        427
                      current_token(token_len:token_len) = '"'
      
        428
                    case('x')
      
        429
                      ! Hex escape: \xHH (up to 2 hex digits)
      
        430
                      block
      
        431
                        integer :: hval, hdigits
      
        432
                        character :: hch
      
        433
                        hval = 0; hdigits = 0
      
        434
                        pos = pos + 2  ! skip \x
      
        435
                        do while (pos <= input_len .and. hdigits < 2)
      
        436
                          hch = input(pos:pos)
      
        437
                          if (hch >= '0' .and. hch <= '9') then
      
        438
                            hval = hval * 16 + (ichar(hch) - ichar('0'))
      
        439
                          else if (hch >= 'a' .and. hch <= 'f') then
      
        440
                            hval = hval * 16 + (ichar(hch) - ichar('a') + 10)
      
        441
                          else if (hch >= 'A' .and. hch <= 'F') then
      
        442
                            hval = hval * 16 + (ichar(hch) - ichar('A') + 10)
      
        443
                          else
      
        444
                            exit
      
        445
                          end if
      
        446
                          pos = pos + 1
      
        447
                          hdigits = hdigits + 1
      
        448
                        end do
      
        449
                        if (hdigits > 0 .and. hval <= 255) then
      
        450
                          token_len = token_len + 1
      
        451
                          current_token(token_len:token_len) = char(hval)
      
        452
                        end if
      
        453
                        cycle  ! pos already advanced past hex digits
      
        454
                      end block
      
        455
                    case('0', '1', '2', '3', '4', '5', '6', '7')
      
        456
                      ! Octal escape: \nnn (up to 3 octal digits)
      
        457
                      block
      
        458
                        integer :: oval, odigits
      
        459
                        character :: och
      
        460
                        oval = 0; odigits = 0
      
        461
                        pos = pos + 1  ! skip backslash only
      
        462
                        do while (pos <= input_len .and. odigits < 3)
      
        463
                          och = input(pos:pos)
      
        464
                          if (och >= '0' .and. och <= '7') then
      
        465
                            oval = oval * 8 + (ichar(och) - ichar('0'))
      
        466
                          else
      
        467
                            exit
      
        468
                          end if
      
        469
                          pos = pos + 1
      
        470
                          odigits = odigits + 1
      
        471
                        end do
      
        472
                        if (odigits > 0 .and. oval <= 255) then
      
        473
                          token_len = token_len + 1
      
        474
                          current_token(token_len:token_len) = char(oval)
      
        475
                        end if
      
        476
                        cycle  ! pos already advanced past octal digits
      
        477
                      end block
      
        478
                    case default
      
        479
                      ! Unknown escape — keep both chars
      
        480
                      token_len = token_len + 1
      
        481
                      current_token(token_len:token_len) = ch
      
        482
                      token_len = token_len + 1
      
        483
                      current_token(token_len:token_len) = next_ch
      
        484
                    end select
      
        485
                  end if
      
        486
                  pos = pos + 2
      
        487
                else
      
        488
                  ! Regular character — add literally
      
        489
                  if (token_len < MAX_TOKEN_LEN) then
      
        490
                    token_len = token_len + 1
      
        491
                    current_token(token_len:token_len) = ch
      
        492
                  end if
      
        493
                  pos = pos + 1
      
        494
                end if
      
        495
        
        496
              ! ============ DOUBLE QUOTE STATE ============
      
        497
              case(LEX_IN_DOUBLE_QUOTE)
      
        498
                if (ch == '\' .and. pos < input_len) then
      
        499
                  ! Backslash escape in double quotes (only for $, `, ", \, newline)
      
        500
                  if (next_ch == '$' .or. next_ch == '`') then
      
        501
                    ! For \$ and \` - keep BOTH chars so expansion can see the escape
      
        502
                    if (token_len < MAX_TOKEN_LEN - 1) then
      
        503
                      token_len = token_len + 1
      
        504
                      current_token(token_len:token_len) = ch
      
        505
                      token_len = token_len + 1
      
        506
                      current_token(token_len:token_len) = next_ch
      
        507
                    end if
      
        508
                    pos = pos + 2
      
        509
                  else if (next_ch == '"' .or. next_ch == '\' .or. next_ch == char(10)) then
      
        510
                    ! For \" and \\ and \newline - add only escaped character
      
        511
                    if (token_len < MAX_TOKEN_LEN) then
      
        512
                      token_len = token_len + 1
      
        513
                      current_token(token_len:token_len) = next_ch
      
        514
                    end if
      
        515
                    pos = pos + 2
      
        516
                  else
      
        517
                    ! Backslash is literal
      
        518
                    if (token_len < MAX_TOKEN_LEN) then
      
        519
                      token_len = token_len + 1
      
        520
                      current_token(token_len:token_len) = ch
      
        521
                    end if
      
        522
                    pos = pos + 1
      
        523
                  end if
      
        524
                else if (ch == '$' .and. pos < input_len .and. next_ch == '(') then
      
        525
                  ! Command substitution inside double quotes - need to find matching )
      
        526
                  ! while ignoring quotes inside $()
      
        527
                  if (token_len < MAX_TOKEN_LEN - 1) then
      
        528
                    token_len = token_len + 1
      
        529
                    current_token(token_len:token_len) = '$'
      
        530
                    token_len = token_len + 1
      
        531
                    current_token(token_len:token_len) = '('
      
        532
                  end if
      
        533
                  pos = pos + 2
      
        534
                  paren_depth = 1
      
        535
                  ! Scan to find matching ), respecting nested parens and quotes
      
        536
                  do while (pos <= input_len .and. paren_depth > 0)
      
        537
                    ch = input(pos:pos)
      
        538
                    if (ch == '"') then
      
        539
                      ! Skip double-quoted string inside command substitution
      
        540
                      if (token_len < MAX_TOKEN_LEN) then
      
        541
                        token_len = token_len + 1
      
        542
                        current_token(token_len:token_len) = ch
      
        543
                      end if
      
        544
                      pos = pos + 1
      
        545
                      do while (pos <= input_len)
      
        546
                        ch = input(pos:pos)
      
        547
                        if (ch == '\' .and. pos < input_len) then
      
        548
                          ! Skip escaped char
      
        549
                          if (token_len < MAX_TOKEN_LEN - 1) then
      
        550
                            token_len = token_len + 1
      
        551
                            current_token(token_len:token_len) = ch
      
        552
                            token_len = token_len + 1
      
        553
                            current_token(token_len:token_len) = input(pos+1:pos+1)
      
        554
                          end if
      
        555
                          pos = pos + 2
      
        556
                        else if (ch == '"') then
      
        557
                          if (token_len < MAX_TOKEN_LEN) then
      
        558
                            token_len = token_len + 1
      
        559
                            current_token(token_len:token_len) = ch
      
        560
                          end if
      
        561
                          pos = pos + 1
      
        562
                          exit
      
        563
                        else
      
        564
                          if (token_len < MAX_TOKEN_LEN) then
      
        565
                            token_len = token_len + 1
      
        566
                            current_token(token_len:token_len) = ch
      
        567
                          end if
      
        568
                          pos = pos + 1
      
        569
                        end if
      
        570
                      end do
      
        571
                    else if (ch == "'") then
      
        572
                      ! Skip single-quoted string
      
        573
                      if (token_len < MAX_TOKEN_LEN) then
      
        574
                        token_len = token_len + 1
      
        575
                        current_token(token_len:token_len) = ch
      
        576
                      end if
      
        577
                      pos = pos + 1
      
        578
                      do while (pos <= input_len .and. input(pos:pos) /= "'")
      
        579
                        if (token_len < MAX_TOKEN_LEN) then
      
        580
                          token_len = token_len + 1
      
        581
                          current_token(token_len:token_len) = input(pos:pos)
      
        582
                        end if
      
        583
                        pos = pos + 1
      
        584
                      end do
      
        585
                      if (pos <= input_len) then
      
        586
                        if (token_len < MAX_TOKEN_LEN) then
      
        587
                          token_len = token_len + 1
      
        588
                          current_token(token_len:token_len) = "'"
      
        589
                        end if
      
        590
                        pos = pos + 1
      
        591
                      end if
      
        592
                    else if (ch == '(') then
      
        593
                      paren_depth = paren_depth + 1
      
        594
                      if (token_len < MAX_TOKEN_LEN) then
      
        595
                        token_len = token_len + 1
      
        596
                        current_token(token_len:token_len) = ch
      
        597
                      end if
      
        598
                      pos = pos + 1
      
        599
                    else if (ch == ')') then
      
        600
                      paren_depth = paren_depth - 1
      
        601
                      if (token_len < MAX_TOKEN_LEN) then
      
        602
                        token_len = token_len + 1
      
        603
                        current_token(token_len:token_len) = ch
      
        604
                      end if
      
        605
                      pos = pos + 1
      
        606
                    else
      
        607
                      if (token_len < MAX_TOKEN_LEN) then
      
        608
                        token_len = token_len + 1
      
        609
                        current_token(token_len:token_len) = ch
      
        610
                      end if
      
        611
                      pos = pos + 1
      
        612
                    end if
      
        613
                  end do
      
        614
                else if (ch == '"') then
      
        615
                  ! End of double-quoted string
      
        616
                  pos = pos + 1  ! Move past closing quote
      
        617
                  ! Check if next character continues the word (adjacent quote, word char, or escape)
      
        618
                  if (pos <= input_len) then
      
        619
                    next_ch = input(pos:pos)
      
        620
                    if (next_ch == "'" .or. next_ch == '"') then
      
        621
                      ! Adjacent quote follows - continue building this token
      
        622
                      ! Add sentinel to mark quote boundary (so expansion knows where quoted part ends)
      
        623
                      if (token_len < MAX_TOKEN_LEN) then
      
        624
                        token_len = token_len + 1
      
        625
                        current_token(token_len:token_len) = char(1)  ! ASCII SOH as sentinel
      
        626
                      end if
      
        627
                      state = LEX_IN_WORD
      
        628
                      continuing_word = .false.
      
        629
                      cycle
      
        630
                    else if (next_ch == '\') then
      
        631
                      ! Backslash escape follows - continue building this token
      
        632
                      ! Add sentinel to mark quote boundary
      
        633
                      if (token_len < MAX_TOKEN_LEN) then
      
        634
                        token_len = token_len + 1
      
        635
                        current_token(token_len:token_len) = char(1)  ! ASCII SOH as sentinel
      
        636
                      end if
      
        637
                      state = LEX_IN_WORD
      
        638
                      continuing_word = .false.
      
        639
                      cycle
      
        640
                    else if (is_word_char(next_ch)) then
      
        641
                      ! Word character follows - continue building this token
      
        642
                      ! Add sentinel to mark quote boundary (so expansion knows where quoted part ends)
      
        643
                      if (token_len < MAX_TOKEN_LEN) then
      
        644
                        token_len = token_len + 1
      
        645
                        current_token(token_len:token_len) = char(1)  ! ASCII SOH as sentinel
      
        646
                      end if
      
        647
                      state = LEX_IN_WORD
      
        648
                      continuing_word = .false.
      
        649
                      cycle
      
        650
                    end if
      
        651
                  end if
      
        652
                  ! No adjacent quote or word char - finalize token
      
        653
                  if (continuing_word) then
      
        654
                    ! We're building a multi-part word - go back to LEX_IN_WORD
      
        655
                    state = LEX_IN_WORD
      
        656
                    continuing_word = .false.
      
        657
                  else
      
        658
                    ! Standalone quoted string - emit token
      
        659
                    call add_token(tokens, num_tokens, TOKEN_WORD, current_token(1:token_len), &
      
        660
                                   token_start, pos-1, .true., quote_type=QUOTE_DOUBLE)
      
        661
                    state = LEX_NORMAL
      
        662
                  end if
      
        663
                else
      
        664
                  ! Add character to token
      
        665
                  if (token_len < MAX_TOKEN_LEN) then
      
        666
                    token_len = token_len + 1
      
        667
                    current_token(token_len:token_len) = ch
      
        668
                  end if
      
        669
                  pos = pos + 1
      
        670
                end if
      
        671
        
        672
              ! ============ WORD STATE ============
      
        673
              case(LEX_IN_WORD)
      
        674
                ! Check if we're inside $() - if so, keep EVERYTHING including spaces
      
        675
                ! IMPORTANT: Also check paren_depth > 0 to ensure we're actually inside the $()
      
        676
                if (index(current_token(1:token_len), '$(') > 0 .and. paren_depth > 0) then
      
        677
                  ! Inside command substitution - track paren depth
      
        678
                  if (ch == '(') then
      
        679
                    paren_depth = paren_depth + 1
      
        680
                    if (token_len < MAX_TOKEN_LEN) then
      
        681
                      token_len = token_len + 1
      
        682
                      current_token(token_len:token_len) = ch
      
        683
                    end if
      
        684
                    pos = pos + 1
      
        685
                  else if (ch == ')') then
      
        686
                    paren_depth = paren_depth - 1
      
        687
                    if (token_len < MAX_TOKEN_LEN) then
      
        688
                      token_len = token_len + 1
      
        689
                      current_token(token_len:token_len) = ch
      
        690
                    end if
      
        691
                    pos = pos + 1
      
        692
                    ! If paren_depth hits 0, we closed the $(...)
      
        693
                    if (paren_depth == 0) then
      
        694
                      ! End of command substitution - finish token
      
        695
                      call add_word_or_keyword(tokens, num_tokens, current_token(1:token_len), &
      
        696
                                              token_start, pos-1, token_has_quoted_part, in_escape)
      
        697
                      state = LEX_NORMAL
      
        698
                      in_escape = .false.
      
        699
                      token_has_quoted_part = .false.
      
        700
                    end if
      
        701
                  else
      
        702
                    ! Inside $() - keep EVERYTHING including spaces
      
        703
                    if (token_len < MAX_TOKEN_LEN) then
      
        704
                      token_len = token_len + 1
      
        705
                      current_token(token_len:token_len) = ch
      
        706
                    end if
      
        707
                    pos = pos + 1
      
        708
                  end if
      
        709
                ! Check if we're inside ${ - if so, keep EVERYTHING until closing }
      
        710
                ! IMPORTANT: Also check paren_depth > 0 to ensure we're actually inside the ${}
      
        711
                else if (index(current_token(1:token_len), '${') > 0 .and. paren_depth > 0) then
      
        712
                  ! Inside parameter expansion - track brace depth
      
        713
                  if (ch == '{') then
      
        714
                    paren_depth = paren_depth + 1
      
        715
                    if (token_len < MAX_TOKEN_LEN) then
      
        716
                      token_len = token_len + 1
      
        717
                      current_token(token_len:token_len) = ch
      
        718
                    end if
      
        719
                    pos = pos + 1
      
        720
                  else if (ch == '}') then
      
        721
                    paren_depth = paren_depth - 1
      
        722
                    if (token_len < MAX_TOKEN_LEN) then
      
        723
                      token_len = token_len + 1
      
        724
                      current_token(token_len:token_len) = ch
      
        725
                    end if
      
        726
                    pos = pos + 1
      
        727
                    ! If paren_depth hits 0, we closed the ${...}
      
        728
                    if (paren_depth == 0) then
      
        729
                      ! Check if next character continues the word (e.g., ${A}${B})
      
        730
                      ! Don't end token if next char is $ or other word character
      
        731
                      if (pos <= input_len) then
      
        732
                        next_ch = input(pos:pos)
      
        733
                        ! If next character starts a new expansion or is alphanumeric, continue token
      
        734
                        if (next_ch == '$' .or. next_ch == '{' .or. &
      
        735
                            (next_ch >= 'a' .and. next_ch <= 'z') .or. &
      
        736
                            (next_ch >= 'A' .and. next_ch <= 'Z') .or. &
      
        737
                            (next_ch >= '0' .and. next_ch <= '9') .or. &
      
        738
                            next_ch == '_' .or. next_ch == '-' .or. next_ch == '.') then
      
        739
                          ! Continue building the same token - don't end it yet
      
        740
                          ! state stays LEX_IN_WORD
      
        741
                        else
      
        742
                          ! End of parameter expansion - finish token
      
        743
                          call add_word_or_keyword(tokens, num_tokens, current_token(1:token_len), &
      
        744
                                                  token_start, pos-1, token_has_quoted_part, in_escape)
      
        745
                          state = LEX_NORMAL
      
        746
                          in_escape = .false.
      
        747
                          token_has_quoted_part = .false.
      
        748
                        end if
      
        749
                      else
      
        750
                        ! End of input - finish token
      
        751
                        call add_word_or_keyword(tokens, num_tokens, current_token(1:token_len), &
      
        752
                                                token_start, pos-1, token_has_quoted_part, in_escape)
      
        753
                        state = LEX_NORMAL
      
        754
                        in_escape = .false.
      
        755
                        token_has_quoted_part = .false.
      
        756
                      end if
      
        757
                    end if
      
        758
                  else
      
        759
                    ! Inside ${ - keep EVERYTHING including spaces
      
        760
                    if (token_len < MAX_TOKEN_LEN) then
      
        761
                      token_len = token_len + 1
      
        762
                      current_token(token_len:token_len) = ch
      
        763
                    end if
      
        764
                    pos = pos + 1
      
        765
                  end if
      
        766
                else if (ch == '\' .and. pos < input_len) then
      
        767
                  ! Backslash escape in word
      
        768
                  ! For expansion-triggering chars, preserve backslash
      
        769
                  if (next_ch == '$' .or. next_ch == '`') then
      
        770
                    if (token_len < MAX_TOKEN_LEN - 1) then
      
        771
                      token_len = token_len + 1
      
        772
                      current_token(token_len:token_len) = '\'
      
        773
                      token_len = token_len + 1
      
        774
                      current_token(token_len:token_len) = next_ch
      
        775
                    end if
      
        776
                  else
      
        777
                    if (token_len < MAX_TOKEN_LEN) then
      
        778
                      token_len = token_len + 1
      
        779
                      current_token(token_len:token_len) = next_ch
      
        780
                    end if
      
        781
                  end if
      
        782
                  pos = pos + 2
      
        783
                else if (ch == "'" .or. ch == '"') then
      
        784
                  ! Check for $' (ANSI-C quoting) when last char in token is $
      
        785
                  if (ch == "'" .and. token_len >= 1 .and. &
      
        786
                      current_token(token_len:token_len) == '$') then
      
        787
                    ! Remove trailing $ and process ANSI-C quoted string
      
        788
                    token_len = token_len - 1
      
        789
                    token_has_quoted_part = .true.
      
        790
                    pos = pos + 1  ! Skip opening '
      
        791
                    call process_ansi_c_quote(input, pos, input_len, current_token, token_len)
      
        792
                    cycle
      
        793
                  end if
      
        794
                  ! Quote in middle of word - continue building the same token
      
        795
                  ! Mark that we're continuing a word so quote handler doesn't reset the token
      
        796
                  continuing_word = .true.
      
        797
                  token_has_quoted_part = .true.  ! Track that this word contains quoted content
      
        798
                  ! Transition to appropriate quote state
      
        799
                  if (ch == "'") then
      
        800
                    ! Check for $' (ANSI-C quoting) — the $ is already in the token
      
        801
                    if (token_len >= 1 .and. current_token(token_len:token_len) == '$') then
      
        802
                      token_len = token_len - 1  ! Remove the $ from token
      
        803
                      state = LEX_IN_DOLLAR_SINGLE_QUOTE
      
        804
                      pos = pos + 1  ! Skip the opening quote
      
        805
                      cycle
      
        806
                    end if
      
        807
                    ! Add sentinel char(2) to mark start of single-quoted literal (no expansion)
      
        808
                    if (token_len < MAX_TOKEN_LEN) then
      
        809
                      token_len = token_len + 1
      
        810
                      current_token(token_len:token_len) = char(2)
      
        811
                    end if
      
        812
                    state = LEX_IN_SINGLE_QUOTE
      
        813
                  else
      
        814
                    state = LEX_IN_DOUBLE_QUOTE
      
        815
                  end if
      
        816
                  pos = pos + 1  ! Skip the opening quote
      
        817
                else if (ch == '#') then
      
        818
                  ! # is normally comment, but in $# it's part of variable
      
        819
                  ! Keep it if current token is just $
      
        820
                  if (token_len == 1 .and. current_token(1:1) == '$') then
      
        821
                    token_len = token_len + 1
      
        822
                    current_token(token_len:token_len) = ch
      
        823
                    pos = pos + 1
      
        824
                  else
      
        825
                    ! End word, let # start a comment
      
        826
                    call add_word_or_keyword(tokens, num_tokens, current_token(1:token_len), &
      
        827
                                            token_start, pos-1, token_has_quoted_part, in_escape)
      
        828
                    state = LEX_NORMAL
      
        829
                    in_escape = .false.
      
        830
                    token_has_quoted_part = .false.
      
        831
                  end if
      
        832
                else if (ch == '$' .and. pos < input_len .and. next_ch == '(') then
      
        833
                  ! $( for command/arithmetic substitution - keep in word
      
        834
                  if (token_len < MAX_TOKEN_LEN - 1) then
      
        835
                    token_len = token_len + 1
      
        836
                    current_token(token_len:token_len) = ch
      
        837
                    token_len = token_len + 1
      
        838
                    current_token(token_len:token_len) = next_ch
      
        839
                    paren_depth = 1  ! Track that we're inside $(
      
        840
                  end if
      
        841
                  pos = pos + 2
      
        842
                else if (ch == '$' .and. pos < input_len .and. next_ch == '{') then
      
        843
                  ! ${ for parameter expansion - keep in word
      
        844
                  if (token_len < MAX_TOKEN_LEN - 1) then
      
        845
                    token_len = token_len + 1
      
        846
                    current_token(token_len:token_len) = ch
      
        847
                    token_len = token_len + 1
      
        848
                    current_token(token_len:token_len) = next_ch
      
        849
                    paren_depth = 1  ! Track that we're inside ${
      
        850
                  end if
      
        851
                  pos = pos + 2
      
        852
                else if ((ch >= '0' .and. ch <= '9') .or. ch == '+' .or. ch == '-' .or. &
      
        853
                         ch == '*' .or. ch == '/' .or. ch == '%') then
      
        854
                  ! Keep these chars in word (for variables and arithmetic)
      
        855
                  if (token_len < MAX_TOKEN_LEN) then
      
        856
                    token_len = token_len + 1
      
        857
                    current_token(token_len:token_len) = ch
      
        858
                  end if
      
        859
                  pos = pos + 1
      
        860
                else if (ch == '(' .or. ch == ')') then
      
        861
                  ! Inside [[ ]], keep parens as part of word (regex patterns, grouping)
      
        862
                  if (in_double_bracket_context) then
      
        863
                    if (token_len < MAX_TOKEN_LEN) then
      
        864
                      token_len = token_len + 1
      
        865
                      current_token(token_len:token_len) = ch
      
        866
                    end if
      
        867
                    pos = pos + 1
      
        868
                  ! Parentheses: Keep ONLY if inside $(( or $(
      
        869
                  ! Check if current token ends with $ (for x=$(cmd) or just $(cmd))
      
        870
                  ! NOTE: Only for '(' - ')' after $ (like $$) should end the word
      
        871
                  else if (ch == '(' .and. token_len >= 1 .and. current_token(token_len:token_len) == '$') then
      
        872
                    ! Just added $, now seeing ( - this is $( substitution - keep both
      
        873
                    if (token_len < MAX_TOKEN_LEN) then
      
        874
                      token_len = token_len + 1
      
        875
                      current_token(token_len:token_len) = ch
      
        876
                    end if
      
        877
                    pos = pos + 1
      
        878
                  else if (token_len >= 2 .and. index(current_token(1:token_len), '$(') > 0) then
      
        879
                    ! Already inside $(...) - keep parens
      
        880
                    if (token_len < MAX_TOKEN_LEN) then
      
        881
                      token_len = token_len + 1
      
        882
                      current_token(token_len:token_len) = ch
      
        883
                    end if
      
        884
                    pos = pos + 1
      
        885
                  else if (ch == '(' .and. token_len >= 1 .and. &
      
        886
                           current_token(token_len:token_len) == '=') then
      
        887
                    ! Array assignment: VAR=(...) - include the parenthesized content
      
        888
                    ! Scan for matching ) respecting quotes and nested parens
      
        889
                    if (token_len < MAX_TOKEN_LEN) then
      
        890
                      token_len = token_len + 1
      
        891
                      current_token(token_len:token_len) = '('
      
        892
                    end if
      
        893
                    pos = pos + 1
      
        894
                    paren_depth = 1
      
        895
                    do while (pos <= input_len .and. paren_depth > 0)
      
        896
                      ch = input(pos:pos)
      
        897
                      if (ch == '"') then
      
        898
                        ! Skip double-quoted string
      
        899
                        if (token_len < MAX_TOKEN_LEN) then
      
        900
                          token_len = token_len + 1
      
        901
                          current_token(token_len:token_len) = ch
      
        902
                        end if
      
        903
                        pos = pos + 1
      
        904
                        do while (pos <= input_len .and. input(pos:pos) /= '"')
      
        905
                          if (input(pos:pos) == '\' .and. pos < input_len) then
      
        906
                            if (token_len < MAX_TOKEN_LEN - 1) then
      
        907
                              token_len = token_len + 1
      
        908
                              current_token(token_len:token_len) = input(pos:pos)
      
        909
                              token_len = token_len + 1
      
        910
                              current_token(token_len:token_len) = input(pos+1:pos+1)
      
        911
                            end if
      
        912
                            pos = pos + 2
      
        913
                          else
      
        914
                            if (token_len < MAX_TOKEN_LEN) then
      
        915
                              token_len = token_len + 1
      
        916
                              current_token(token_len:token_len) = input(pos:pos)
      
        917
                            end if
      
        918
                            pos = pos + 1
      
        919
                          end if
      
        920
                        end do
      
        921
                        if (pos <= input_len) then
      
        922
                          if (token_len < MAX_TOKEN_LEN) then
      
        923
                            token_len = token_len + 1
      
        924
                            current_token(token_len:token_len) = '"'
      
        925
                          end if
      
        926
                          pos = pos + 1
      
        927
                        end if
      
        928
                      else if (ch == "'") then
      
        929
                        ! Skip single-quoted string
      
        930
                        if (token_len < MAX_TOKEN_LEN) then
      
        931
                          token_len = token_len + 1
      
        932
                          current_token(token_len:token_len) = ch
      
        933
                        end if
      
        934
                        pos = pos + 1
      
        935
                        do while (pos <= input_len .and. input(pos:pos) /= "'")
      
        936
                          if (token_len < MAX_TOKEN_LEN) then
      
        937
                            token_len = token_len + 1
      
        938
                            current_token(token_len:token_len) = input(pos:pos)
      
        939
                          end if
      
        940
                          pos = pos + 1
      
        941
                        end do
      
        942
                        if (pos <= input_len) then
      
        943
                          if (token_len < MAX_TOKEN_LEN) then
      
        944
                            token_len = token_len + 1
      
        945
                            current_token(token_len:token_len) = "'"
      
        946
                          end if
      
        947
                          pos = pos + 1
      
        948
                        end if
      
        949
                      else if (ch == '(') then
      
        950
                        paren_depth = paren_depth + 1
      
        951
                        if (token_len < MAX_TOKEN_LEN) then
      
        952
                          token_len = token_len + 1
      
        953
                          current_token(token_len:token_len) = ch
      
        954
                        end if
      
        955
                        pos = pos + 1
      
        956
                      else if (ch == ')') then
      
        957
                        paren_depth = paren_depth - 1
      
        958
                        if (token_len < MAX_TOKEN_LEN) then
      
        959
                          token_len = token_len + 1
      
        960
                          current_token(token_len:token_len) = ch
      
        961
                        end if
      
        962
                        pos = pos + 1
      
        963
                      else
      
        964
                        if (token_len < MAX_TOKEN_LEN) then
      
        965
                          token_len = token_len + 1
      
        966
                          current_token(token_len:token_len) = ch
      
        967
                        end if
      
        968
                        pos = pos + 1
      
        969
                      end if
      
        970
                    end do
      
        971
                    ! Token complete with closing )
      
        972
                    call add_word_or_keyword(tokens, num_tokens, current_token(1:token_len), &
      
        973
                                            token_start, pos-1, token_has_quoted_part, in_escape)
      
        974
                    state = LEX_NORMAL
      
        975
                    in_escape = .false.
      
        976
                    token_has_quoted_part = .false.
      
        977
                  else
      
        978
                    ! Not in substitution - end word, let paren be operator
      
        979
                    call add_word_or_keyword(tokens, num_tokens, current_token(1:token_len), &
      
        980
                                            token_start, pos-1, token_has_quoted_part, in_escape)
      
        981
                    state = LEX_NORMAL
      
        982
                    in_escape = .false.
      
        983
                    token_has_quoted_part = .false.
      
        984
                  end if
      
        985
                else if (ch == '{' .or. ch == '}') then
      
        986
                  ! Braces: Keep in word for brace expansion (e.g., {1,2,3} or file{a,b}.txt)
      
        987
                  ! They're only command group operators when surrounded by whitespace
      
        988
                  if (token_len < MAX_TOKEN_LEN) then
      
        989
                    token_len = token_len + 1
      
        990
                    current_token(token_len:token_len) = ch
      
        991
                  end if
      
        992
                  pos = pos + 1
      
        993
                else if (in_double_bracket_context .and. &
      
        994
                         (ch == '&' .or. ch == '|' .or. ch == '<' .or. ch == '>' .or. &
      
        995
                          ch == '(' .or. ch == ')')) then
      
        996
                  ! Inside [[ ]], these are test operators, not shell operators
      
        997
                  if (token_len < MAX_TOKEN_LEN) then
      
        998
                    token_len = token_len + 1
      
        999
                    current_token(token_len:token_len) = ch
      
        1000
                  end if
      
        1001
                  pos = pos + 1
      
        1002
                else if (is_word_char(ch)) then
      
        1003
                  ! Continue word
      
        1004
                  if (token_len < MAX_TOKEN_LEN) then
      
        1005
                    token_len = token_len + 1
      
        1006
                    current_token(token_len:token_len) = ch
      
        1007
                  end if
      
        1008
                  pos = pos + 1
      
        1009
                else
      
        1010
                  ! End of word
      
        1011
                  call add_word_or_keyword(tokens, num_tokens, current_token(1:token_len), &
      
        1012
                                          token_start, pos-1, token_has_quoted_part, in_escape)
      
        1013
                  state = LEX_NORMAL
      
        1014
                  in_escape = .false.
      
        1015
                  token_has_quoted_part = .false.
      
        1016
                  ! Don't increment pos, let NORMAL state handle this character
      
        1017
                end if
      
        1018
        
        1019
              ! ============ OPERATOR STATE ============
      
        1020
              case(LEX_IN_OPERATOR)
      
        1021
                ! Try to match multi-character operators
      
        1022
                if (token_len == 1) then
      
        1023
                  select case(current_token(1:1))
      
        1024
                  case('&')
      
        1025
                    if (ch == '&') then
      
        1026
                      current_token(2:2) = ch
      
        1027
                      token_len = 2
      
        1028
                      call add_token(tokens, num_tokens, TOKEN_OPERATOR, '&&', token_start, pos, .false.)
      
        1029
                      state = LEX_NORMAL
      
        1030
                      pos = pos + 1
      
        1031
                    else
      
        1032
                      call add_token(tokens, num_tokens, TOKEN_OPERATOR, '&', token_start, pos-1, .false.)
      
        1033
                      state = LEX_NORMAL
      
        1034
                    end if
      
        1035
                  case('|')
      
        1036
                    if (ch == '|') then
      
        1037
                      current_token(2:2) = ch
      
        1038
                      token_len = 2
      
        1039
                      call add_token(tokens, num_tokens, TOKEN_OPERATOR, '||', token_start, pos, .false.)
      
        1040
                      state = LEX_NORMAL
      
        1041
                      pos = pos + 1
      
        1042
                    else
      
        1043
                      call add_token(tokens, num_tokens, TOKEN_OPERATOR, '|', token_start, pos-1, .false.)
      
        1044
                      state = LEX_NORMAL
      
        1045
                    end if
      
        1046
                  case('>')
      
        1047
                    if (ch == '>') then
      
        1048
                      current_token(2:2) = ch
      
        1049
                      token_len = 2
      
        1050
                      call add_token(tokens, num_tokens, TOKEN_REDIRECT, '>>', token_start, pos, .false.)
      
        1051
                      state = LEX_NORMAL
      
        1052
                      pos = pos + 1
      
        1053
                    else if (ch == '&') then
      
        1054
                      current_token(2:2) = ch
      
        1055
                      token_len = 2
      
        1056
                      call add_token(tokens, num_tokens, TOKEN_REDIRECT, '>&', token_start, pos, .false.)
      
        1057
                      state = LEX_NORMAL
      
        1058
                      pos = pos + 1
      
        1059
                    else if (ch == '|') then
      
        1060
                      current_token(2:2) = ch
      
        1061
                      token_len = 2
      
        1062
                      call add_token(tokens, num_tokens, TOKEN_REDIRECT, '>|', token_start, pos, .false.)
      
        1063
                      state = LEX_NORMAL
      
        1064
                      pos = pos + 1
      
        1065
                    else
      
        1066
                      call add_token(tokens, num_tokens, TOKEN_REDIRECT, '>', token_start, pos-1, .false.)
      
        1067
                      state = LEX_NORMAL
      
        1068
                    end if
      
        1069
                  case('<')
      
        1070
                    if (ch == '<') then
      
        1071
                      ! Could be << or <<< or <<-
      
        1072
                      if (pos < input_len .and. next_ch == '<') then
      
        1073
                        current_token(2:3) = '<<'
      
        1074
                        token_len = 3
      
        1075
                        call add_token(tokens, num_tokens, TOKEN_REDIRECT, '<<<', token_start, pos+1, .false.)
      
        1076
                        state = LEX_NORMAL
      
        1077
                        pos = pos + 2
      
        1078
                      else if (pos < input_len .and. next_ch == '-') then
      
        1079
                        current_token(2:3) = '<-'
      
        1080
                        token_len = 3
      
        1081
                        call add_token(tokens, num_tokens, TOKEN_REDIRECT, '<<-', token_start, pos+1, .false.)
      
        1082
                        state = LEX_NORMAL
      
        1083
                        pos = pos + 2
      
        1084
                      else
      
        1085
                        current_token(2:2) = ch
      
        1086
                        token_len = 2
      
        1087
                        call add_token(tokens, num_tokens, TOKEN_REDIRECT, '<<', token_start, pos, .false.)
      
        1088
                        state = LEX_NORMAL
      
        1089
                        pos = pos + 1
      
        1090
                      end if
      
        1091
                    else if (ch == '>') then
      
        1092
                      current_token(2:2) = ch
      
        1093
                      token_len = 2
      
        1094
                      call add_token(tokens, num_tokens, TOKEN_REDIRECT, '<>', token_start, pos, .false.)
      
        1095
                      state = LEX_NORMAL
      
        1096
                      pos = pos + 1
      
        1097
                    else if (ch == '&') then
      
        1098
                      current_token(2:2) = ch
      
        1099
                      token_len = 2
      
        1100
                      call add_token(tokens, num_tokens, TOKEN_REDIRECT, '<&', token_start, pos, .false.)
      
        1101
                      state = LEX_NORMAL
      
        1102
                      pos = pos + 1
      
        1103
                    else
      
        1104
                      call add_token(tokens, num_tokens, TOKEN_REDIRECT, '<', token_start, pos-1, .false.)
      
        1105
                      state = LEX_NORMAL
      
        1106
                    end if
      
        1107
                  case(';')
      
        1108
                    if (ch == ';') then
      
        1109
                      current_token(2:2) = ch
      
        1110
                      token_len = 2
      
        1111
                      call add_token(tokens, num_tokens, TOKEN_OPERATOR, ';;', token_start, pos, .false.)
      
        1112
                      state = LEX_NORMAL
      
        1113
                      pos = pos + 1
      
        1114
                    else
      
        1115
                      call add_token(tokens, num_tokens, TOKEN_OPERATOR, ';', token_start, pos-1, .false.)
      
        1116
                      state = LEX_NORMAL
      
        1117
                    end if
      
        1118
                  case('(', ')')
      
        1119
                    call add_token(tokens, num_tokens, TOKEN_OPERATOR, current_token(1:1), &
      
        1120
                                  token_start, pos-1, .false.)
      
        1121
                    state = LEX_NORMAL
      
        1122
                  case default
      
        1123
                    ! Single-character operator
      
        1124
                    call add_token(tokens, num_tokens, TOKEN_OPERATOR, current_token(1:1), &
      
        1125
                                  token_start, pos-1, .false.)
      
        1126
                    state = LEX_NORMAL
      
        1127
                  end select
      
        1128
                else
      
        1129
                  ! Multi-character operator complete
      
        1130
                  state = LEX_NORMAL
      
        1131
                end if
      
        1132
        
        1133
              end select
      
        1134
            end do
      
        1135
        
        1136
            ! Flush any remaining token
      
        1137
            if (state == LEX_IN_WORD .and. token_len > 0) then
      
        1138
              call add_word_or_keyword(tokens, num_tokens, current_token(1:token_len), &
      
        1139
                                      token_start, input_len, token_has_quoted_part, in_escape)
      
        1140
            else if (state == LEX_IN_SINGLE_QUOTE .or. state == LEX_IN_DOUBLE_QUOTE) then
      
        1141
              ! Unterminated quote - add as word with error marker
      
        1142
              if (state == LEX_IN_SINGLE_QUOTE) then
      
        1143
                call add_token(tokens, num_tokens, TOKEN_WORD, current_token(1:token_len), &
      
        1144
                            token_start, input_len, .true., quote_type=QUOTE_SINGLE)
      
        1145
              else
      
        1146
                call add_token(tokens, num_tokens, TOKEN_WORD, current_token(1:token_len), &
      
        1147
                            token_start, input_len, .true., quote_type=QUOTE_DOUBLE)
      
        1148
              end if
      
        1149
            else if (state == LEX_IN_OPERATOR .and. token_len > 0) then
      
        1150
              ! Flush operator
      
        1151
              call add_token(tokens, num_tokens, TOKEN_OPERATOR, current_token(1:token_len), &
      
        1152
                            token_start, input_len, .false.)
      
        1153
            end if
      
        1154
        
        1155
            ! Add EOF token
      
        1156
            call add_token(tokens, num_tokens, TOKEN_EOF, '', input_len+1, input_len+1, .false.)
      
        1157
        
        1158
          end subroutine tokenize
      
        1159
        
        1160
          ! =====================================
      
        1161
          ! Helper: Add token to array
      
        1162
          ! =====================================
      
        1163
          subroutine add_token(tokens, num_tokens, tok_type, value, start_pos, end_pos, quoted, escaped, quote_type)
            ! Append one token to the caller-supplied token array.
            !
            !   tokens      - token storage; entries 1..num_tokens are in use
            !   num_tokens  - count of used entries, incremented on success
            !   tok_type    - token type code stored in %token_type
            !   value       - token text; its len() is recorded in %value_length
            !   start_pos,
            !   end_pos     - positions stored in %start_pos / %end_pos
            !   quoted      - stored in %quoted
            !   escaped     - optional, stored in %escaped (.false. when absent)
            !   quote_type  - optional, stored in %quote_type (QUOTE_NONE when absent)
            !
            ! When the array has no free entry the token is dropped and
            ! num_tokens is left unchanged.
            use shell_types, only: QUOTE_NONE
            type(token_t), intent(inout) :: tokens(:)
            integer, intent(inout) :: num_tokens
            integer, intent(in) :: tok_type, start_pos, end_pos
            character(len=*), intent(in) :: value
            logical, intent(in) :: quoted
            logical, intent(in), optional :: escaped
            integer, intent(in), optional :: quote_type

            integer :: slot

            ! No room left: nothing to do
            if (num_tokens >= size(tokens)) return

            slot = num_tokens + 1

            ! Mandatory fields
            tokens(slot)%token_type = tok_type
            tokens(slot)%start_pos = start_pos
            tokens(slot)%end_pos = end_pos
            tokens(slot)%quoted = quoted
            tokens(slot)%value = value
            tokens(slot)%value_length = len(value)

            ! Optional fields: start from the defaults, override when supplied
            tokens(slot)%escaped = .false.
            if (present(escaped)) tokens(slot)%escaped = escaped

            tokens(slot)%quote_type = QUOTE_NONE
            if (present(quote_type)) tokens(slot)%quote_type = quote_type

            num_tokens = slot
          end subroutine add_token
      
        1193
        
        1194
          ! =====================================
      
        1195
          ! Helper: Add word or keyword token
      
        1196
          ! =====================================
      
        1197
          subroutine add_word_or_keyword(tokens, num_tokens, value, start_pos, end_pos, quoted, escaped)
            ! Classify a finished word as TOKEN_KEYWORD or TOKEN_WORD and append
            ! it via add_token. A word with a quoted part is never a keyword.
            ! An unquoted '[[' switches the module flag in_double_bracket_context
            ! on and an unquoted ']]' switches it off.
            type(token_t), intent(inout) :: tokens(:)
            integer, intent(inout) :: num_tokens
            character(len=*), intent(in) :: value
            integer, intent(in) :: start_pos, end_pos
            logical, intent(in) :: quoted
            logical, intent(in), optional :: escaped

            integer :: resolved_type

            ! Default classification; only unquoted text can be promoted
            resolved_type = TOKEN_WORD

            if (.not. quoted) then
              if (is_keyword(value)) resolved_type = TOKEN_KEYWORD

              ! Maintain the [[ ]] test-expression context
              select case (trim(value))
              case ('[[')
                in_double_bracket_context = .true.
              case (']]')
                in_double_bracket_context = .false.
              end select
            end if

            ! An absent optional 'escaped' is forwarded as absent
            call add_token(tokens, num_tokens, resolved_type, value, start_pos, end_pos, quoted, escaped)
          end subroutine add_word_or_keyword
      
        1224
        
        1225
          ! =====================================
      
        1226
          ! next_token - Get next token from stream
      
        1227
          ! =====================================
      
        1228
          function next_token(tokens, pos) result(tok)
            ! Return tokens(pos) and advance pos by one.
            !
            !   tokens - token array to read from
            !   pos    - 1-based cursor; incremented only when it was in range
            !   tok    - the token at pos, or a synthetic EOF token when pos is
            !            outside 1..size(tokens) (pos is then left unchanged)
            !
            ! Note: the range check is against size(tokens), not against the
            ! number of tokens actually produced by the lexer.
            type(token_t), intent(in) :: tokens(:)
            integer, intent(inout) :: pos
            type(token_t) :: tok

            if (pos >= 1 .and. pos <= size(tokens)) then
              tok = tokens(pos)
              pos = pos + 1
            else
              ! Synthetic EOF token. Assign every component that add_token
              ! assigns so the result has no undefined parts.
              tok%token_type = TOKEN_EOF
              tok%value = ''
              tok%value_length = 0
              tok%start_pos = 0
              tok%end_pos = 0
              tok%quoted = .false.
              tok%escaped = .false.
              tok%quote_type = QUOTE_NONE
            end if
          end function next_token
      
        1245
        
        1246
          ! =====================================
      
        1247
          ! peek_token - Look ahead without consuming
      
        1248
          ! =====================================
      
        1249
          function peek_token(tokens, pos) result(tok)
            ! Return tokens(pos) without moving the cursor.
            !
            !   tokens - token array to read from
            !   pos    - 1-based index to inspect
            !   tok    - the token at pos, or a synthetic EOF token when pos is
            !            outside 1..size(tokens)
            !
            ! Note: the range check is against size(tokens), not against the
            ! number of tokens actually produced by the lexer.
            type(token_t), intent(in) :: tokens(:)
            integer, intent(in) :: pos
            type(token_t) :: tok

            if (pos >= 1 .and. pos <= size(tokens)) then
              tok = tokens(pos)
            else
              ! Synthetic EOF token. Assign every component that add_token
              ! assigns so the result has no undefined parts.
              tok%token_type = TOKEN_EOF
              tok%value = ''
              tok%value_length = 0
              tok%start_pos = 0
              tok%end_pos = 0
              tok%quoted = .false.
              tok%escaped = .false.
              tok%quote_type = QUOTE_NONE
            end if
          end function peek_token
      
        1265
        
        1266
          ! =====================================
      
        1267
          ! process_ansi_c_quote - Handle $'...' ANSI-C quoting
      
        1268
          ! Reads characters from input starting at pos (after $'),
      
        1269
          ! processes escape sequences, appends to current_token.
      
        1270
          ! Wraps output in char(2)/char(3) sentinels to prevent expansion.
      
        1271
          ! =====================================
      
        1272
          subroutine process_ansi_c_quote(input, pos, input_len, current_token, token_len)
            ! Decode the body of a $'...' ANSI-C quoted string.
            !
            !   input         - full input line
            !   pos           - on entry: first character after the opening $'
            !                   on exit:  one past the closing quote, or past the
            !                             end of input if the quote is unterminated
            !   input_len     - number of valid characters in input
            !   current_token - token buffer the decoded text is appended to
            !   token_len     - used length of current_token, updated in place
            !
            ! The decoded text is bracketed by char(2) ... char(3) sentinels.
            ! Characters that do not fit in the buffer are dropped.
            !
            ! Recognised escapes: \a \b \e \E \f \n \r \t \v \\ \' \" \?,
            ! \nnn (1-3 octal digits) and \xHH (1-2 hex digits). Numeric escapes
            ! whose value is 0 or above 127 produce no output. Any other
            ! backslash sequence, including \x with no hex digit after it, is
            ! copied through literally as backslash + character.
            character(len=*), intent(in) :: input
            integer, intent(inout) :: pos
            integer, intent(in) :: input_len
            character(len=*), intent(inout) :: current_token
            integer, intent(inout) :: token_len

            character :: ch, esc_ch
            integer :: code, n_digits, digit, capacity

            ! Never write past either the lexer limit or the actual buffer
            capacity = min(MAX_TOKEN_LEN, len(current_token))

            ! Opening sentinel: marks start of no-expansion content
            call append_char(char(2))

            do while (pos <= input_len)
              ch = input(pos:pos)

              if (ch == "'") then
                ! Closing quote: consume it, emit closing sentinel, done
                pos = pos + 1
                call append_char(char(3))
                return
              end if

              if (ch == '\' .and. pos < input_len) then
                esc_ch = input(pos+1:pos+1)

                select case (esc_ch)
                case ('0':'7')
                  ! Octal: \nnn, up to 3 digits (esc_ch itself is the first)
                  code = 0
                  n_digits = 0
                  pos = pos + 1  ! step over the backslash
                  do while (pos <= input_len .and. n_digits < 3)
                    digit = index('01234567', input(pos:pos)) - 1
                    if (digit < 0) exit
                    code = code * 8 + digit
                    pos = pos + 1
                    n_digits = n_digits + 1
                  end do
                  if (code > 0 .and. code <= 127) call append_char(char(code))

                case ('x')
                  ! Hex: \xHH, up to 2 digits
                  code = 0
                  n_digits = 0
                  pos = pos + 2  ! step over \x
                  do while (pos <= input_len .and. n_digits < 2)
                    digit = hex_digit_value(input(pos:pos))
                    if (digit < 0) exit
                    code = code * 16 + digit
                    pos = pos + 1
                    n_digits = n_digits + 1
                  end do
                  if (n_digits == 0) then
                    ! No hex digit followed: not a valid escape, keep it as
                    ! written instead of silently discarding it
                    call append_pair('\', 'x')
                  else if (code > 0 .and. code <= 127) then
                    call append_char(char(code))
                  end if

                case default
                  code = simple_escape_code(esc_ch)
                  if (code >= 0) then
                    call append_char(char(code))
                  else
                    ! Unknown escape: keep backslash and character literally
                    call append_pair('\', esc_ch)
                  end if
                  pos = pos + 2
                end select
              else
                ! Ordinary character (also a lone backslash at end of input)
                call append_char(ch)
                pos = pos + 1
              end if
            end do

            ! Unterminated $'...': still close the sentinel pair
            call append_char(char(3))

          contains

            ! Append one character if the buffer has room; otherwise drop it.
            subroutine append_char(c)
              character, intent(in) :: c

              if (token_len < capacity) then
                token_len = token_len + 1
                current_token(token_len:token_len) = c
              end if
            end subroutine append_char

            ! Append two characters only if both fit; otherwise drop both.
            subroutine append_pair(c1, c2)
              character, intent(in) :: c1, c2

              if (token_len + 2 <= capacity) then
                current_token(token_len+1:token_len+1) = c1
                current_token(token_len+2:token_len+2) = c2
                token_len = token_len + 2
              end if
            end subroutine append_pair

            ! Character code for a single-letter escape, or -1 if unknown.
            pure function simple_escape_code(esc) result(val)
              character, intent(in) :: esc
              integer :: val

              select case (esc)
              case ('a')
                val = 7            ! alert (bell)
              case ('b')
                val = 8            ! backspace
              case ('e', 'E')
                val = 27           ! escape
              case ('f')
                val = 12           ! form feed
              case ('n')
                val = 10           ! newline
              case ('r')
                val = 13           ! carriage return
              case ('t')
                val = 9            ! horizontal tab
              case ('v')
                val = 11           ! vertical tab
              case ('\')
                val = ichar('\')   ! literal backslash
              case ("'")
                val = ichar("'")   ! literal single quote
              case ('"')
                val = ichar('"')   ! literal double quote
              case ('?')
                val = ichar('?')   ! literal question mark
              case default
                val = -1
              end select
            end function simple_escape_code

            ! Value 0..15 of a hex digit (either case), or -1 if not a hex digit.
            pure function hex_digit_value(c) result(val)
              character, intent(in) :: c
              integer :: val

              val = index('0123456789abcdef', c) - 1
              if (val < 0) val = index('0123456789ABCDEF', c) - 1
            end function hex_digit_value

          end subroutine process_ansi_c_quote
      
        1444
        
        1445
        end module lexer
      
        1446