| 1 | !> Lexical analyzer module for FORTBITE |
| 2 | !> |
| 3 | !> Tokenizes mathematical expressions into a stream of tokens for parsing. |
| 4 | !> Handles numbers, identifiers, operators, and special syntax like :: and := |
| 5 | module fortbite_lexer_m |
| 6 | use fortbite_types_m, only: token_t, TOKEN_EOF, TOKEN_NUMBER, TOKEN_IDENTIFIER, & |
| 7 | TOKEN_OPERATOR, TOKEN_LPAREN, TOKEN_RPAREN, & |
| 8 | TOKEN_LBRACKET, TOKEN_RBRACKET, TOKEN_SEMICOLON, & |
| 9 | TOKEN_COMMA, TOKEN_ASSIGN, TOKEN_PRECISION |
| 10 | use iso_fortran_env, only: real64 |
| 11 | implicit none |
| 12 | private |
| 13 | |
| 14 | public :: tokenize, free_tokens, print_tokens |
| 15 | |
! Lexer state: a cursor over the input string being tokenized
type :: lexer_state_t
character(len=:), allocatable :: input   ! expression text (trimmed/adjusted by tokenize)
integer :: position                      ! 1-based index of current_char within input
integer :: length                        ! len_trim(input), cached at initialization
character :: current_char                ! character at position, or char(0) once past the end
end type lexer_state_t
| 23 | |
| 24 | contains |
| 25 | |
!> Tokenize a mathematical expression.
!>
!> Returns an allocatable array of tokens, always terminated by a
!> TOKEN_EOF entry and trimmed to the exact number of tokens produced.
!> Empty (or all-blank) input yields a single EOF token.
function tokenize(expression) result(tokens)
   character(len=*), intent(in) :: expression
   type(token_t), allocatable :: tokens(:)

   type(lexer_state_t) :: lexer
   type(token_t) :: current_token
   integer :: token_count, capacity

   ! Initialize lexer state on the trimmed input
   lexer%input = trim(adjustl(expression))
   lexer%length = len_trim(lexer%input)
   lexer%position = 1

   if (lexer%length == 0) then
      ! Fix: %position was previously left undefined on this path
      allocate(tokens(1))
      tokens(1)%token_type = TOKEN_EOF
      tokens(1)%position = 1
      return
   end if

   lexer%current_char = lexer%input(1:1)

   ! Grow-on-demand token array; heuristic initial capacity
   capacity = max(16, lexer%length / 2)
   allocate(tokens(capacity))
   token_count = 0

   ! Main tokenization loop
   do
      call skip_whitespace(lexer)
      if (lexer%current_char == char(0) .or. lexer%position > lexer%length) exit

      current_token = next_token(lexer)

      ! Double capacity when full
      if (token_count >= capacity) then
         capacity = capacity * 2
         call resize_token_array(tokens, capacity)
      end if

      token_count = token_count + 1
      tokens(token_count) = current_token

      if (current_token%token_type == TOKEN_EOF) exit
   end do

   ! Guarantee EOF termination of the token stream
   if (token_count == 0 .or. tokens(token_count)%token_type /= TOKEN_EOF) then
      if (token_count >= capacity) then
         capacity = capacity + 1
         call resize_token_array(tokens, capacity)
      end if
      token_count = token_count + 1
      tokens(token_count)%token_type = TOKEN_EOF
      tokens(token_count)%position = lexer%position
   end if

   ! Shrink the array to the actual token count
   call resize_token_array(tokens, token_count)
end function tokenize
| 86 | |
!> Get the next token from the input stream.
!>
!> Skips leading whitespace, then dispatches on the current character:
!> digits and '.' go to read_number, letters and '_' to read_identifier,
!> brackets/semicolon/comma become single-character tokens, ':' goes to
!> read_colon_operator (handles ':=' and '::'), '*' to
!> read_power_or_multiply (handles '**'), and the remaining operator
!> characters to read_operator.
!>
!> NOTE(review): token%position is assigned below before the select, but
!> the read_* branches assign the whole token result, replacing that
!> value -- verify each read_* helper sets token%position itself.
function next_token(lexer) result(token)
type(lexer_state_t), intent(inout) :: lexer
type(token_t) :: token

call skip_whitespace(lexer)

! Past the end of input: emit an EOF token
if (lexer%position > lexer%length) then
token%token_type = TOKEN_EOF
token%position = lexer%position
return
end if

token%position = lexer%position

select case (lexer%current_char)
case ('0':'9', '.')
token = read_number(lexer)
case ('a':'z', 'A':'Z', '_')
token = read_identifier(lexer)
case ('(')
token%token_type = TOKEN_LPAREN
token%text = '('
call advance(lexer)
case (')')
token%token_type = TOKEN_RPAREN
token%text = ')'
call advance(lexer)
case ('[')
token%token_type = TOKEN_LBRACKET
token%text = '['
call advance(lexer)
case (']')
token%token_type = TOKEN_RBRACKET
token%text = ']'
call advance(lexer)
case (';')
token%token_type = TOKEN_SEMICOLON
token%text = ';'
call advance(lexer)
case (',')
token%token_type = TOKEN_COMMA
token%text = ','
call advance(lexer)
case (':')
token = read_colon_operator(lexer)
case ('+', '-', '/', '^', '=', '<', '>', '!')
token = read_operator(lexer)
case ('*')
token = read_power_or_multiply(lexer)
case default
! Unknown character - create error token
! NOTE(review): reusing TOKEN_EOF as an error marker makes the
! tokenize loop stop at the first unknown character; a dedicated
! TOKEN_ERROR kind would be clearer -- confirm intended behavior.
token%token_type = TOKEN_EOF ! We'll use this as error for now
token%text = lexer%current_char
call advance(lexer)
end select
end function next_token
| 144 | |
!> Read a numeric token: an integer or real literal with optional
!> fractional part and optional e/E exponent with sign (e.g. "42",
!> "3.14", ".5", "1.2e-3"). A lone '.' not followed by a digit is
!> returned as an operator token instead.
function read_number(lexer) result(token)
   type(lexer_state_t), intent(inout) :: lexer
   type(token_t) :: token

   integer :: start_pos
   logical :: has_dot, has_exp

   start_pos = lexer%position
   ! Fix: record the token's start position (was never set, leaving
   ! token%position undefined for number tokens)
   token%position = start_pos
   has_dot = .false.
   has_exp = .false.

   ! Leading decimal point is only a number if a digit follows (".5")
   if (lexer%current_char == '.') then
      has_dot = .true.
      call advance(lexer)
      if (.not. is_digit(lexer%current_char)) then
         ! Just a dot, not a number
         token%token_type = TOKEN_OPERATOR
         token%text = '.'
         return
      end if
   end if

   ! Integer part
   do while (is_digit(lexer%current_char))
      call advance(lexer)
   end do

   ! Fractional part (only if no dot was consumed yet)
   if (lexer%current_char == '.' .and. .not. has_dot) then
      has_dot = .true.
      call advance(lexer)
      do while (is_digit(lexer%current_char))
         call advance(lexer)
      end do
   end if

   ! Scientific-notation exponent with optional sign
   if ((lexer%current_char == 'e' .or. lexer%current_char == 'E') .and. .not. has_exp) then
      has_exp = .true.
      call advance(lexer)
      if (lexer%current_char == '+' .or. lexer%current_char == '-') then
         call advance(lexer)
      end if
      do while (is_digit(lexer%current_char))
         call advance(lexer)
      end do
   end if

   token%token_type = TOKEN_NUMBER
   token%text = lexer%input(start_pos:lexer%position-1)
end function read_number
| 198 | |
!> Read an identifier token: a run of letters, digits, and underscores
!> starting at the current character.
function read_identifier(lexer) result(token)
   type(lexer_state_t), intent(inout) :: lexer
   type(token_t) :: token

   integer :: start_pos

   start_pos = lexer%position
   ! Fix: record the token's start position (was never set, leaving
   ! token%position undefined for identifier tokens)
   token%position = start_pos

   ! Consume alphanumeric characters and underscores
   do while (is_alphanumeric(lexer%current_char))
      call advance(lexer)
   end do

   token%token_type = TOKEN_IDENTIFIER
   token%text = lexer%input(start_pos:lexer%position-1)
end function read_identifier
| 216 | |
!> Read colon-based operators: ':=' (assignment), '::' (precision),
!> or a bare ':' (returned as a generic operator token).
function read_colon_operator(lexer) result(token)
   type(lexer_state_t), intent(inout) :: lexer
   type(token_t) :: token

   integer :: start_pos

   start_pos = lexer%position
   ! Fix: start_pos was assigned but unused, and token%position was
   ! never set for colon tokens
   token%position = start_pos
   call advance(lexer) ! Skip first ':'

   if (lexer%current_char == '=') then
      ! Assignment operator :=
      call advance(lexer)
      token%token_type = TOKEN_ASSIGN
      token%text = ':='
   else if (lexer%current_char == ':') then
      ! Precision operator ::
      call advance(lexer)
      token%token_type = TOKEN_PRECISION
      token%text = '::'
   else
      ! Just a colon (should be an error in math expressions)
      token%token_type = TOKEN_OPERATOR
      token%text = ':'
   end if
end function read_colon_operator
| 243 | |
!> Read a mathematical or relational operator token.
!>
!> Handles single-character operators (+ - / ^ = < > !) and the
!> two-character forms ==, <=, >=, !=.
!>
!> Note: '*' never reaches this routine -- next_token dispatches it to
!> read_power_or_multiply -- so the former '*'/'**' branch here was
!> unreachable and has been removed.
function read_operator(lexer) result(token)
   type(lexer_state_t), intent(inout) :: lexer
   type(token_t) :: token

   ! Fix: record the token's start position (was never set, leaving
   ! token%position undefined for operator tokens)
   token%position = lexer%position

   select case (lexer%current_char)
   case ('+', '-', '/', '^')
      token%text = lexer%current_char
      call advance(lexer)
   case ('=')
      call advance(lexer)
      if (lexer%current_char == '=') then
         call advance(lexer)
         token%text = '=='
      else
         token%text = '='
      end if
   case ('<')
      call advance(lexer)
      if (lexer%current_char == '=') then
         call advance(lexer)
         token%text = '<='
      else
         token%text = '<'
      end if
   case ('>')
      call advance(lexer)
      if (lexer%current_char == '=') then
         call advance(lexer)
         token%text = '>='
      else
         token%text = '>'
      end if
   case ('!')
      call advance(lexer)
      if (lexer%current_char == '=') then
         call advance(lexer)
         token%text = '!='
      else
         token%text = '!'
      end if
   case default
      token%text = lexer%current_char
      call advance(lexer)
   end select

   token%token_type = TOKEN_OPERATOR
end function read_operator
| 305 | |
!> Distinguish '*' (multiply) from '**' (power); both are returned as
!> TOKEN_OPERATOR with text '*' or '**'.
function read_power_or_multiply(lexer) result(token)
   type(lexer_state_t), intent(inout) :: lexer
   type(token_t) :: token

   ! Fix: record the token's start position before consuming (was never
   ! set, leaving token%position undefined for '*'/'**' tokens)
   token%position = lexer%position
   call advance(lexer) ! Skip first '*'

   if (lexer%current_char == '*') then
      ! Power operator **
      call advance(lexer)
      token%token_type = TOKEN_OPERATOR
      token%text = '**'
   else
      ! Multiply operator *
      token%token_type = TOKEN_OPERATOR
      token%text = '*'
   end if
end function read_power_or_multiply
| 324 | |
!> Advance the cursor past any run of whitespace characters, stopping
!> at the first non-whitespace character or the end of input.
subroutine skip_whitespace(lexer)
   type(lexer_state_t), intent(inout) :: lexer

   do
      if (lexer%position > lexer%length) exit
      if (.not. is_whitespace(lexer%current_char)) exit
      call advance(lexer)
   end do
end subroutine skip_whitespace
| 333 | |
!> Move the cursor one character forward, updating current_char.
!> Past the end of input, current_char becomes char(0) as an EOF marker.
subroutine advance(lexer)
   type(lexer_state_t), intent(inout) :: lexer

   lexer%position = lexer%position + 1
   if (lexer%position > lexer%length) then
      lexer%current_char = char(0) ! EOF marker
   else
      lexer%current_char = lexer%input(lexer%position:lexer%position)
   end if
end subroutine advance
| 345 | |
!> True when c is a decimal digit ('0'..'9').
logical function is_digit(c)
   character, intent(in) :: c
   is_digit = index('0123456789', c) /= 0
end function is_digit
| 351 | |
!> True when c is a letter (a-z, A-Z) or an underscore.
logical function is_alpha(c)
   character, intent(in) :: c
   select case (c)
   case ('a':'z', 'A':'Z', '_')
      is_alpha = .true.
   case default
      is_alpha = .false.
   end select
end function is_alpha
| 357 | |
!> True when c is a letter, digit, or underscore (identifier character).
logical function is_alphanumeric(c)
   character, intent(in) :: c
   is_alphanumeric = (c >= '0' .and. c <= '9') .or. &
                     (c >= 'a' .and. c <= 'z') .or. &
                     (c >= 'A' .and. c <= 'Z') .or. c == '_'
end function is_alphanumeric
| 363 | |
!> True when c is a space, tab, line feed, or carriage return.
logical function is_whitespace(c)
   character, intent(in) :: c
   select case (iachar(c))
   case (32, 9, 10, 13) ! space, tab, LF, CR
      is_whitespace = .true.
   case default
      is_whitespace = .false.
   end select
end function is_whitespace
| 369 | |
!> Resize a token array to new_size, preserving the first
!> min(old_size, new_size) entries.
!>
!> Improvement: uses move_alloc (F2003) so the data is copied once into
!> the new buffer and then transferred without a second allocation and
!> copy-back, unlike the previous deallocate/reallocate/copy sequence.
subroutine resize_token_array(tokens, new_size)
   type(token_t), allocatable, intent(inout) :: tokens(:)
   integer, intent(in) :: new_size

   type(token_t), allocatable :: temp(:)
   integer :: keep

   ! Unallocated input: just allocate at the requested size
   if (.not. allocated(tokens)) then
      allocate(tokens(new_size))
      return
   end if

   if (new_size == size(tokens)) return

   keep = min(size(tokens), new_size)
   allocate(temp(new_size))
   temp(1:keep) = tokens(1:keep)
   ! Transfer temp's allocation into tokens without copying
   call move_alloc(temp, tokens)
end subroutine resize_token_array
| 400 | |
!> Release the token array's memory, if it is allocated.
subroutine free_tokens(tokens)
   type(token_t), allocatable, intent(inout) :: tokens(:)

   if (.not. allocated(tokens)) return
   deallocate(tokens)
end subroutine free_tokens
| 407 | |
!> Print tokens for debugging, one per line, stopping at the first EOF
!> token in the array.
!>
!> Fix: get_token_type_name returns character(len=12), so the name was
!> previously printed with trailing blanks inside the output; trim it.
subroutine print_tokens(tokens)
   type(token_t), intent(in) :: tokens(:)
   integer :: i

   write(*, '(A)') 'Tokens:'
   do i = 1, size(tokens)
      if (tokens(i)%token_type == TOKEN_EOF) then
         write(*, '(A,I0,A)') ' [', i, '] EOF'
         exit
      end if
      write(*, '(A,I0,A,A,A,A,A,I0)') ' [', i, '] ', &
         trim(get_token_type_name(tokens(i)%token_type)), ' "', &
         tokens(i)%text, '" @', tokens(i)%position
   end do
end subroutine print_tokens
| 424 | |
!> Map a token-type code to its human-readable name.
!> Unrecognized codes yield 'UNKNOWN'.
function get_token_type_name(token_type) result(name)
   integer, intent(in) :: token_type
   character(len=12) :: name

   ! Default covers any code not listed below
   name = 'UNKNOWN'

   select case (token_type)
   case (TOKEN_EOF)
      name = 'EOF'
   case (TOKEN_NUMBER)
      name = 'NUMBER'
   case (TOKEN_IDENTIFIER)
      name = 'IDENTIFIER'
   case (TOKEN_OPERATOR)
      name = 'OPERATOR'
   case (TOKEN_LPAREN)
      name = 'LPAREN'
   case (TOKEN_RPAREN)
      name = 'RPAREN'
   case (TOKEN_LBRACKET)
      name = 'LBRACKET'
   case (TOKEN_RBRACKET)
      name = 'RBRACKET'
   case (TOKEN_SEMICOLON)
      name = 'SEMICOLON'
   case (TOKEN_COMMA)
      name = 'COMMA'
   case (TOKEN_ASSIGN)
      name = 'ASSIGN'
   case (TOKEN_PRECISION)
      name = 'PRECISION'
   end select
end function get_token_type_name
| 459 | |
| 460 | end module fortbite_lexer_m |