fortsh Public

Watch 0 Fork 0 Star 0

Fortran · 17299 bytes Raw Blame History

  
        1
        ! ==============================================================================
      
        2
        ! Test: syntax_highlight v2 tokenizer
      
        3
        ! Unit tests for position-based tokenization and token classification.
      
        4
        ! ==============================================================================
      
        5
        program test_syntax_highlight
          ! Driver for the syntax_highlight v2 tokenizer unit tests. It runs every
          ! test_* internal procedure, prints a pass/fail summary, and terminates
          ! with `error stop 1` when at least one assertion failed.
          !
          ! Only the module entities this program references are imported, so the
          ! dependency on syntax_highlight is explicit and checked by the compiler.
          use syntax_highlight, only: hl_token_t, tokenize_v2, &
              HTOK_BUILTIN, HTOK_COMMAND_INVALID, HTOK_COMMAND_VALID, &
              HTOK_COMMENT, HTOK_DEFAULT, HTOK_GLOB, HTOK_KEYWORD, &
              HTOK_NUMBER, HTOK_OPERATOR, HTOK_OPTION, HTOK_PATH, &
              HTOK_REDIRECT, HTOK_STRING_DOUBLE, HTOK_STRING_SINGLE, &
              HTOK_VARIABLE
          implicit none

          ! Size of the token array each test hands to tokenize_v2 (MAX_TOKENS).
          integer, parameter :: MT = 100
          ! Horizontal rule printed around the banner and the summary.
          character(len=*), parameter :: RULE_LINE = '=========================================='

          ! Assertion counters; assert_eq updates them through host association.
          integer :: passed, failed, total

          passed = 0
          failed = 0
          total = 0

          write(*, '(a)') RULE_LINE
          write(*, '(a)') 'Testing Syntax Highlight v2 Tokenizer'
          write(*, '(a)') RULE_LINE

          ! --- Basic token type tests ---
          call test_simple_command()
          call test_invalid_command()
          call test_builtin_echo()
          call test_keyword_if_then_fi()
          call test_pipe_resets_cmd_pos()
          call test_and_or_resets_cmd_pos()
          call test_semicolon_resets_cmd_pos()
          call test_option_token()
          call test_single_quoted_string()
          call test_double_quoted_string()
          call test_variable_simple()
          call test_variable_brace()
          call test_variable_subshell()
          call test_comment()
          call test_redirect_gt()
          call test_redirect_append()
          call test_redirect_heredoc()
          call test_redirect_fd_prefix()
          call test_redirect_amp_gt()
          call test_operator_background()
          call test_operator_parens()
          call test_assignment()
          call test_glob_star()
          call test_glob_question()
          call test_path_token()
          call test_number_token()
          call test_empty_input()
          call test_whitespace_only()
          call test_complex_pipeline()
          call test_keyword_resets_cmd_pos()
          call test_case_esac()
          call test_c_style_for_loop()
          call test_keyword_outside_cmd_pos()

          ! Summary: counts are per assertion, not per test procedure.
          write(*, '(a)') ''
          write(*, '(a)') RULE_LINE
          write(*, '(a,i0,a,i0,a,i0,a)') 'Results: ', passed, ' passed, ', failed, ' failed (', total, ' total)'
          write(*, '(a)') RULE_LINE

          ! A non-zero stop code lets the caller detect a failed run.
          if (failed > 0) then
            write(*, '(a)') 'SOME TESTS FAILED!'
            error stop 1
          else
            write(*, '(a)') 'All tests passed!'
          end if
      
        66
        
        67
        contains
      
        68
        
        69
          subroutine assert_eq(test_name, expected, actual)
            ! Records one integer equality check in the host counters and prints
            ! a PASS or FAIL line for it.
            !   test_name : label printed with the result
            !   expected  : value the test requires
            !   actual    : value that was observed
            character(len=*), intent(in) :: test_name
            integer, intent(in) :: expected, actual

            total = total + 1

            ! Mismatch: count the failure, report both values, and leave.
            if (expected /= actual) then
              failed = failed + 1
              write(*, '(a,a,a,i0,a,i0,a)') '  FAIL: ', test_name, ' (expected=', expected, ', got=', actual, ')'
              return
            end if

            passed = passed + 1
            write(*, '(a,a)') '  PASS: ', test_name
          end subroutine assert_eq
      
        81
        
        82
          subroutine assert_token(test_name, tok, exp_start, exp_end, exp_type)
            ! Checks one token's start position, end position and type, as three
            ! separate assert_eq calls labelled '<test_name>: start|end|type'.
            character(len=*), intent(in) :: test_name
            type(hl_token_t), intent(in) :: tok
            integer, intent(in) :: exp_start, exp_end, exp_type
            character(len=:), allocatable :: label

            ! Trailing blanks are dropped once so the three labels share a prefix.
            label = trim(test_name)
            call assert_eq(label // ': start', exp_start, tok%start_pos)
            call assert_eq(label // ': end', exp_end, tok%end_pos)
            call assert_eq(label // ': type', exp_type, tok%token_type)
          end subroutine assert_token
      
        90
        
        91
          ! ======================== Tests ========================
      
        92
        
        93
          subroutine test_simple_command()
            ! 'ls -la /' must tokenize into a command, an option and a path.
            character(len=8) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'ls -la /'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('simple_cmd: count', 3, ntok)
            ! 'ls' occupies columns 1-2; HTOK_COMMAND_VALID presumes `ls` can be
            ! resolved on the machine running the tests.
            call assert_token('simple_cmd/ls', toks(1), 1, 2, HTOK_COMMAND_VALID)
            call assert_token('simple_cmd/-la', toks(2), 4, 6, HTOK_OPTION)
            call assert_token('simple_cmd//', toks(3), 8, 8, HTOK_PATH)
          end subroutine test_simple_command
      
        105
        
        106
          subroutine test_invalid_command()
            ! A first word that is not a known command is expected to be
            ! HTOK_COMMAND_INVALID, and its argument HTOK_DEFAULT.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            ! The literal ends in a blank; only the 15 non-blank-terminated
            ! characters are handed to the tokenizer.
            line = 'xyznotfound foo '
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('invalid_cmd: count', 2, ntok)
            call assert_token('invalid_cmd/xyz', toks(1), 1, 11, HTOK_COMMAND_INVALID)
            call assert_token('invalid_cmd/foo', toks(2), 13, 15, HTOK_DEFAULT)
          end subroutine test_invalid_command
      
        116
        
        117
          subroutine test_builtin_echo()
            ! 'echo' must be classified as a builtin and its two arguments as
            ! plain HTOK_DEFAULT words.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo hello world'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('builtin_echo: count', 3, ntok)
            call assert_token('builtin_echo/echo', toks(1), 1, 4, HTOK_BUILTIN)
            call assert_token('builtin_echo/hello', toks(2), 6, 10, HTOK_DEFAULT)
            call assert_token('builtin_echo/world', toks(3), 12, 16, HTOK_DEFAULT)
          end subroutine test_builtin_echo
      
        128
        
        129
          subroutine test_keyword_if_then_fi()
            ! Checks keyword recognition in a one-line if/then/fi construct.
            character(len=32) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'if true; then echo hi; fi'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            ! Expected sequence: if=KEYWORD, true=CMD, ;=OP, then=KEYWORD,
            ! echo=BUILTIN, hi=DEFAULT, ;=OP, fi=KEYWORD.
            ! Tokens 2 ('true') and 7 (second ';') are counted but not inspected.
            call assert_eq('if_then_fi: count', 8, ntok)
            call assert_token('if_then_fi/if', toks(1), 1, 2, HTOK_KEYWORD)
            call assert_token('if_then_fi/;', toks(3), 8, 8, HTOK_OPERATOR)
            call assert_token('if_then_fi/then', toks(4), 10, 13, HTOK_KEYWORD)
            call assert_token('if_then_fi/echo', toks(5), 15, 18, HTOK_BUILTIN)
            call assert_token('if_then_fi/hi', toks(6), 20, 21, HTOK_DEFAULT)
            call assert_token('if_then_fi/fi', toks(8), 24, 25, HTOK_KEYWORD)
          end subroutine test_keyword_if_then_fi
      
        144
        
        145
          subroutine test_pipe_resets_cmd_pos()
            ! The word following '|' must be classified as a command again.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'cat file | wc -l'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('pipe: count', 5, ntok)
            call assert_token('pipe/|', toks(3), 10, 10, HTOK_OPERATOR)
            ! HTOK_COMMAND_VALID presumes `wc` is resolvable where the tests run.
            call assert_token('pipe/wc', toks(4), 12, 13, HTOK_COMMAND_VALID)
            call assert_token('pipe/-l', toks(5), 15, 16, HTOK_OPTION)
          end subroutine test_pipe_resets_cmd_pos
      
        156
        
        157
          subroutine test_and_or_resets_cmd_pos()
            ! The word following '&&' must be classified as a command/builtin.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'ls && pwd'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('and_or: count', 3, ntok)
            call assert_token('and_or/&&', toks(2), 4, 5, HTOK_OPERATOR)
            call assert_token('and_or/pwd', toks(3), 7, 9, HTOK_BUILTIN)
          end subroutine test_and_or_resets_cmd_pos
      
        167
        
        168
          subroutine test_semicolon_resets_cmd_pos()
            ! The word following ';' must be classified as a command/builtin.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo a; echo b'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('semicolon: count', 5, ntok)
            call assert_token('semicolon/;', toks(3), 7, 7, HTOK_OPERATOR)
            call assert_token('semicolon/echo2', toks(4), 9, 12, HTOK_BUILTIN)
          end subroutine test_semicolon_resets_cmd_pos
      
        178
        
        179
          subroutine test_option_token()
            ! A long option with an '=value' part must be one HTOK_OPTION token.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            ! The literal ends in a blank that is excluded from the length passed.
            line = 'ls --color=auto '
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('option: count', 2, ntok)
            call assert_token('option/--color', toks(2), 4, 15, HTOK_OPTION)
          end subroutine test_option_token
      
        188
        
        189
          subroutine test_single_quoted_string()
            ! A single-quoted argument, quotes included, must be one
            ! HTOK_STRING_SINGLE token even though it contains a blank.
            character(len=32) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = "echo 'hello world'"
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('sq_string: count', 2, ntok)
            call assert_token('sq_string/str', toks(2), 6, 18, HTOK_STRING_SINGLE)
          end subroutine test_single_quoted_string
      
        198
        
        199
          subroutine test_double_quoted_string()
            ! A double-quoted argument, quotes included, must be one
            ! HTOK_STRING_DOUBLE token even though it contains a blank.
            character(len=32) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo "hello world"'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('dq_string: count', 2, ntok)
            call assert_token('dq_string/str', toks(2), 6, 18, HTOK_STRING_DOUBLE)
          end subroutine test_double_quoted_string
      
        208
        
        209
          subroutine test_variable_simple()
            ! '$NAME' must be classified as HTOK_VARIABLE.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo $HOME'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('var_simple: count', 2, ntok)
            call assert_token('var_simple/$HOME', toks(2), 6, 10, HTOK_VARIABLE)
          end subroutine test_variable_simple
      
        218
        
        219
          subroutine test_variable_brace()
            ! '${NAME}' must be one HTOK_VARIABLE token spanning both braces.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo ${PATH}'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('var_brace: count', 2, ntok)
            call assert_token('var_brace/${PATH}', toks(2), 6, 12, HTOK_VARIABLE)
          end subroutine test_variable_brace
      
        228
        
        229
          subroutine test_variable_subshell()
            ! A '$( ... )' command substitution must be one HTOK_VARIABLE token.
            character(len=32) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo $(whoami)'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('var_subshell: count', 2, ntok)
            call assert_token('var_subshell/$()', toks(2), 6, 14, HTOK_VARIABLE)
          end subroutine test_variable_subshell
      
        238
        
        239
          subroutine test_comment()
            ! A line starting with '#' must be one HTOK_COMMENT token covering
            ! the whole input.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = '# this is a test'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('comment: count', 1, ntok)
            call assert_token('comment/#...', toks(1), 1, 16, HTOK_COMMENT)
          end subroutine test_comment
      
        248
        
        249
          subroutine test_redirect_gt()
            ! '>' must be HTOK_REDIRECT and its target a plain HTOK_DEFAULT word.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo hi > out'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('redir_gt: count', 4, ntok)
            call assert_token('redir_gt/>', toks(3), 9, 9, HTOK_REDIRECT)
            call assert_token('redir_gt/out', toks(4), 11, 13, HTOK_DEFAULT)
          end subroutine test_redirect_gt
      
        259
        
        260
          subroutine test_redirect_append()
            ! '>>' must be a single two-character HTOK_REDIRECT token.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo hi >> out'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('redir_append: count', 4, ntok)
            call assert_token('redir_append/>>', toks(3), 9, 10, HTOK_REDIRECT)
          end subroutine test_redirect_append
      
        269
        
        270
          subroutine test_redirect_heredoc()
            ! '<<' must be a single two-character HTOK_REDIRECT token.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'cat << EOF'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('redir_heredoc: count', 3, ntok)
            call assert_token('redir_heredoc/<<', toks(2), 5, 6, HTOK_REDIRECT)
          end subroutine test_redirect_heredoc
      
        279
        
        280
          subroutine test_redirect_fd_prefix()
            ! A file-descriptor prefix belongs to the redirect token: '2>' is one
            ! HTOK_REDIRECT, and the path directly after it is a separate token.
            character(len=32) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'cmd 2>/dev/null'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            ! Expected: 'cmd' = command_invalid, '2>' = redirect, '/dev/null' = path.
            ! The type of 'cmd' itself is not asserted here.
            call assert_eq('redir_fd: count', 3, ntok)
            call assert_token('redir_fd/2>', toks(2), 5, 6, HTOK_REDIRECT)
            call assert_token('redir_fd//dev/null', toks(3), 7, 15, HTOK_PATH)
          end subroutine test_redirect_fd_prefix
      
        291
        
        292
          subroutine test_redirect_amp_gt()
            ! '&>' must be a single two-character HTOK_REDIRECT token.
            character(len=32) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'cmd &> /dev/null'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('redir_amp_gt: count', 3, ntok)
            call assert_token('redir_amp_gt/&>', toks(2), 5, 6, HTOK_REDIRECT)
          end subroutine test_redirect_amp_gt
      
        301
        
        302
          subroutine test_operator_background()
            ! A trailing '&' must be classified as HTOK_OPERATOR.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'sleep 10 &'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('bg: count', 3, ntok)
            call assert_token('bg/&', toks(3), 10, 10, HTOK_OPERATOR)
          end subroutine test_operator_background
      
        311
        
        312
          subroutine test_operator_parens()
            ! '(' and ')' must each be an HTOK_OPERATOR, and the word directly
            ! after '(' must be classified as a command/builtin.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = '(echo hi)'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('parens: count', 4, ntok)
            call assert_token('parens/(', toks(1), 1, 1, HTOK_OPERATOR)
            call assert_token('parens/echo', toks(2), 2, 5, HTOK_BUILTIN)
            call assert_token('parens/)', toks(4), 9, 9, HTOK_OPERATOR)
          end subroutine test_operator_parens
      
        323
        
        324
          subroutine test_assignment()
            ! 'FOO=bar' as the only word must produce exactly one token.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'FOO=bar'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            ! Author's note (tokenizer internals are not visible from this file):
            ! in command position the tokenizer tries keyword, builtin, valid
            ! command and invalid command in that order, and only detects an
            ! '=' assignment outside command position. So FOO=bar is treated as
            ! a command here, and only the token count is checked.
            call assert_eq('assign: count', 1, ntok)
          end subroutine test_assignment
      
        336
        
        337
          subroutine test_glob_star()
            ! A word containing '*' must be classified as HTOK_GLOB.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo *.txt'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('glob_star: count', 2, ntok)
            call assert_token('glob_star/*.txt', toks(2), 6, 10, HTOK_GLOB)
          end subroutine test_glob_star
      
        346
        
        347
          subroutine test_glob_question()
            ! A word containing '?' must be classified as HTOK_GLOB.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo file?.log'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('glob_question: count', 2, ntok)
            call assert_token('glob_question/file?.log', toks(2), 6, 14, HTOK_GLOB)
          end subroutine test_glob_question
      
        356
        
        357
          subroutine test_path_token()
            ! An absolute path argument must be classified as HTOK_PATH.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo /usr/bin'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('path: count', 2, ntok)
            call assert_token('path//usr/bin', toks(2), 6, 13, HTOK_PATH)
          end subroutine test_path_token
      
        366
        
        367
          subroutine test_number_token()
            ! An all-digit argument must be classified as HTOK_NUMBER.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo 42'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('number: count', 2, ntok)
            call assert_token('number/42', toks(2), 6, 7, HTOK_NUMBER)
          end subroutine test_number_token
      
        376
        
        377
          subroutine test_empty_input()
            ! A zero-length input must produce no tokens.
            integer :: ntok
            type(hl_token_t) :: toks(MT)

            call tokenize_v2('', 0, toks, ntok)
            call assert_eq('empty: count', 0, ntok)
          end subroutine test_empty_input
      
        383
        
        384
          subroutine test_whitespace_only()
            ! An input made only of blanks must produce no tokens.
            character(len=4) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = '    '
            ! len, not len_trim: all four blanks are part of the input.
            call tokenize_v2(line, len(line), toks, ntok)

            call assert_eq('whitespace: count', 0, ntok)
          end subroutine test_whitespace_only
      
        392
        
        393
          subroutine test_complex_pipeline()
            ! Checks the token types (not positions) across a three-stage
            ! pipeline: ls -la | grep foo | wc -l
            character(len=32) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'ls -la | grep foo | wc -l'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('pipeline: count', 8, ntok)
            ! Stage 1. HTOK_COMMAND_VALID presumes ls/grep/wc are resolvable
            ! on the machine running the tests.
            call assert_eq('pipeline/ls type', HTOK_COMMAND_VALID, toks(1)%token_type)
            call assert_eq('pipeline/-la type', HTOK_OPTION, toks(2)%token_type)
            call assert_eq('pipeline/|1 type', HTOK_OPERATOR, toks(3)%token_type)
            ! Stage 2.
            call assert_eq('pipeline/grep type', HTOK_COMMAND_VALID, toks(4)%token_type)
            call assert_eq('pipeline/foo type', HTOK_DEFAULT, toks(5)%token_type)
            call assert_eq('pipeline/|2 type', HTOK_OPERATOR, toks(6)%token_type)
            ! Stage 3.
            call assert_eq('pipeline/wc type', HTOK_COMMAND_VALID, toks(7)%token_type)
            call assert_eq('pipeline/-l type', HTOK_OPTION, toks(8)%token_type)
          end subroutine test_complex_pipeline
      
        410
        
        411
          subroutine test_keyword_resets_cmd_pos()
            ! 'then' and 'else' must put the tokenizer back in command position,
            ! so the word after each is classified as a command/builtin.
            character(len=32) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'if true; then ls; else pwd; fi'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            ! Expected sequence:
            ! if=KW true=CMD ;=OP then=KW ls=CMD ;=OP else=KW pwd=BUILTIN ;=OP fi=KW
            call assert_eq('kw_reset: count', 10, ntok)
            call assert_eq('kw_reset/if', HTOK_KEYWORD, toks(1)%token_type)
            call assert_eq('kw_reset/then', HTOK_KEYWORD, toks(4)%token_type)
            call assert_eq('kw_reset/ls', HTOK_COMMAND_VALID, toks(5)%token_type)
            call assert_eq('kw_reset/else', HTOK_KEYWORD, toks(7)%token_type)
            call assert_eq('kw_reset/pwd', HTOK_BUILTIN, toks(8)%token_type)
            call assert_eq('kw_reset/fi', HTOK_KEYWORD, toks(10)%token_type)
          end subroutine test_keyword_resets_cmd_pos
      
        427
        
        428
          subroutine test_case_esac()
            ! Both 'case' and 'esac' must be classified as keywords.
            character(len=16) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'case esac'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('case_esac: count', 2, ntok)
            call assert_eq('case_esac/case', HTOK_KEYWORD, toks(1)%token_type)
            ! 'esac' is expected to be a keyword regardless of command position.
            call assert_eq('case_esac/esac', HTOK_KEYWORD, toks(2)%token_type)
          end subroutine test_case_esac
      
        439
        
        440
          ! C-style for loop: for ((i=0; i<3; i++)) do echo $i; done
      
        441
          subroutine test_c_style_for_loop()
            ! Checks token types for a C-style for loop written on one line.
            !
            ! The token count is asserted first, and the per-token checks are
            ! skipped when fewer than seven tokens came back. Without that guard
            ! a short result would make the assertions below read toks elements
            ! that tokenize_v2 never filled in.
            integer, parameter :: n_expected = 7
            character(len=256) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'for ((i=0; i<3; i++)) do echo $i; done'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('cfor: count', n_expected, ntok)
            if (ntok < n_expected) return

            ! for = KEYWORD, ((...)) = NUMBER (arithmetic), do = KEYWORD,
            ! echo = BUILTIN, $i = VARIABLE, ; = OPERATOR, done = KEYWORD
            call assert_eq('cfor: for', HTOK_KEYWORD, toks(1)%token_type)
            call assert_eq('cfor: (())', HTOK_NUMBER, toks(2)%token_type)
            call assert_eq('cfor: do', HTOK_KEYWORD, toks(3)%token_type)
            call assert_eq('cfor: echo', HTOK_BUILTIN, toks(4)%token_type)
            call assert_eq('cfor: $i', HTOK_VARIABLE, toks(5)%token_type)
            call assert_eq('cfor: ;', HTOK_OPERATOR, toks(6)%token_type)
            call assert_eq('cfor: done', HTOK_KEYWORD, toks(7)%token_type)
          end subroutine test_c_style_for_loop
      
        458
        
        459
          ! Keywords recognized even outside command position
      
        460
          subroutine test_keyword_outside_cmd_pos()
            ! 'done' after a builtin (i.e. in argument position) must still be
            ! classified as a keyword.
            !
            ! The token count is asserted first, and the per-token checks are
            ! skipped when fewer than two tokens came back, so no toks element
            ! that tokenize_v2 left unset is ever read.
            integer, parameter :: n_expected = 2
            character(len=256) :: line
            type(hl_token_t) :: toks(MT)
            integer :: ntok

            line = 'echo done'
            call tokenize_v2(line, len_trim(line), toks, ntok)

            call assert_eq('kw_nonpos: count', n_expected, ntok)
            if (ntok < n_expected) return

            call assert_eq('kw_nonpos: echo', HTOK_BUILTIN, toks(1)%token_type)
            call assert_eq('kw_nonpos: done', HTOK_KEYWORD, toks(2)%token_type)
          end subroutine test_keyword_outside_cmd_pos
      
        471
        
        472
        end program test_syntax_highlight
      
        473

1	! ==============================================================================
2	! Test: syntax_highlight v2 tokenizer
3	! Unit tests for position-based tokenization and token classification.
4	! ==============================================================================
program test_syntax_highlight
  ! Driver for the syntax_highlight v2 tokenizer unit tests. It runs every
  ! test_* internal procedure, prints a pass/fail summary, and terminates
  ! with `error stop 1` when at least one assertion failed.
  !
  ! Only the module entities this program references are imported, so the
  ! dependency on syntax_highlight is explicit and checked by the compiler.
  use syntax_highlight, only: hl_token_t, tokenize_v2, &
      HTOK_BUILTIN, HTOK_COMMAND_INVALID, HTOK_COMMAND_VALID, &
      HTOK_COMMENT, HTOK_DEFAULT, HTOK_GLOB, HTOK_KEYWORD, &
      HTOK_NUMBER, HTOK_OPERATOR, HTOK_OPTION, HTOK_PATH, &
      HTOK_REDIRECT, HTOK_STRING_DOUBLE, HTOK_STRING_SINGLE, &
      HTOK_VARIABLE
  implicit none

  ! Size of the token array each test hands to tokenize_v2 (MAX_TOKENS).
  integer, parameter :: MT = 100
  ! Horizontal rule printed around the banner and the summary.
  character(len=*), parameter :: RULE_LINE = '=========================================='

  ! Assertion counters; assert_eq updates them through host association.
  integer :: passed, failed, total

  passed = 0
  failed = 0
  total = 0

  write(*, '(a)') RULE_LINE
  write(*, '(a)') 'Testing Syntax Highlight v2 Tokenizer'
  write(*, '(a)') RULE_LINE

  ! --- Basic token type tests ---
  call test_simple_command()
  call test_invalid_command()
  call test_builtin_echo()
  call test_keyword_if_then_fi()
  call test_pipe_resets_cmd_pos()
  call test_and_or_resets_cmd_pos()
  call test_semicolon_resets_cmd_pos()
  call test_option_token()
  call test_single_quoted_string()
  call test_double_quoted_string()
  call test_variable_simple()
  call test_variable_brace()
  call test_variable_subshell()
  call test_comment()
  call test_redirect_gt()
  call test_redirect_append()
  call test_redirect_heredoc()
  call test_redirect_fd_prefix()
  call test_redirect_amp_gt()
  call test_operator_background()
  call test_operator_parens()
  call test_assignment()
  call test_glob_star()
  call test_glob_question()
  call test_path_token()
  call test_number_token()
  call test_empty_input()
  call test_whitespace_only()
  call test_complex_pipeline()
  call test_keyword_resets_cmd_pos()
  call test_case_esac()
  call test_c_style_for_loop()
  call test_keyword_outside_cmd_pos()

  ! Summary: counts are per assertion, not per test procedure.
  write(*, '(a)') ''
  write(*, '(a)') RULE_LINE
  write(*, '(a,i0,a,i0,a,i0,a)') 'Results: ', passed, ' passed, ', failed, ' failed (', total, ' total)'
  write(*, '(a)') RULE_LINE

  ! A non-zero stop code lets the caller detect a failed run.
  if (failed > 0) then
    write(*, '(a)') 'SOME TESTS FAILED!'
    error stop 1
  else
    write(*, '(a)') 'All tests passed!'
  end if
66
67	contains
68
subroutine assert_eq(test_name, expected, actual)
  ! Records one integer equality check in the host counters and prints
  ! a PASS or FAIL line for it.
  !   test_name : label printed with the result
  !   expected  : value the test requires
  !   actual    : value that was observed
  character(len=*), intent(in) :: test_name
  integer, intent(in) :: expected, actual

  total = total + 1

  ! Mismatch: count the failure, report both values, and leave.
  if (expected /= actual) then
    failed = failed + 1
    write(*, '(a,a,a,i0,a,i0,a)') ' FAIL: ', test_name, ' (expected=', expected, ', got=', actual, ')'
    return
  end if

  passed = passed + 1
  write(*, '(a,a)') ' PASS: ', test_name
end subroutine assert_eq
81
!> Check one token's start position, end position and type. Each field is a
!> separate assert_eq call, so a single token adds three assertions.
subroutine assert_token(test_name, tok, exp_start, exp_end, exp_type)
  character(len=*), intent(in) :: test_name  !< prefix for the three labels
  type(hl_token_t), intent(in) :: tok        !< token under inspection
  integer, intent(in) :: exp_start           !< expected tok%start_pos
  integer, intent(in) :: exp_end             !< expected tok%end_pos
  integer, intent(in) :: exp_type            !< expected tok%token_type

  character(len=:), allocatable :: label

  ! Trim once; the three labels share the same prefix.
  label = trim(test_name)
  call assert_eq(label // ': start', exp_start, tok%start_pos)
  call assert_eq(label // ': end', exp_end, tok%end_pos)
  call assert_eq(label // ': type', exp_type, tok%token_type)
end subroutine assert_token
90
! ======================== Tests ========================
92
!> 'ls -la /' should yield three tokens: command, option, path.
subroutine test_simple_command()
  integer, parameter :: line_len = 8
  character(len=line_len) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'ls -la /'
  call tokenize_v2(line, line_len, toks, ntok)

  call assert_eq('simple_cmd: count', 3, ntok)
  ! 'ls' occupies 1-2; COMMAND_VALID is expected (the original test notes
  ! that ls exists).
  call assert_token('simple_cmd/ls', toks(1), 1, 2, HTOK_COMMAND_VALID)
  call assert_token('simple_cmd/-la', toks(2), 4, 6, HTOK_OPTION)
  call assert_token('simple_cmd//', toks(3), 8, 8, HTOK_PATH)
end subroutine test_simple_command
105
!> A first word expected to be classified COMMAND_INVALID, followed by a
!> plain argument.
subroutine test_invalid_command()
  integer, parameter :: used_len = 15  ! trailing blank of the buffer is excluded
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'xyznotfound foo '
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('invalid_cmd: count', 2, ntok)
  call assert_token('invalid_cmd/xyz', toks(1), 1, 11, HTOK_COMMAND_INVALID)
  call assert_token('invalid_cmd/foo', toks(2), 13, 15, HTOK_DEFAULT)
end subroutine test_invalid_command
116
!> 'echo' is expected to be a builtin; its two arguments are default words.
subroutine test_builtin_echo()
  integer, parameter :: line_len = 16
  character(len=line_len) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo hello world'
  call tokenize_v2(line, line_len, toks, ntok)

  call assert_eq('builtin_echo: count', 3, ntok)
  call assert_token('builtin_echo/echo', toks(1), 1, 4, HTOK_BUILTIN)
  call assert_token('builtin_echo/hello', toks(2), 6, 10, HTOK_DEFAULT)
  call assert_token('builtin_echo/world', toks(3), 12, 16, HTOK_DEFAULT)
end subroutine test_builtin_echo
128
!> A one-line if/then/fi: keywords, separators and the commands between them.
subroutine test_keyword_if_then_fi()
  integer, parameter :: used_len = 25
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'if true; then echo hi; fi'
  call tokenize_v2(line, used_len, toks, ntok)

  ! Token order: if(KEYWORD) true(CMD) ;(OP) then(KEYWORD) echo(BUILTIN)
  !              hi(DEFAULT) ;(OP) fi(KEYWORD)
  ! Tokens 2 ('true') and 7 (second ';') are counted but not inspected.
  call assert_eq('if_then_fi: count', 8, ntok)
  call assert_token('if_then_fi/if', toks(1), 1, 2, HTOK_KEYWORD)
  call assert_token('if_then_fi/;', toks(3), 8, 8, HTOK_OPERATOR)
  call assert_token('if_then_fi/then', toks(4), 10, 13, HTOK_KEYWORD)
  call assert_token('if_then_fi/echo', toks(5), 15, 18, HTOK_BUILTIN)
  call assert_token('if_then_fi/hi', toks(6), 20, 21, HTOK_DEFAULT)
  call assert_token('if_then_fi/fi', toks(8), 24, 25, HTOK_KEYWORD)
end subroutine test_keyword_if_then_fi
144
!> A pipe is expected to be an operator, and the word after it is expected
!> to be classified as a command again.
subroutine test_pipe_resets_cmd_pos()
  type(hl_token_t) :: tokens(MT)
  integer :: n
  character(len=16) :: input

  ! The pipe must be a bare '|'. A backslash-escaped pipe makes the literal
  ! 17 characters, which is truncated in this 16-character buffer and shifts
  ! every offset asserted below.
  ! Layout: cat=1-3, file=5-8, |=10, wc=12-13, -l=15-16.
  input = 'cat file | wc -l'
  call tokenize_v2(input, 16, tokens, n)

  call assert_eq('pipe: count', 5, n)
  call assert_token('pipe/|', tokens(3), 10, 10, HTOK_OPERATOR)
  call assert_token('pipe/wc', tokens(4), 12, 13, HTOK_COMMAND_VALID)
  call assert_token('pipe/-l', tokens(5), 15, 16, HTOK_OPTION)
end subroutine test_pipe_resets_cmd_pos
156
!> '&&' is expected to be one two-character operator, and the word after it
!> is expected to be classified as a command (here the builtin pwd).
subroutine test_and_or_resets_cmd_pos()
  integer, parameter :: used_len = 9
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'ls && pwd'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('and_or: count', 3, ntok)
  call assert_token('and_or/&&', toks(2), 4, 5, HTOK_OPERATOR)
  call assert_token('and_or/pwd', toks(3), 7, 9, HTOK_BUILTIN)
end subroutine test_and_or_resets_cmd_pos
167
!> ';' is expected to be an operator; the second 'echo' after it should be
!> recognized as a builtin again.
subroutine test_semicolon_resets_cmd_pos()
  integer, parameter :: used_len = 14
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo a; echo b'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('semicolon: count', 5, ntok)
  call assert_token('semicolon/;', toks(3), 7, 7, HTOK_OPERATOR)
  call assert_token('semicolon/echo2', toks(4), 9, 12, HTOK_BUILTIN)
end subroutine test_semicolon_resets_cmd_pos
178
!> A long option with '=value' is expected to be a single OPTION token.
subroutine test_option_token()
  integer, parameter :: used_len = 15  ! trailing blank of the buffer is excluded
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'ls --color=auto '
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('option: count', 2, ntok)
  call assert_token('option/--color', toks(2), 4, 15, HTOK_OPTION)
end subroutine test_option_token
188
!> A single-quoted string containing a space is expected to be one token
!> whose span includes both quote characters.
subroutine test_single_quoted_string()
  integer, parameter :: used_len = 18
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = "echo 'hello world'"
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('sq_string: count', 2, ntok)
  call assert_token('sq_string/str', toks(2), 6, 18, HTOK_STRING_SINGLE)
end subroutine test_single_quoted_string
198
!> A double-quoted string containing a space is expected to be one token
!> whose span includes both quote characters.
subroutine test_double_quoted_string()
  integer, parameter :: used_len = 18
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo "hello world"'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('dq_string: count', 2, ntok)
  call assert_token('dq_string/str', toks(2), 6, 18, HTOK_STRING_DOUBLE)
end subroutine test_double_quoted_string
208
!> '$NAME' is expected to be a VARIABLE token.
subroutine test_variable_simple()
  integer, parameter :: used_len = 10
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo $HOME'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('var_simple: count', 2, ntok)
  call assert_token('var_simple/$HOME', toks(2), 6, 10, HTOK_VARIABLE)
end subroutine test_variable_simple
218
!> '${NAME}' is expected to be one VARIABLE token spanning both braces.
subroutine test_variable_brace()
  integer, parameter :: used_len = 12
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo ${PATH}'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('var_brace: count', 2, ntok)
  call assert_token('var_brace/${PATH}', toks(2), 6, 12, HTOK_VARIABLE)
end subroutine test_variable_brace
228
!> '$(...)' is expected to be one VARIABLE token spanning the closing
!> parenthesis.
subroutine test_variable_subshell()
  integer, parameter :: used_len = 14
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo $(whoami)'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('var_subshell: count', 2, ntok)
  call assert_token('var_subshell/$()', toks(2), 6, 14, HTOK_VARIABLE)
end subroutine test_variable_subshell
238
!> A line starting with '#' is expected to be a single COMMENT token
!> covering the whole input.
subroutine test_comment()
  integer, parameter :: line_len = 16
  character(len=line_len) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = '# this is a test'
  call tokenize_v2(line, line_len, toks, ntok)

  call assert_eq('comment: count', 1, ntok)
  call assert_token('comment/#...', toks(1), 1, 16, HTOK_COMMENT)
end subroutine test_comment
248
!> '>' is expected to be a REDIRECT token and its target a default word.
subroutine test_redirect_gt()
  integer, parameter :: used_len = 13
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo hi > out'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('redir_gt: count', 4, ntok)
  call assert_token('redir_gt/>', toks(3), 9, 9, HTOK_REDIRECT)
  call assert_token('redir_gt/out', toks(4), 11, 13, HTOK_DEFAULT)
end subroutine test_redirect_gt
259
!> '>>' is expected to be one two-character REDIRECT token.
subroutine test_redirect_append()
  integer, parameter :: used_len = 14
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo hi >> out'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('redir_append: count', 4, ntok)
  call assert_token('redir_append/>>', toks(3), 9, 10, HTOK_REDIRECT)
end subroutine test_redirect_append
269
!> '<<' is expected to be one two-character REDIRECT token.
subroutine test_redirect_heredoc()
  integer, parameter :: used_len = 10
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'cat << EOF'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('redir_heredoc: count', 3, ntok)
  call assert_token('redir_heredoc/<<', toks(2), 5, 6, HTOK_REDIRECT)
end subroutine test_redirect_heredoc
279
!> A file-descriptor prefix is expected to belong to the redirect token
!> ('2>'), with the target directly after it classified as a path.
subroutine test_redirect_fd_prefix()
  integer, parameter :: used_len = 15
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'cmd 2>/dev/null'
  call tokenize_v2(line, used_len, toks, ntok)

  ! Expected split: 'cmd' (command_invalid), '2>' (redirect), '/dev/null' (path).
  call assert_eq('redir_fd: count', 3, ntok)
  call assert_token('redir_fd/2>', toks(2), 5, 6, HTOK_REDIRECT)
  call assert_token('redir_fd//dev/null', toks(3), 7, 15, HTOK_PATH)
end subroutine test_redirect_fd_prefix
291
!> '&>' is expected to be one two-character REDIRECT token.
subroutine test_redirect_amp_gt()
  integer, parameter :: used_len = 16
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'cmd &> /dev/null'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('redir_amp_gt: count', 3, ntok)
  call assert_token('redir_amp_gt/&>', toks(2), 5, 6, HTOK_REDIRECT)
end subroutine test_redirect_amp_gt
301
!> A trailing '&' is expected to be an OPERATOR token.
subroutine test_operator_background()
  integer, parameter :: used_len = 10
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'sleep 10 &'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('bg: count', 3, ntok)
  call assert_token('bg/&', toks(3), 10, 10, HTOK_OPERATOR)
end subroutine test_operator_background
311
!> Parentheses are expected to be OPERATOR tokens, and the word directly
!> after '(' is expected to be classified as a command.
subroutine test_operator_parens()
  integer, parameter :: used_len = 9
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = '(echo hi)'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('parens: count', 4, ntok)
  call assert_token('parens/(', toks(1), 1, 1, HTOK_OPERATOR)
  call assert_token('parens/echo', toks(2), 2, 5, HTOK_BUILTIN)
  call assert_token('parens/)', toks(4), 9, 9, HTOK_OPERATOR)
end subroutine test_operator_parens
323
!> A bare 'NAME=value' word is expected to produce exactly one token.
subroutine test_assignment()
  integer, parameter :: used_len = 7
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'FOO=bar'
  call tokenize_v2(line, used_len, toks, ntok)

  ! Only the count is checked. Per the original author's note, a word in
  ! command position is tested as keyword, builtin, valid command and then
  ! invalid command, while '=' assignment detection applies outside command
  ! position. FOO=bar is therefore treated as a command here, and its exact
  ! type is deliberately left unasserted.
  call assert_eq('assign: count', 1, ntok)
end subroutine test_assignment
336
!> A word containing '*' is expected to be a GLOB token.
subroutine test_glob_star()
  integer, parameter :: used_len = 10
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo *.txt'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('glob_star: count', 2, ntok)
  call assert_token('glob_star/*.txt', toks(2), 6, 10, HTOK_GLOB)
end subroutine test_glob_star
346
!> A word containing '?' is expected to be a GLOB token.
subroutine test_glob_question()
  integer, parameter :: used_len = 14
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo file?.log'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('glob_question: count', 2, ntok)
  call assert_token('glob_question/file?.log', toks(2), 6, 14, HTOK_GLOB)
end subroutine test_glob_question
356
!> An absolute path argument is expected to be a PATH token.
subroutine test_path_token()
  integer, parameter :: used_len = 13
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo /usr/bin'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('path: count', 2, ntok)
  call assert_token('path//usr/bin', toks(2), 6, 13, HTOK_PATH)
end subroutine test_path_token
366
!> An all-digit argument is expected to be a NUMBER token.
subroutine test_number_token()
  integer, parameter :: used_len = 7
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo 42'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('number: count', 2, ntok)
  call assert_token('number/42', toks(2), 6, 7, HTOK_NUMBER)
end subroutine test_number_token
376
!> A zero-length input is expected to produce no tokens.
subroutine test_empty_input()
  integer :: ntok
  type(hl_token_t) :: toks(MT)

  ! The literal is passed directly; no buffer variable is needed.
  call tokenize_v2('', 0, toks, ntok)
  call assert_eq('empty: count', 0, ntok)
end subroutine test_empty_input
383
!> An input made only of blanks is expected to produce no tokens.
subroutine test_whitespace_only()
  integer, parameter :: line_len = 4
  character(len=line_len) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  ! Four blanks, the same value the blank-padded assignment produced.
  line = repeat(' ', line_len)
  call tokenize_v2(line, line_len, toks, ntok)

  call assert_eq('whitespace: count', 0, ntok)
end subroutine test_whitespace_only
392
!> A three-stage pipeline: each stage's first word is expected to be a
!> command and each '|' an operator. Only token types are checked.
subroutine test_complex_pipeline()
  type(hl_token_t) :: tokens(MT)
  integer :: n
  character(len=32) :: input

  ! The pipes must be bare '|'. With backslash-escaped pipes the literal is
  ! 27 characters, so the 25 passed to tokenize_v2 would cut off the final
  ! '-l' and add stray backslash characters to the token stream.
  ! Layout: ls -la | grep foo | wc -l  (25 characters)
  input = 'ls -la | grep foo | wc -l'
  call tokenize_v2(input, 25, tokens, n)

  call assert_eq('pipeline: count', 8, n)
  call assert_eq('pipeline/ls type', HTOK_COMMAND_VALID, tokens(1)%token_type)
  call assert_eq('pipeline/-la type', HTOK_OPTION, tokens(2)%token_type)
  call assert_eq('pipeline/|1 type', HTOK_OPERATOR, tokens(3)%token_type)
  call assert_eq('pipeline/grep type', HTOK_COMMAND_VALID, tokens(4)%token_type)
  call assert_eq('pipeline/foo type', HTOK_DEFAULT, tokens(5)%token_type)
  call assert_eq('pipeline/|2 type', HTOK_OPERATOR, tokens(6)%token_type)
  call assert_eq('pipeline/wc type', HTOK_COMMAND_VALID, tokens(7)%token_type)
  call assert_eq('pipeline/-l type', HTOK_OPTION, tokens(8)%token_type)
end subroutine test_complex_pipeline
410
!> After 'then' and 'else' the next word is expected to be classified as a
!> command, i.e. these keywords return the tokenizer to command position.
subroutine test_keyword_resets_cmd_pos()
  integer, parameter :: used_len = 30
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'if true; then ls; else pwd; fi'
  call tokenize_v2(line, used_len, toks, ntok)

  ! Token order:
  !   1 if(KW)  2 true(CMD)  3 ;(OP)  4 then(KW)  5 ls(CMD)  6 ;(OP)
  !   7 else(KW)  8 pwd(BUILTIN)  9 ;(OP)  10 fi(KW)
  call assert_eq('kw_reset: count', 10, ntok)
  call assert_eq('kw_reset/if', HTOK_KEYWORD, toks(1)%token_type)
  call assert_eq('kw_reset/then', HTOK_KEYWORD, toks(4)%token_type)
  call assert_eq('kw_reset/ls', HTOK_COMMAND_VALID, toks(5)%token_type)
  call assert_eq('kw_reset/else', HTOK_KEYWORD, toks(7)%token_type)
  call assert_eq('kw_reset/pwd', HTOK_BUILTIN, toks(8)%token_type)
  call assert_eq('kw_reset/fi', HTOK_KEYWORD, toks(10)%token_type)
end subroutine test_keyword_resets_cmd_pos
427
!> Both 'case' and 'esac' are expected to be KEYWORD tokens, even though
!> 'esac' directly follows 'case'.
subroutine test_case_esac()
  integer, parameter :: used_len = 9
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'case esac'
  call tokenize_v2(line, used_len, toks, ntok)

  call assert_eq('case_esac: count', 2, ntok)
  call assert_eq('case_esac/case', HTOK_KEYWORD, toks(1)%token_type)
  ! 'esac' is expected to be a keyword regardless of command position.
  call assert_eq('case_esac/esac', HTOK_KEYWORD, toks(2)%token_type)
end subroutine test_case_esac
439
!> C-style for loop: for ((i=0; i<3; i++)) do echo $i; done
!> The whole '((...))' group is expected to be one NUMBER (arithmetic) token.
subroutine test_c_style_for_loop()
  integer, parameter :: expected_count = 7
  character(len=256) :: input
  type(hl_token_t) :: tokens(MT)
  integer :: n

  input = 'for ((i=0; i<3; i++)) do echo $i; done'
  call tokenize_v2(input, 38, tokens, n)

  ! Expected tokens, in order:
  !   for = KEYWORD, ((...)) = NUMBER (arithmetic), do = KEYWORD,
  !   echo = BUILTIN, $i = VARIABLE, ; = OPERATOR, done = KEYWORD
  ! Assert the count first, as the other tests do, and stop if the tokenizer
  ! returned fewer tokens so that no unfilled entry of tokens(:) is read.
  call assert_eq('cfor: count', expected_count, n)
  if (n < expected_count) return

  call assert_eq('cfor: for', HTOK_KEYWORD, tokens(1)%token_type)
  call assert_eq('cfor: (())', HTOK_NUMBER, tokens(2)%token_type)
  call assert_eq('cfor: do', HTOK_KEYWORD, tokens(3)%token_type)
  call assert_eq('cfor: echo', HTOK_BUILTIN, tokens(4)%token_type)
  call assert_eq('cfor: $i', HTOK_VARIABLE, tokens(5)%token_type)
  call assert_eq('cfor: ;', HTOK_OPERATOR, tokens(6)%token_type)
  call assert_eq('cfor: done', HTOK_KEYWORD, tokens(7)%token_type)
end subroutine test_c_style_for_loop
458
!> Keywords are expected to be recognized even outside command position.
subroutine test_keyword_outside_cmd_pos()
  integer, parameter :: expected_count = 2
  character(len=256) :: input
  type(hl_token_t) :: tokens(MT)
  integer :: n

  ! 'echo done': 'done' follows a builtin and should still be a keyword.
  input = 'echo done'
  call tokenize_v2(input, 9, tokens, n)

  ! Assert the count first, as the other tests do, and stop if the tokenizer
  ! returned fewer tokens so that no unfilled entry of tokens(:) is read.
  call assert_eq('kw_nonpos: count', expected_count, n)
  if (n < expected_count) return

  call assert_eq('kw_nonpos: echo', HTOK_BUILTIN, tokens(1)%token_type)
  call assert_eq('kw_nonpos: done', HTOK_KEYWORD, tokens(2)%token_type)
end subroutine test_keyword_outside_cmd_pos
471
472	end program test_syntax_highlight
473