| 1 | module syntax_highlighter_module |
| 2 | use iso_fortran_env, only: int32 |
| 3 | implicit none |
| 4 | private |
| 5 | |
| 6 | public :: syntax_highlighter_t |
| 7 | public :: token_t |
| 8 | public :: init_highlighter, cleanup_highlighter |
| 9 | public :: tokenize_line, get_token_color |
| 10 | public :: detect_language |
| 11 | public :: TOKEN_PLAIN, TOKEN_KEYWORD, TOKEN_STRING, TOKEN_NUMBER |
| 12 | public :: TOKEN_COMMENT, TOKEN_OPERATOR, TOKEN_TYPE, TOKEN_FUNCTION |
| 13 | public :: TOKEN_PREPROCESSOR |
| 14 | |
! Integer codes identifying the category of a token.
integer, parameter :: &
    TOKEN_PLAIN        = 0, &
    TOKEN_KEYWORD      = 1, &
    TOKEN_STRING       = 2, &
    TOKEN_NUMBER       = 3, &
    TOKEN_COMMENT      = 4, &
    TOKEN_OPERATOR     = 5, &
    TOKEN_TYPE         = 6, &
    TOKEN_FUNCTION     = 7, &
    TOKEN_PREPROCESSOR = 8
| 25 | |
! One highlighted span of a line.
! All components are default-initialized so that freshly declared or
! allocated tokens never carry undefined column values.
type :: token_t
    integer :: type = TOKEN_PLAIN   ! one of the TOKEN_* codes
    integer :: start_col = 0        ! first column of the span (0 until assigned)
    integer :: end_col = 0          ! last column of the span, inclusive (0 until assigned)
end type token_t
| 32 | |
! Description of one language: word lists, comment markers, string
! delimiters and operators used by the tokenizer.
type :: language_def_t
    ! Language name, e.g. "fortran"
    character(len=32) :: name = ""
    ! File extensions associated with the language
    character(len=16), allocatable :: extensions(:)
    ! Reserved words and built-in type names
    character(len=64), allocatable :: keywords(:), types(:)
    ! Single-line comment marker and block-comment open/close markers
    character(len=8) :: comment_single = "", comment_start = "", comment_end = ""
    ! String delimiters and operator spellings (blank padded)
    character(len=4), allocatable :: string_delimiters(:), operators(:)
    ! Whether word matching distinguishes upper and lower case
    logical :: case_sensitive = .true.
end type language_def_t
| 46 | |
! Highlighter state: the active language definition plus the flags that
! are carried from one line to the next.
type :: syntax_highlighter_t
    ! Definition of the language currently loaded
    type(language_def_t) :: current_lang
    ! enabled: a language is loaded; in_multiline_*: the previous line
    ! ended inside a block comment / multi-line string
    logical :: enabled = .false., &
               in_multiline_comment = .false., &
               in_multiline_string = .false.
    ! Delimiter associated with the multi-line string state
    character(len=4) :: string_delimiter = ""
end type syntax_highlighter_t
| 55 | |
! ANSI escape sequences used to colour each token category.
! ANSI_CSI is the common "ESC [" prefix of every sequence.
character(len=*), parameter :: ANSI_CSI = char(27) // '['
character(len=*), parameter :: COLOR_KEYWORD  = ANSI_CSI // '1;34m'  ! bold blue
character(len=*), parameter :: COLOR_STRING   = ANSI_CSI // '32m'    ! green
character(len=*), parameter :: COLOR_NUMBER   = ANSI_CSI // '35m'    ! magenta
character(len=*), parameter :: COLOR_COMMENT  = ANSI_CSI // '90m'    ! gray
character(len=*), parameter :: COLOR_OPERATOR = ANSI_CSI // '33m'    ! yellow
character(len=*), parameter :: COLOR_TYPE     = ANSI_CSI // '36m'    ! cyan
character(len=*), parameter :: COLOR_FUNCTION = ANSI_CSI // '1;36m'  ! bold cyan
character(len=*), parameter :: COLOR_PREPROC  = ANSI_CSI // '95m'    ! light magenta
character(len=*), parameter :: COLOR_RESET    = ANSI_CSI // '0m'     ! back to default
| 66 | |
| 67 | contains |
| 68 | |
! Put a highlighter into its initial (disabled) state and, when a file
! name is supplied, select the language from that name.
!
!   highlighter : highlighter to initialise (previous contents discarded)
!   filename    : optional file name handed to detect_language
subroutine init_highlighter(highlighter, filename)
    type(syntax_highlighter_t), intent(out) :: highlighter
    character(len=*), intent(in), optional :: filename

    ! Start with no carried-over multi-line state and highlighting off.
    highlighter%in_multiline_string = .false.
    highlighter%in_multiline_comment = .false.
    highlighter%enabled = .false.

    ! Without a file name there is nothing to detect.
    if (.not. present(filename)) return

    call detect_language(highlighter, filename)
end subroutine init_highlighter
| 81 | |
! Release the language tables held by a highlighter and return it to the
! disabled state.
!
! Besides freeing the allocatable word lists, the scalar state is reset
! as well, so a cleaned-up highlighter is not left enabled with empty
! tables and stale comment markers.
!
!   highlighter : highlighter to reset
subroutine cleanup_highlighter(highlighter)
    type(syntax_highlighter_t), intent(inout) :: highlighter

    ! Free every language table that is currently allocated.
    if (allocated(highlighter%current_lang%extensions)) &
        deallocate(highlighter%current_lang%extensions)
    if (allocated(highlighter%current_lang%keywords)) &
        deallocate(highlighter%current_lang%keywords)
    if (allocated(highlighter%current_lang%types)) &
        deallocate(highlighter%current_lang%types)
    if (allocated(highlighter%current_lang%string_delimiters)) &
        deallocate(highlighter%current_lang%string_delimiters)
    if (allocated(highlighter%current_lang%operators)) &
        deallocate(highlighter%current_lang%operators)

    ! Restore the scalar language fields to their declared defaults.
    highlighter%current_lang%name = ""
    highlighter%current_lang%comment_single = ""
    highlighter%current_lang%comment_start = ""
    highlighter%current_lang%comment_end = ""
    highlighter%current_lang%case_sensitive = .true.

    ! No language is loaded any more: disable and drop line-to-line state.
    highlighter%enabled = .false.
    highlighter%in_multiline_comment = .false.
    highlighter%in_multiline_string = .false.
    highlighter%string_delimiter = ""
end subroutine cleanup_highlighter
| 96 | |
! Select the language definition from the extension of a file name.
!
! The highlighter is first reset to an empty, disabled state. This makes
! the routine safe to call repeatedly on the same object: the loaders
! never meet already-allocated tables, markers of a previously loaded
! language do not leak into the new one, and a file name without a
! recognised extension reliably leaves highlighting disabled.
!
!   highlighter : highlighter whose language is (re)selected
!   filename    : file name; the text from the last '.' onwards is the
!                 extension (matching is case sensitive)
subroutine detect_language(highlighter, filename)
    type(syntax_highlighter_t), intent(inout) :: highlighter
    character(len=*), intent(in) :: filename
    type(language_def_t) :: blank_lang   ! default-initialized, nothing allocated
    integer :: dot_pos

    ! Drop whatever language was loaded before (frees its tables).
    highlighter%current_lang = blank_lang
    highlighter%enabled = .false.
    highlighter%in_multiline_comment = .false.
    highlighter%in_multiline_string = .false.
    highlighter%string_delimiter = ""

    ! Find file extension
    dot_pos = index(filename, '.', back=.true.)
    if (dot_pos == 0) return

    ! Trailing blanks in filename are harmless: character comparison
    ! pads the shorter operand with blanks.
    select case (filename(dot_pos:))
    case ('.f90', '.f95', '.f03', '.f08', '.f18')
        call load_fortran_syntax(highlighter)
    case ('.py', '.pyw')
        call load_python_syntax(highlighter)
    case ('.c', '.h')
        call load_c_syntax(highlighter)
    case ('.cpp', '.cc', '.cxx', '.hpp', '.hxx')
        call load_cpp_syntax(highlighter)
    case ('.rs')
        call load_rust_syntax(highlighter)
    case ('.go')
        call load_go_syntax(highlighter)
    case ('.js', '.jsx', '.mjs')
        call load_javascript_syntax(highlighter)
    case ('.ts', '.tsx')
        call load_typescript_syntax(highlighter)
    case ('.sh', '.bash')
        call load_bash_syntax(highlighter)
    case ('.md', '.markdown')
        call load_markdown_syntax(highlighter)
    case default
        highlighter%enabled = .false.
    end select
end subroutine detect_language
| 134 | |
! Split one line of text into highlight tokens.
!
!   highlighter : highlighter state; passed on to the multi-line and
!                 string handlers, which may update it
!   line        : text of the line (its full length is scanned)
!   tokens      : on return, the tokens in left-to-right order; always
!                 at least one element
!
! When highlighting is disabled the whole line is returned as a single
! TOKEN_PLAIN token.
!
! NOTE(review): only the single-line comment marker is recognised here;
! nothing in this routine opens a block comment (comment_start) - confirm
! whether that is handled elsewhere.
subroutine tokenize_line(highlighter, line, tokens)
    type(syntax_highlighter_t), intent(inout) :: highlighter
    character(len=*), intent(in) :: line
    type(token_t), allocatable, intent(out) :: tokens(:)
    integer :: i, line_len, token_count
    logical :: starts_number
    character(len=1) :: ch

    if (.not. highlighter%enabled) then
        allocate(tokens(1))
        tokens(1)%type = TOKEN_PLAIN
        tokens(1)%start_col = 1
        tokens(1)%end_col = len(line)
        return
    end if

    line_len = len(line)
    allocate(tokens(line_len))  ! Worst case: each char is a token
    token_count = 0
    i = 1

    ! Handle multiline comment continuation
    if (highlighter%in_multiline_comment) then
        call process_multiline_comment(highlighter, line, tokens, token_count, i)
    end if

    ! Handle multiline string continuation
    if (highlighter%in_multiline_string) then
        call process_multiline_string(highlighter, line, tokens, token_count, i)
    end if

    ! Process rest of line
    do while (i <= line_len)
        ch = line(i:i)

        ! A single-line comment swallows the remainder of the line.
        if (check_comment_start(highlighter, line, i)) then
            token_count = token_count + 1
            tokens(token_count)%type = TOKEN_COMMENT
            tokens(token_count)%start_col = i
            tokens(token_count)%end_col = line_len
            exit
        end if

        ! A number starts with a digit, or with '.' followed by a digit.
        ! Fortran does not guarantee short-circuit evaluation of .and.,
        ! so the look-ahead is only evaluated once i+1 is known to be
        ! inside the line.
        starts_number = is_digit(ch)
        if (.not. starts_number .and. ch == '.' .and. i < line_len) then
            starts_number = is_digit(line(i+1:i+1))
        end if

        if (check_string_start(highlighter, line, i)) then
            ! String literal
            call process_string(highlighter, line, tokens, token_count, i)

        else if (starts_number) then
            ! Numeric literal
            call process_number(line, tokens, token_count, i)

        else if (is_alpha(ch) .or. ch == '_') then
            ! Word (keyword, type, identifier)
            call process_word(highlighter, line, tokens, token_count, i)

        else if (is_operator_char(highlighter, ch)) then
            ! Operator character: one token per character
            token_count = token_count + 1
            tokens(token_count)%type = TOKEN_OPERATOR
            tokens(token_count)%start_col = i
            tokens(token_count)%end_col = i
            i = i + 1

        else
            ! Plain character (space, etc.)
            token_count = token_count + 1
            tokens(token_count)%type = TOKEN_PLAIN
            tokens(token_count)%start_col = i
            tokens(token_count)%end_col = i
            i = i + 1
        end if
    end do

    ! Shrink the array to the tokens actually produced.
    if (token_count > 0) then
        tokens = tokens(1:token_count)
    else
        ! Nothing was produced (e.g. empty line): return one plain token.
        deallocate(tokens)
        allocate(tokens(1))
        tokens(1)%type = TOKEN_PLAIN
        tokens(1)%start_col = 1
        tokens(1)%end_col = max(1, line_len)
    end if
end subroutine tokenize_line
| 222 | |
! Return the ANSI colour sequence for a token category.
!
!   tok_type : one of the TOKEN_* codes
!   color    : escape sequence for that category; the empty string for
!              TOKEN_PLAIN and for any unrecognised code
function get_token_color(tok_type) result(color)
    integer, intent(in) :: tok_type
    character(len=:), allocatable :: color

    ! Plain text and unknown codes are left uncoloured.
    color = ""

    if (tok_type == TOKEN_KEYWORD) then
        color = COLOR_KEYWORD
    else if (tok_type == TOKEN_STRING) then
        color = COLOR_STRING
    else if (tok_type == TOKEN_NUMBER) then
        color = COLOR_NUMBER
    else if (tok_type == TOKEN_COMMENT) then
        color = COLOR_COMMENT
    else if (tok_type == TOKEN_OPERATOR) then
        color = COLOR_OPERATOR
    else if (tok_type == TOKEN_TYPE) then
        color = COLOR_TYPE
    else if (tok_type == TOKEN_FUNCTION) then
        color = COLOR_FUNCTION
    else if (tok_type == TOKEN_PREPROCESSOR) then
        color = COLOR_PREPROC
    end if
end function get_token_color
| 250 | |
| 251 | ! Language-specific loading routines |
! Load the Fortran language definition into the highlighter and enable it.
!
! The tables are filled by plain assignment from typed array
! constructors: assignment (re)allocates the component as needed, so the
! routine also works when a language is already loaded, and the explicit
! character length lets the literals be written without manual padding.
!
!   highlighter : highlighter that receives the definition
subroutine load_fortran_syntax(highlighter)
    type(syntax_highlighter_t), intent(inout) :: highlighter

    highlighter%current_lang%name = "fortran"
    highlighter%current_lang%case_sensitive = .false.

    ! Keywords
    highlighter%current_lang%keywords = [character(len=64) :: &
        "program", "end", "subroutine", "function", &
        "module", "use", "implicit", "none", &
        "if", "then", "else", "elseif", &
        "endif", "do", "while", "enddo", &
        "select", "case", "default", "endselect", &
        "where", "elsewhere", "endwhere", "forall", &
        "call", "return", "contains", "interface", &
        "abstract", "allocate", "deallocate", "allocatable", &
        "intent", "in", "out", "inout", &
        "optional", "parameter", "save", "pointer", &
        "target", "public", "private", "protected", &
        "bind", "import", "only", "operator", &
        "assignment", "generic", "final", "extends", &
        "class", "type", "endtype", "enum", &
        "enumerator", "namelist"]

    ! Types
    highlighter%current_lang%types = [character(len=64) :: &
        "integer", "real", "complex", "logical", &
        "character", "double", "precision", "int32", &
        "int64", "real64"]

    ! Comments: '!' to end of line; Fortran has no block comments, so any
    ! markers left from another language are cleared.
    highlighter%current_lang%comment_single = "!"
    highlighter%current_lang%comment_start = ""
    highlighter%current_lang%comment_end = ""

    ! String delimiters
    highlighter%current_lang%string_delimiters = [character(len=4) :: '"', "'"]

    ! Operators. Entries are stored in 4 characters, hence ".and"/".not".
    highlighter%current_lang%operators = [character(len=4) :: &
        "+", "-", "*", "/", "**", "=", &
        "==", "/=", "<", ">", "<=", ">=", &
        ".and", ".or.", ".not", ".eq.", ".ne.", ".lt.", &
        ".gt.", ".le.", ".ge."]

    highlighter%enabled = .true.
end subroutine load_fortran_syntax
| 304 | |
! Load the Python language definition into the highlighter and enable it.
!
! Tables are assigned from typed array constructors (assignment
! reallocates as needed, so a previously loaded language is replaced
! cleanly). The triple-quote delimiters are listed before the single
! quote ones because the string routines take the first delimiter that
! matches; listed last they could never be selected.
!
!   highlighter : highlighter that receives the definition
subroutine load_python_syntax(highlighter)
    type(syntax_highlighter_t), intent(inout) :: highlighter

    highlighter%current_lang%name = "python"
    highlighter%current_lang%case_sensitive = .true.

    ! Keywords
    highlighter%current_lang%keywords = [character(len=64) :: &
        "def", "class", "if", "elif", &
        "else", "for", "while", "break", &
        "continue", "return", "yield", "import", &
        "from", "as", "try", "except", &
        "finally", "raise", "with", "assert", &
        "lambda", "pass", "del", "global", &
        "nonlocal", "in", "is", "and", &
        "or", "not", "True", "False", &
        "None", "async", "await"]

    ! Types
    highlighter%current_lang%types = [character(len=64) :: &
        "int", "float", "str", "bool", &
        "list", "dict", "tuple", "set"]

    ! Comments: '#' to end of line; no block-comment markers.
    highlighter%current_lang%comment_single = "#"
    highlighter%current_lang%comment_start = ""
    highlighter%current_lang%comment_end = ""

    ! String delimiters, longest first so that """ and ''' win over " and '
    highlighter%current_lang%string_delimiters = [character(len=4) :: &
        '"""', "'''", '"', "'"]

    ! Operators
    highlighter%current_lang%operators = [character(len=4) :: &
        "+", "-", "*", "/", "//", "%", "**", &
        "=", "==", "!=", "<", ">", "<=", ">=", &
        "&"]

    highlighter%enabled = .true.
end subroutine load_python_syntax
| 348 | |
| 349 | ! Helper functions |
! True when ch lies in the range 'a'..'z' or 'A'..'Z'.
function is_alpha(ch) result(res)
    character(len=1), intent(in) :: ch
    logical :: res

    select case (ch)
    case ('a':'z', 'A':'Z')
        res = .true.
    case default
        res = .false.
    end select
end function is_alpha
| 355 | |
! True when ch lies in the range '0'..'9'.
function is_digit(ch) result(res)
    character(len=1), intent(in) :: ch
    logical :: res

    ! Anything below '0' or above '9' is not a digit.
    res = .not. (ch < '0' .or. ch > '9')
end function is_digit
| 361 | |
! True when ch may appear inside an identifier: a letter in 'a'..'z' or
! 'A'..'Z', a digit in '0'..'9', or an underscore.
function is_alnum(ch) result(res)
    character(len=1), intent(in) :: ch
    logical :: res

    select case (ch)
    case ('a':'z', 'A':'Z', '0':'9', '_')
        res = .true.
    case default
        res = .false.
    end select
end function is_alnum
| 367 | |
! True when ch occurs in any operator of the current language.
!
!   highlighter : supplies the operator table (may be unallocated)
!   ch          : character to test
!   res         : .true. if ch is found in the trimmed text of at least
!                 one operator; .false. otherwise or when no table is
!                 loaded
function is_operator_char(highlighter, ch) result(res)
    type(syntax_highlighter_t), intent(in) :: highlighter
    character(len=1), intent(in) :: ch
    logical :: res
    integer :: k

    res = .false.
    if (allocated(highlighter%current_lang%operators)) then
        do k = 1, size(highlighter%current_lang%operators)
            ! Trailing padding is trimmed so a blank never matches it.
            res = index(trim(highlighter%current_lang%operators(k)), ch) > 0
            if (res) return
        end do
    end if
end function is_operator_char
| 384 | |
! True when the single-line comment marker of the current language
! starts at column pos of line.
!
!   highlighter : supplies the comment marker
!   line        : text being scanned
!   pos         : column at which to look
!   res         : .false. when the language has no single-line marker or
!                 the marker does not fit into the rest of the line
function check_comment_start(highlighter, line, pos) result(res)
    type(syntax_highlighter_t), intent(in) :: highlighter
    character(len=*), intent(in) :: line
    integer, intent(in) :: pos
    logical :: res
    integer :: marker_len, last_col

    res = .false.

    ! A blank marker means the language has no single-line comments.
    marker_len = len_trim(highlighter%current_lang%comment_single)
    if (marker_len == 0) return

    ! The marker must fit between pos and the end of the line.
    last_col = pos + marker_len - 1
    if (last_col > len(line)) return

    res = line(pos:last_col) == highlighter%current_lang%comment_single(1:marker_len)
end function check_comment_start
| 400 | |
! True when any string delimiter of the current language starts at
! column pos of line.
!
!   highlighter : supplies the delimiter table (may be unallocated)
!   line        : text being scanned
!   pos         : column at which to look
!   res         : .true. if some delimiter matches at pos
function check_string_start(highlighter, line, pos) result(res)
    type(syntax_highlighter_t), intent(in) :: highlighter
    character(len=*), intent(in) :: line
    integer, intent(in) :: pos
    logical :: res
    integer :: k, width, last_col

    res = .false.
    if (.not. allocated(highlighter%current_lang%string_delimiters)) return

    do k = 1, size(highlighter%current_lang%string_delimiters)
        width = len_trim(highlighter%current_lang%string_delimiters(k))
        last_col = pos + width - 1

        ! Skip delimiters that would run past the end of the line.
        if (last_col > len(line)) cycle

        res = line(pos:last_col) == trim(highlighter%current_lang%string_delimiters(k))
        if (res) return
    end do
end function check_string_start
| 421 | |
! Consume a string literal starting at column pos and append one
! TOKEN_STRING token for it.
!
!   highlighter : supplies the delimiter table and the language name
!   line        : text being scanned
!   tokens      : token buffer; element token_count+1 is written
!   token_count : number of tokens stored so far; incremented by one
!   pos         : on entry the column of the opening delimiter, on
!                 return the first column after the string (len(line)+1
!                 when the string is not closed on this line)
!
! The first delimiter in the table that matches at pos is used. If no
! non-blank delimiter matches, the character at pos is emitted as a
! TOKEN_PLAIN token so the caller always makes progress.
!
! A backslash escapes the following character, except for the language
! named "fortran", where a backslash is an ordinary character.
subroutine process_string(highlighter, line, tokens, token_count, pos)
    type(syntax_highlighter_t), intent(inout) :: highlighter
    character(len=*), intent(in) :: line
    type(token_t), intent(inout) :: tokens(:)
    integer, intent(inout) :: token_count, pos
    ! achar(92) is the backslash; spelled this way so the source does not
    ! depend on how the compiler treats backslashes in literals.
    character(len=1), parameter :: backslash = achar(92)
    integer :: i, start_pos, delim_len, line_len
    character(len=:), allocatable :: delimiter
    logical :: found_end, backslash_escapes

    line_len = len(line)
    start_pos = pos
    if (pos > line_len) return

    ! Find which delimiter matches
    if (allocated(highlighter%current_lang%string_delimiters)) then
        do i = 1, size(highlighter%current_lang%string_delimiters)
            delim_len = len_trim(highlighter%current_lang%string_delimiters(i))
            if (delim_len == 0) cycle                    ! blank entry: cannot delimit anything
            if (pos + delim_len - 1 > line_len) cycle    ! does not fit
            if (line(pos:pos+delim_len-1) == &
                trim(highlighter%current_lang%string_delimiters(i))) then
                delimiter = trim(highlighter%current_lang%string_delimiters(i))
                exit
            end if
        end do
    end if

    ! No usable delimiter here: emit the character as plain text.
    if (.not. allocated(delimiter)) then
        token_count = token_count + 1
        tokens(token_count)%type = TOKEN_PLAIN
        tokens(token_count)%start_col = pos
        tokens(token_count)%end_col = pos
        pos = pos + 1
        return
    end if

    delim_len = len(delimiter)
    backslash_escapes = highlighter%current_lang%name /= "fortran"

    ! Move past opening delimiter
    pos = pos + delim_len

    ! Find closing delimiter
    found_end = .false.
    do while (pos <= line_len)
        if (backslash_escapes .and. line(pos:pos) == backslash .and. pos < line_len) then
            ! Skip escaped character
            pos = pos + 2
        else
            if (pos + delim_len - 1 <= line_len) then
                if (line(pos:pos+delim_len-1) == delimiter) then
                    pos = pos + delim_len
                    found_end = .true.
                    exit
                end if
            end if
            pos = pos + 1
        end if
    end do

    ! Add string token
    token_count = token_count + 1
    tokens(token_count)%type = TOKEN_STRING
    tokens(token_count)%start_col = start_pos
    tokens(token_count)%end_col = min(pos - 1, line_len)

    ! Handle unclosed string: the rest of the line belongs to it
    if (.not. found_end) then
        pos = line_len + 1
    end if
end subroutine process_string
| 478 | |
! Consume a numeric literal starting at column pos and append one
! TOKEN_NUMBER token for it.
!
!   line        : text being scanned
!   tokens      : token buffer; element token_count+1 is written
!   token_count : number of tokens stored so far; incremented by one
!   pos         : on entry the first column of the number, on return the
!                 first column after it
!
! Accepted form: digits with at most one '.', optionally followed by one
! exponent introduced by 'e' or 'E' with an optional sign. The exponent
! marker is only taken when at least one digit follows it, so text such
! as "1.eq.2" yields the number "1." rather than "1.e".
subroutine process_number(line, tokens, token_count, pos)
    character(len=*), intent(in) :: line
    type(token_t), intent(inout) :: tokens(:)
    integer, intent(inout) :: token_count, pos
    integer :: start_pos, line_len, look
    logical :: has_dot, has_e, exponent_ok
    character(len=1) :: ch

    line_len = len(line)
    start_pos = pos
    has_dot = .false.
    has_e = .false.

    do while (pos <= line_len)
        ch = line(pos:pos)
        if (is_digit(ch)) then
            pos = pos + 1
        else if (ch == '.' .and. .not. has_dot .and. .not. has_e) then
            has_dot = .true.
            pos = pos + 1
        else if ((ch == 'e' .or. ch == 'E') .and. .not. has_e) then
            ! Look ahead past an optional sign. Each substring access is
            ! guarded by its own bounds test because .and. is not
            ! guaranteed to short-circuit.
            look = pos + 1
            if (look <= line_len) then
                if (line(look:look) == '+' .or. line(look:look) == '-') look = look + 1
            end if
            exponent_ok = .false.
            if (look <= line_len) exponent_ok = is_digit(line(look:look))
            if (.not. exponent_ok) exit

            ! Accept marker and sign; the digits are consumed by the loop.
            has_e = .true.
            pos = look
        else
            exit
        end if
    end do

    token_count = token_count + 1
    tokens(token_count)%type = TOKEN_NUMBER
    tokens(token_count)%start_col = start_pos
    tokens(token_count)%end_col = pos - 1
end subroutine process_number
| 512 | |
! Consume an identifier-like word starting at column pos and append one
! token classifying it.
!
!   highlighter : supplies the keyword and type tables and the case rule
!   line        : text being scanned
!   tokens      : token buffer; element token_count+1 is written
!   token_count : number of tokens stored so far; incremented by one
!   pos         : on entry the first column of the word, on return the
!                 first column after it
!
! The token is TOKEN_KEYWORD if the word is in the keyword table,
! otherwise TOKEN_TYPE if it is in the type table, otherwise TOKEN_PLAIN.
subroutine process_word(highlighter, line, tokens, token_count, pos)
    type(syntax_highlighter_t), intent(in) :: highlighter
    character(len=*), intent(in) :: line
    type(token_t), intent(inout) :: tokens(:)
    integer, intent(inout) :: token_count, pos
    integer :: start_pos, end_pos, line_len, i
    character(len=:), allocatable :: word
    logical :: is_keyword, is_type

    line_len = len(line)
    start_pos = pos

    ! Find end of word. The bounds test and the character test are kept
    ! separate: .and. need not short-circuit, so a combined condition
    ! could read line(pos:pos) one past the end of the line.
    do while (pos <= line_len)
        if (.not. is_alnum(line(pos:pos))) exit
        pos = pos + 1
    end do
    end_pos = pos - 1

    word = line(start_pos:end_pos)

    ! Check if it's a keyword
    is_keyword = .false.
    if (allocated(highlighter%current_lang%keywords)) then
        do i = 1, size(highlighter%current_lang%keywords)
            if (compare_word(word, highlighter%current_lang%keywords(i), &
                             highlighter%current_lang%case_sensitive)) then
                is_keyword = .true.
                exit
            end if
        end do
    end if

    ! Check if it's a type (keywords take precedence)
    is_type = .false.
    if (.not. is_keyword) then
        if (allocated(highlighter%current_lang%types)) then
            do i = 1, size(highlighter%current_lang%types)
                if (compare_word(word, highlighter%current_lang%types(i), &
                                 highlighter%current_lang%case_sensitive)) then
                    is_type = .true.
                    exit
                end if
            end do
        end if
    end if

    token_count = token_count + 1
    tokens(token_count)%start_col = start_pos
    tokens(token_count)%end_col = end_pos

    if (is_keyword) then
        tokens(token_count)%type = TOKEN_KEYWORD
    else if (is_type) then
        tokens(token_count)%type = TOKEN_TYPE
    else
        tokens(token_count)%type = TOKEN_PLAIN
    end if
end subroutine process_word
| 568 | |
! Compare two words, ignoring trailing blanks.
!
!   word1, word2   : words to compare
!   case_sensitive : .true. to compare exactly; .false. to compare after
!                    converting both words with to_lower
!   match          : .true. when the words are equal under that rule
function compare_word(word1, word2, case_sensitive) result(match)
    character(len=*), intent(in) :: word1, word2
    logical, intent(in) :: case_sensitive
    logical :: match

    ! Character comparison pads the shorter operand with blanks, so
    ! trailing padding never affects the result.
    if (.not. case_sensitive) then
        match = to_lower(word1) == to_lower(word2)
    else
        match = word1 == word2
    end if
end function compare_word
| 580 | |
! Return a copy of str with the letters 'A'..'Z' replaced by 'a'..'z'.
! All other characters are copied unchanged; the result has the same
! length as str.
function to_lower(str) result(lower_str)
    character(len=*), intent(in) :: str
    character(len=len(str)) :: lower_str
    character(len=*), parameter :: upper_set = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    character(len=*), parameter :: lower_set = 'abcdefghijklmnopqrstuvwxyz'
    integer :: i, k

    do i = 1, len(str)
        ! Position of the character in the upper-case alphabet, 0 if absent.
        k = index(upper_set, str(i:i))
        if (k > 0) then
            lower_str(i:i) = lower_set(k:k)
        else
            lower_str(i:i) = str(i:i)
        end if
    end do
end function to_lower
| 593 | |
! C language support
!
! Load the C language definition into the highlighter and enable it.
! Tables are assigned from typed array constructors: assignment
! (re)allocates the component as needed, so loading over an existing
! language works, and the literals need no manual blank padding.
!
!   highlighter : highlighter that receives the definition
subroutine load_c_syntax(highlighter)
    type(syntax_highlighter_t), intent(inout) :: highlighter

    highlighter%current_lang%name = "c"
    highlighter%current_lang%case_sensitive = .true.

    ! Keywords
    highlighter%current_lang%keywords = [character(len=64) :: &
        "auto", "break", "case", "char", &
        "const", "continue", "default", "do", &
        "double", "else", "enum", "extern", &
        "float", "for", "goto", "if", &
        "inline", "int", "long", "register", &
        "return", "short", "signed", "sizeof", &
        "static", "struct", "switch", "typedef", &
        "union", "unsigned", "void", "while"]

    ! Types
    highlighter%current_lang%types = [character(len=64) :: &
        "size_t", "uint32_t", "int32_t", "uint64_t", &
        "int64_t", "bool", "FILE", "NULL"]

    ! Comments: // to end of line, /* ... */ blocks
    highlighter%current_lang%comment_single = "//"
    highlighter%current_lang%comment_start = "/*"
    highlighter%current_lang%comment_end = "*/"

    ! String delimiters
    highlighter%current_lang%string_delimiters = [character(len=4) :: '"', "'"]

    ! Operators
    highlighter%current_lang%operators = [character(len=4) :: &
        "+", "-", "*", "/", "%", "=", &
        "==", "!=", "<", ">", "<=", ">=", &
        "&&", "||", "!", "&", "|", "^", &
        "<<", ">>"]

    highlighter%enabled = .true.
end subroutine load_c_syntax
| 638 | |
! C++ language support
!
! Load the C++ language definition into the highlighter and enable it.
! Tables are assigned from typed array constructors: assignment
! (re)allocates the component as needed, so loading over an existing
! language works, and the literals need no manual blank padding.
!
!   highlighter : highlighter that receives the definition
subroutine load_cpp_syntax(highlighter)
    type(syntax_highlighter_t), intent(inout) :: highlighter

    highlighter%current_lang%name = "cpp"
    highlighter%current_lang%case_sensitive = .true.

    ! Keywords (C keywords followed by the C++ specific ones)
    highlighter%current_lang%keywords = [character(len=64) :: &
        "auto", "break", "case", "char", &
        "const", "continue", "default", "do", &
        "double", "else", "enum", "extern", &
        "float", "for", "goto", "if", &
        "inline", "int", "long", "register", &
        "return", "short", "signed", "sizeof", &
        "static", "struct", "switch", "typedef", &
        "union", "unsigned", "void", "while", &
        "class", "namespace", "template", "typename", &
        "new", "delete", "this", "friend", &
        "virtual", "override", "final", "public", &
        "private", "protected", "try", "catch"]

    ! Types
    highlighter%current_lang%types = [character(len=64) :: &
        "std", "string", "vector", "map", &
        "set", "pair", "unique_ptr", "shared_ptr", &
        "nullptr", "true", "false", "bool"]

    ! Comments: // to end of line, /* ... */ blocks
    highlighter%current_lang%comment_single = "//"
    highlighter%current_lang%comment_start = "/*"
    highlighter%current_lang%comment_end = "*/"

    ! String delimiters
    highlighter%current_lang%string_delimiters = [character(len=4) :: '"', "'"]

    ! Operators
    highlighter%current_lang%operators = [character(len=4) :: &
        "+", "-", "*", "/", "%", "=", &
        "==", "!=", "<", ">", "<=", ">=", &
        "&&", "||", "!", "&", "|", "^", &
        "<<", ">>", "::", "->"]

    highlighter%enabled = .true.
end subroutine load_cpp_syntax
| 688 | |
! Rust language support
!
! Load the Rust language definition into the highlighter and enable it.
! Tables are assigned from typed array constructors: assignment
! (re)allocates the component as needed, so loading over an existing
! language works, and the literals need no manual blank padding.
! The keyword table lists "async" and "await" once each.
!
! NOTE(review): "'" is registered as a string delimiter, so a lifetime
! such as 'a will be scanned as the start of a string - confirm whether
! that is acceptable.
!
!   highlighter : highlighter that receives the definition
subroutine load_rust_syntax(highlighter)
    type(syntax_highlighter_t), intent(inout) :: highlighter

    highlighter%current_lang%name = "rust"
    highlighter%current_lang%case_sensitive = .true.

    ! Keywords
    highlighter%current_lang%keywords = [character(len=64) :: &
        "as", "async", "await", "break", &
        "const", "continue", "crate", "dyn", &
        "else", "enum", "extern", "false", &
        "fn", "for", "if", "impl", &
        "in", "let", "loop", "match", &
        "mod", "move", "mut", "pub", &
        "ref", "return", "self", "Self", &
        "static", "struct", "super", "trait", &
        "true", "type", "unsafe", "use", &
        "where", "while"]

    ! Types
    highlighter%current_lang%types = [character(len=64) :: &
        "i8", "i16", "i32", "i64", &
        "i128", "u8", "u16", "u32", &
        "u64", "u128", "f32", "f64", &
        "bool", "char", "str", "String"]

    ! Comments: // to end of line, /* ... */ blocks
    highlighter%current_lang%comment_single = "//"
    highlighter%current_lang%comment_start = "/*"
    highlighter%current_lang%comment_end = "*/"

    ! String delimiters
    highlighter%current_lang%string_delimiters = [character(len=4) :: '"', "'"]

    ! Operators
    highlighter%current_lang%operators = [character(len=4) :: &
        "+", "-", "*", "/", "%", "=", &
        "==", "!=", "<", ">", "<=", ">=", &
        "&&", "||", "!", "&", "|", "^", &
        "::", "->"]

    highlighter%enabled = .true.
end subroutine load_rust_syntax
| 737 | |
! Go language support
!
! Load the Go language definition into the highlighter and enable it.
! Tables are assigned from typed array constructors: assignment
! (re)allocates the component as needed, so loading over an existing
! language works, and the literals need no manual blank padding.
!
!   highlighter : highlighter that receives the definition
subroutine load_go_syntax(highlighter)
    type(syntax_highlighter_t), intent(inout) :: highlighter

    highlighter%current_lang%name = "go"
    highlighter%current_lang%case_sensitive = .true.

    ! Keywords
    highlighter%current_lang%keywords = [character(len=64) :: &
        "break", "case", "chan", "const", &
        "continue", "default", "defer", "else", &
        "fallthrough", "for", "func", "go", &
        "goto", "if", "import", "interface", &
        "map", "package", "range", "return", &
        "select", "struct", "switch", "type", &
        "var"]

    ! Types
    highlighter%current_lang%types = [character(len=64) :: &
        "bool", "byte", "complex64", "complex128", &
        "error", "float32", "float64", "int", &
        "int8", "int16", "int32", "int64", &
        "string", "uint", "nil"]

    ! Comments: // to end of line, /* ... */ blocks
    highlighter%current_lang%comment_single = "//"
    highlighter%current_lang%comment_start = "/*"
    highlighter%current_lang%comment_end = "*/"

    ! String delimiters (including the backtick)
    highlighter%current_lang%string_delimiters = [character(len=4) :: '"', "'", '`']

    ! Operators
    highlighter%current_lang%operators = [character(len=4) :: &
        "+", "-", "*", "/", "%", "=", &
        "==", "!=", "<", ">", "<=", ">=", &
        "&&", "||", "!", "&", "|", ":="]

    highlighter%enabled = .true.
end subroutine load_go_syntax
| 782 | |
! JavaScript language support
!
! Load the JavaScript language definition into the highlighter and
! enable it. Tables are assigned from typed array constructors:
! assignment (re)allocates the component as needed, so loading over an
! existing language works, and the literals need no manual blank padding.
! Every table is left allocated on return.
!
!   highlighter : highlighter that receives the definition
subroutine load_javascript_syntax(highlighter)
    type(syntax_highlighter_t), intent(inout) :: highlighter

    highlighter%current_lang%name = "javascript"
    highlighter%current_lang%case_sensitive = .true.

    ! Keywords
    highlighter%current_lang%keywords = [character(len=64) :: &
        "async", "await", "break", "case", &
        "catch", "class", "const", "continue", &
        "debugger", "default", "delete", "do", &
        "else", "export", "extends", "finally", &
        "for", "function", "if", "import", &
        "in", "instanceof", "let", "new", &
        "return", "super", "switch", "this", &
        "throw", "try", "typeof", "var", &
        "void", "while", "with", "yield", &
        "true", "false"]

    ! Types
    highlighter%current_lang%types = [character(len=64) :: &
        "null", "undefined", "Boolean", "Number", &
        "String", "Symbol", "Object", "Array", &
        "Function", "Promise"]

    ! Comments: // to end of line, /* ... */ blocks
    highlighter%current_lang%comment_single = "//"
    highlighter%current_lang%comment_start = "/*"
    highlighter%current_lang%comment_end = "*/"

    ! String delimiters (including the backtick)
    highlighter%current_lang%string_delimiters = [character(len=4) :: '"', "'", '`']

    ! Operators
    highlighter%current_lang%operators = [character(len=4) :: &
        "+", "-", "*", "/", "%", "=", &
        "==", "===", "!=", "!==", "<", ">", &
        "<=", ">=", "&&", "||", "!", "?", &
        ":", "=>"]

    highlighter%enabled = .true.
end subroutine load_javascript_syntax
| 830 | |
| 831 | ! TypeScript language support (extends JavaScript) |
subroutine load_typescript_syntax(highlighter)
    ! Configure the highlighter for TypeScript.
    !
    ! Loads the JavaScript definition first, then renames the language,
    ! appends the TypeScript-only keywords to the JavaScript keyword list and
    ! replaces the type list with the TypeScript one. Comment markers, string
    ! delimiters and operators are the ones set by load_javascript_syntax.
    !
    ! highlighter : highlighter whose current language definition is
    !               overwritten.
    type(syntax_highlighter_t), intent(inout) :: highlighter

    ! Start with JavaScript syntax
    call load_javascript_syntax(highlighter)
    highlighter%current_lang%name = "typescript"

    ! Extend the inherited JavaScript keywords instead of repeating them, so
    ! the two lists cannot drift apart. The right-hand side is evaluated
    ! before the component is reallocated to the longer shape.
    highlighter%current_lang%keywords = [character(len=64) :: &
        highlighter%current_lang%keywords, &
        "enum", "interface", "type", "namespace", &
        "module", "declare", "abstract"]

    ! TypeScript types replace the JavaScript list. The typed constructor
    ! removes the need to hand-pad literals to one width, which is how
    ! "ReadonlyArray" had ended up cut short.
    highlighter%current_lang%types = [character(len=64) :: &
        "null", "undefined", "boolean", "number", &
        "string", "symbol", "object", "any", &
        "unknown", "never", "void", "Array", &
        "Function", "Promise", "ReadonlyArray"]

    highlighter%enabled = .true.
end subroutine load_typescript_syntax
| 869 | |
| 870 | ! Bash/Shell script support |
subroutine load_bash_syntax(highlighter)
    ! Configure the highlighter for Bash / shell scripts.
    !
    ! Fills highlighter%current_lang with the shell keyword list, the special
    ! parameters treated as "types", string delimiters and operators, sets
    ! "#" as the single-line comment marker, and switches the highlighter on.
    !
    ! highlighter : highlighter whose current language definition is
    !               overwritten.
    !
    ! Each table is assigned as a whole array, which allocates or reallocates
    ! the component as needed, so the routine also works on a highlighter
    ! that already holds another language.
    type(syntax_highlighter_t), intent(inout) :: highlighter

    highlighter%current_lang%name = "bash"
    highlighter%current_lang%case_sensitive = .true.

    ! The explicit character(len=...) type-spec lets the literals have
    ! different lengths; every element is blank-padded to the stated length.

    ! Keywords
    highlighter%current_lang%keywords = [character(len=64) :: &
        "if", "then", "else", "elif", &
        "fi", "for", "while", "do", &
        "done", "case", "esac", "function", &
        "return", "break", "continue", "exit", &
        "export", "source", "alias", "unset", &
        "shift", "local", "declare", "readonly", &
        "echo", "printf", "read", "cd", &
        "pwd", "ls"]

    ! Built-in variables/types
    highlighter%current_lang%types = [character(len=64) :: &
        "$0", "$1", "$@", "$*", &
        "$#", "$?", "$$", "$!", &
        "true", "false"]

    ! Only "#" comments are configured. The block-comment markers are cleared
    ! explicitly so that markers left by a previously loaded language
    ! (e.g. "/*" and "*/") do not carry over.
    highlighter%current_lang%comment_single = "#"
    highlighter%current_lang%comment_start = ""
    highlighter%current_lang%comment_end = ""

    ! String delimiters: double quote, single quote, backtick
    highlighter%current_lang%string_delimiters = [character(len=4) :: '"', "'", '`']

    ! Operators
    highlighter%current_lang%operators = [character(len=4) :: &
        "=", "==", "!=", "<", ">", &
        "-eq", "-ne", "-lt", "-gt", "-le", &
        "-ge", "&&", "||", "|", "&"]

    highlighter%enabled = .true.
end subroutine load_bash_syntax
| 912 | |
| 913 | ! Markdown support (special handling needed) |
subroutine load_markdown_syntax(highlighter)
    ! Configure the highlighter for Markdown.
    !
    ! Markdown is mapped onto the generic language tables: header and
    ! emphasis markers go in the keyword list, code-fence openers in the type
    ! list, backtick and "[" in the string-delimiter list, and list/quote
    ! markers in the operator list. No comment markers are configured.
    !
    ! highlighter : highlighter whose current language definition is
    !               overwritten.
    !
    ! Each table is assigned as a whole array, which allocates or reallocates
    ! the component as needed, so the routine also works on a highlighter
    ! that already holds another language.
    type(syntax_highlighter_t), intent(inout) :: highlighter

    highlighter%current_lang%name = "markdown"
    highlighter%current_lang%case_sensitive = .true.

    ! The explicit character(len=...) type-spec lets the literals have
    ! different lengths; every element is blank-padded to the stated length.

    ! Headers and emphasis markers as "keywords"
    highlighter%current_lang%keywords = [character(len=64) :: &
        "#", "##", "###", "####", &
        "#####", "######", "*", "_"]

    ! Code block languages as "types". The javascript/typescript fence tags
    ! are spelled out in full; they had been cut short to fit a hand-padded
    ! literal width.
    highlighter%current_lang%types = [character(len=64) :: &
        "```", "```python", "```bash", "```fortran", &
        "```c", "```cpp", "```rust", "```go", &
        "```javascript", "```typescript"]

    ! No traditional comments in markdown. All three markers are cleared so
    ! that markers left by a previously loaded language do not carry over.
    highlighter%current_lang%comment_single = ""
    highlighter%current_lang%comment_start = ""
    highlighter%current_lang%comment_end = ""

    ! Links and code spans
    highlighter%current_lang%string_delimiters = [character(len=4) :: '`', '[']

    ! List markers and special chars
    highlighter%current_lang%operators = [character(len=4) :: &
        "-", "+", "*", ">", "|", "!"]

    highlighter%enabled = .true.
end subroutine load_markdown_syntax
| 950 | |
| 951 | ! Stub implementations for multiline handling |
subroutine process_multiline_comment(highlighter, line, tokens, token_count, pos)
    ! Placeholder for block-comment handling that spans several lines.
    !
    ! The routine is currently a no-op: it neither reads nor modifies any of
    ! its arguments.
    !
    ! highlighter : highlighter state (unused for now)
    ! line        : text of the line being tokenized (unused for now)
    ! tokens      : token buffer (unused for now)
    ! token_count : presumably the number of tokens already stored in
    !               tokens - TODO confirm against the caller
    ! pos         : presumably the current column within line - TODO confirm
    !               against the caller
    type(syntax_highlighter_t), intent(inout) :: highlighter
    character(len=*),           intent(in)    :: line
    type(token_t),              intent(inout) :: tokens(:)
    integer,                    intent(inout) :: token_count
    integer,                    intent(inout) :: pos

    ! TODO: Implement multiline comment handling
end subroutine process_multiline_comment
| 959 | |
subroutine process_multiline_string(highlighter, line, tokens, token_count, pos)
    ! Placeholder for string-literal handling that spans several lines.
    !
    ! The routine is currently a no-op: it neither reads nor modifies any of
    ! its arguments.
    !
    ! highlighter : highlighter state (unused for now)
    ! line        : text of the line being tokenized (unused for now)
    ! tokens      : token buffer (unused for now)
    ! token_count : presumably the number of tokens already stored in
    !               tokens - TODO confirm against the caller
    ! pos         : presumably the current column within line - TODO confirm
    !               against the caller
    type(syntax_highlighter_t), intent(inout) :: highlighter
    character(len=*),           intent(in)    :: line
    type(token_t),              intent(inout) :: tokens(:)
    integer,                    intent(inout) :: token_count
    integer,                    intent(inout) :: pos

    ! TODO: Implement multiline string handling
end subroutine process_multiline_string
| 967 | |
| 968 | end module syntax_highlighter_module |