Fortran · 35943 bytes Raw Blame History
1 ! ==============================================================================
2 ! Module: syntax_highlight
3 ! Purpose: Real-time syntax highlighting for interactive command line
4 ! ==============================================================================
5 module syntax_highlight
6 use iso_fortran_env, only: error_unit
7 use system_interface, only: c_access, X_OK
8 #ifdef USE_MEMORY_POOL
9 use string_pool
10 #endif
11 implicit none
12 private
13
14 ! Public interface
15 public :: highlight_command_line
16 public :: highlight_single_char
17 public :: is_valid_command
18 public :: init_syntax_highlighting
19 public :: clear_command_cache
20 public :: MAX_HIGHLIGHT_LEN ! Export buffer size for callers
21 public :: cleanup_syntax_highlighting
22 ! v2 API — exposed for unit testing
23 public :: hl_token_t, tokenize_v2, hl_token_color
24
25 ! ANSI color codes
26 integer, parameter :: COLOR_RESET = 0
27 integer, parameter :: COLOR_BLACK = 30
28 integer, parameter :: COLOR_RED = 31
29 integer, parameter :: COLOR_GREEN = 32
30 integer, parameter :: COLOR_YELLOW = 33
31 integer, parameter :: COLOR_BLUE = 34
32 integer, parameter :: COLOR_MAGENTA = 35
33 integer, parameter :: COLOR_CYAN = 36
34 integer, parameter :: COLOR_WHITE = 37
35 integer, parameter :: COLOR_BRIGHT_BLACK = 90
36 integer, parameter :: COLOR_BRIGHT_RED = 91
37 integer, parameter :: COLOR_BRIGHT_GREEN = 92
38 integer, parameter :: COLOR_BRIGHT_YELLOW = 93
39 integer, parameter :: COLOR_BRIGHT_BLUE = 94
40 integer, parameter :: COLOR_BRIGHT_MAGENTA = 95
41 integer, parameter :: COLOR_BRIGHT_CYAN = 96
42 integer, parameter :: COLOR_BRIGHT_WHITE = 97
43
44 ! Highlight token types — position-based, context-aware
45 integer, parameter, public :: HTOK_COMMAND_VALID = 1
46 integer, parameter, public :: HTOK_COMMAND_INVALID = 2
47 integer, parameter, public :: HTOK_KEYWORD = 3
48 integer, parameter, public :: HTOK_BUILTIN = 4
49 integer, parameter, public :: HTOK_OPTION = 5
50 integer, parameter, public :: HTOK_STRING_SINGLE = 6
51 integer, parameter, public :: HTOK_STRING_DOUBLE = 7
52 integer, parameter, public :: HTOK_VARIABLE = 8
53 integer, parameter, public :: HTOK_COMMENT = 9
54 integer, parameter, public :: HTOK_OPERATOR = 10
55 integer, parameter, public :: HTOK_REDIRECT = 11
56 integer, parameter, public :: HTOK_NUMBER = 12
57 integer, parameter, public :: HTOK_PATH = 13
58 integer, parameter, public :: HTOK_GLOB = 14
59 integer, parameter, public :: HTOK_ASSIGNMENT = 15
60 integer, parameter, public :: HTOK_DEFAULT = 16
61
62 ! v2 token structure — references positions in input buffer, no string copying
! Position-based highlight token: a span of the input buffer plus its class.
! No text is copied; the renderer reads input(start_pos:end_pos) directly.
type :: hl_token_t
  integer :: start_pos = 0               ! index of the token's first character
  integer :: end_pos = 0                 ! index of the token's last character (inclusive)
  integer :: token_type = HTOK_DEFAULT   ! one of the HTOK_* constants
end type hl_token_t
68
69 ! Color scheme for different token types
70 integer, parameter :: COLOR_COMMAND_VALID = COLOR_GREEN
71 integer, parameter :: COLOR_COMMAND_INVALID = COLOR_RED
72 integer, parameter :: COLOR_KEYWORD = COLOR_BRIGHT_MAGENTA
73 integer, parameter :: COLOR_OPTION = COLOR_BLUE
74 integer, parameter :: COLOR_STRING = COLOR_YELLOW
75 integer, parameter :: COLOR_VARIABLE = COLOR_MAGENTA
76 integer, parameter :: COLOR_COMMENT = COLOR_BRIGHT_BLACK
77 integer, parameter :: COLOR_OPERATOR = COLOR_CYAN
78 integer, parameter :: COLOR_NUMBER = COLOR_CYAN
79 integer, parameter :: COLOR_PATH = COLOR_BRIGHT_BLUE
80 integer, parameter :: COLOR_GLOB = COLOR_BRIGHT_CYAN
81 integer, parameter :: COLOR_ASSIGNMENT = COLOR_BRIGHT_WHITE
82
83 ! Fixed-length parameters to avoid heap corruption with LLVM Flang
84 integer, parameter :: MAX_COMMAND_LEN = 256
85 integer, parameter :: MAX_HIGHLIGHT_LEN = 4096
86 integer, parameter :: MAX_TOKEN_LEN = 256
87 integer, parameter :: MAX_TOKENS = 100
88 integer, parameter :: MAX_PATH_LEN = 4096
89
90 ! Command validation cache
! One slot of the command validation cache.
type :: cache_entry_t
  character(len=MAX_COMMAND_LEN) :: command = ''   ! cached command name; blank marks a free slot
  logical :: is_valid = .false.                    ! cached validation result for `command`
  integer :: timestamp = 0                         ! current_timestamp at last store/hit; smallest is evicted first
end type cache_entry_t
96
97 integer, parameter :: CACHE_SIZE = 256
98 type(cache_entry_t), save :: command_cache(CACHE_SIZE)
99 integer, save :: cache_count = 0
100 integer, save :: current_timestamp = 0
101
102 ! Configuration
103 logical, save :: highlighting_enabled = .true.
104 logical, save :: cache_enabled = .true.
105
106 contains
107
108 ! Initialize syntax highlighting system
subroutine init_syntax_highlighting()
  ! Reset the command cache (and the string pool when compiled in), then
  ! decide from the environment whether colour output is enabled:
  !   FORTSH_TEST_MODE=1       -> disabled
  !   TERM unset or blank      -> disabled
  !   TERM=dumb|unknown|cons25 -> disabled
  !   anything else            -> enabled
  character(len=256) :: env_value
  integer :: rc

  call clear_command_cache()

#ifdef USE_MEMORY_POOL
  call pool_init()
#endif

  ! Test mode is checked first and wins over the TERM probe.
  call get_environment_variable('FORTSH_TEST_MODE', env_value, status=rc)
  if (rc == 0 .and. trim(env_value) == '1') then
    highlighting_enabled = .false.
    return
  end if

  call get_environment_variable('TERM', env_value, status=rc)
  if (rc /= 0 .or. len_trim(env_value) == 0) then
    highlighting_enabled = .false.
    return
  end if

  ! Terminals known not to understand ANSI colour sequences.
  highlighting_enabled = .not. any(trim(env_value) == &
      [character(len=7) :: 'dumb', 'unknown', 'cons25'])
end subroutine
145
146 ! Cleanup syntax highlighting system
subroutine cleanup_syntax_highlighting()
  ! Tear down the highlighter: release the string pool (when compiled in)
  ! and empty the command validation cache.
#ifdef USE_MEMORY_POOL
  integer :: n_alloc, n_free, n_live, n_peak
  real :: pool_hit_rate

  ! Statistics are fetched before the pool is destroyed; printing them is
  ! left disabled (debug aid only).
  call pool_statistics(n_alloc, n_free, n_live, n_peak, pool_hit_rate)
  ! write(error_unit, '(a)') 'String pool statistics:'
  ! write(error_unit, '(a,i0)') ' Total allocations: ', n_alloc
  ! write(error_unit, '(a,i0)') ' Total deallocations: ', n_free
  ! write(error_unit, '(a,i0)') ' Peak strings: ', n_peak
  ! write(error_unit, '(a,f5.1,a)') ' Cache hit rate: ', pool_hit_rate * 100.0, '%'

  call pool_cleanup()
#endif

  call clear_command_cache()
end subroutine
169
170 ! Clear command validation cache
subroutine clear_command_cache()
  ! Return every cache slot to its empty state and restart the LRU clock.
  command_cache%command = ''
  command_cache%is_valid = .false.
  command_cache%timestamp = 0

  cache_count = 0
  current_timestamp = 0
end subroutine
182
183 ! Main function: Highlight a command line
184 ! Convert to subroutine to avoid allocatable string returns (flang-new workaround)
185 ! Takes input_len to avoid substring temporaries on stack
subroutine highlight_command_line(input, highlighted, actual_len, input_len)
  ! Produce an ANSI-coloured copy of a command line.
  !   input       - text to highlight
  !   highlighted - fixed-size output buffer, blank padded
  !   actual_len  - (optional) number of characters written to `highlighted`
  !   input_len   - (optional) number of characters of `input` to use;
  !                 defaults to len(input)
  ! When highlighting is disabled, the input is empty, or the tokenizer finds
  ! nothing, the text is copied through unchanged. Input longer than
  ! MAX_HIGHLIGHT_LEN on that path yields a blank buffer and length 0.
  character(len=*), intent(in) :: input
  character(len=MAX_HIGHLIGHT_LEN), intent(out) :: highlighted
  integer, intent(out), optional :: actual_len
  integer, intent(in), optional :: input_len

  type(hl_token_t) :: toks(MAX_TOKENS)
  integer :: n_toks, n_in, out_len
  logical :: colourise

  ! The caller-supplied length is trusted as-is (only clamped at zero).
  n_in = len(input)
  if (present(input_len)) n_in = input_len
  n_in = max(n_in, 0)

  ! Tokenize only when there is something to colour.
  colourise = .false.
  if (highlighting_enabled .and. n_in > 0) then
    call tokenize_v2(input, n_in, toks, n_toks)
    colourise = n_toks > 0
  end if

  if (colourise) then
    call render_highlighted_v2(input, n_in, toks, n_toks, highlighted, out_len)
  else
    ! Plain pass-through
    if (n_in > MAX_HIGHLIGHT_LEN) n_in = 0
    highlighted = ' '
    if (n_in > 0) highlighted(1:n_in) = input(1:n_in)
    out_len = n_in
  end if

  if (present(actual_len)) actual_len = out_len
end subroutine
247
248 ! Highlight a single character based on context
249 ! This is a simplified version for incremental display updates
function highlight_single_char(ch, buffer) result(highlighted)
  ! Colour a single character using simple per-character heuristics
  ! (used for incremental display updates).
  !   ch          - the character to colour
  !   buffer      - text already entered before `ch`
  !   highlighted - `ch` wrapped in ESC[<n>m ... ESC[0m, or just `ch` when no
  !                 colour applies; blank padded to 32 characters
  character, intent(in) :: ch
  character(len=*), intent(in) :: buffer
  character(len=32) :: highlighted

  integer :: color

  select case (ch)
  case ('"', "'")
    color = COLOR_STRING
  case ('-')
    ! A dash is coloured as an option only when nothing but blanks precedes
    ! it. The previous code also tested buffer(n:n) == ' ' with
    ! n = len_trim(buffer); that can never hold for n > 0 and reads
    ! buffer(0:0) for n == 0 (Fortran .or. need not short-circuit).
    ! NOTE(review): "dash after a space" was probably intended, but that
    ! needs the caller's cursor position - confirm before extending.
    if (len_trim(buffer) == 0) then
      color = COLOR_OPTION
    else
      color = COLOR_RESET
    end if
  case ('#')
    color = COLOR_COMMENT
  case ('$')
    color = COLOR_VARIABLE
  case ('|', '&', '>', '<', ';')
    color = COLOR_OPERATOR
  case ('0':'9')
    color = COLOR_NUMBER
  case default
    color = COLOR_RESET
  end select

  if (color /= COLOR_RESET) then
    ! i0 (minimal width) is required: a fixed-width field would put blanks
    ! inside the CSI parameter and break the escape sequence.
    write(highlighted, '(a,i0,a,a,a)') achar(27) // '[', color, 'm', ch, achar(27) // '[0m'
  else
    highlighted = ch
  end if
end function
284
285 ! Check if a command is valid (exists in PATH, is builtin, or is function)
function is_valid_command(command) result(valid)
  ! True when `command` is a shell builtin or an executable found on PATH.
  ! Results (positive and negative) are memoised in the command cache.
  ! The name is truncated to MAX_COMMAND_LEN characters.
  character(len=*), intent(in) :: command
  logical :: valid

  character(len=MAX_COMMAND_LEN) :: key
  integer :: slot

  key = trim(command)
  valid = .false.

  ! Cached answer, if any
  if (cache_enabled) then
    slot = find_in_cache(key)
    if (slot > 0) then
      valid = command_cache(slot)%is_valid
      return
    end if
  end if

  ! Builtins first; only fall back to the PATH search when that fails.
  valid = is_builtin_v2(key, len_trim(key))
  if (.not. valid) valid = command_exists_in_path(key)

  call add_to_cache(key, valid)
end function
322
323 ! Check if command exists in PATH
function command_exists_in_path(command) result(exists)
  ! Search each colon-separated directory of $PATH for an executable named
  ! `command`.
  !   command - command name (trailing blanks ignored)
  !   exists  - .true. as soon as <dir>/<command> is reported executable
  ! An empty PATH entry (leading, trailing or doubled colon) denotes the
  ! current directory, per POSIX, and is looked up as ./<command>.
  use system_interface, only: file_is_executable, get_environment_var
  character(len=*), intent(in) :: command
  logical :: exists

  ! Deferred-length strings: sized by assignment, freed automatically on return
  character(len=:), allocatable :: path_env, candidate
  integer :: path_len, seg_start, seg_end, rel_colon

  exists = .false.

  ! An empty name would otherwise probe the bare directory "<dir>/".
  if (len_trim(command) == 0) return

  path_env = get_environment_var('PATH')
  if (.not. allocated(path_env)) return
  path_len = len_trim(path_env)
  if (path_len == 0) return

  seg_start = 1
  ! "<= path_len + 1" lets a trailing colon yield one final empty entry.
  do while (seg_start <= path_len + 1)
    rel_colon = 0
    if (seg_start <= path_len) rel_colon = index(path_env(seg_start:path_len), ':')
    if (rel_colon > 0) then
      seg_end = seg_start + rel_colon - 2
    else
      seg_end = path_len
    end if

    if (seg_end >= seg_start) then
      candidate = trim(path_env(seg_start:seg_end)) // '/' // trim(command)
    else
      candidate = './' // trim(command)
    end if

    if (file_is_executable(candidate)) then
      exists = .true.
      return
    end if

    if (rel_colon == 0) exit      ! that was the last entry
    seg_start = seg_end + 2       ! step over the colon
  end do
end function
379
380 ! Check if a path is a navigable directory (for cd-on-path-entry highlighting)
function is_navigable_path(path) result(is_nav)
  ! True when `path` names an existing directory (used to colour a bare
  ! path typed in command position).
  !   path   - candidate path; a leading "~" or "~/" is expanded with $HOME
  !   is_nav - result of file_is_directory on the expanded path
  ! "~user" forms cannot be resolved here and are reported as not navigable;
  ! gluing $HOME in front of "user/..." would test an unrelated path.
  use system_interface, only: file_is_directory, get_environment_var
  character(len=*), intent(in) :: path
  logical :: is_nav

  character(len=:), allocatable :: expanded, home_dir
  integer :: plen

  is_nav = .false.
  plen = len_trim(path)
  if (plen == 0) return

  if (path(1:1) == '~') then
    if (plen > 1) then
      if (path(2:2) /= '/') return
    end if

    home_dir = get_environment_var('HOME')
    ! Nested tests: Fortran does not guarantee short-circuit evaluation.
    if (.not. allocated(home_dir)) return
    if (len(home_dir) == 0) return

    if (plen == 1) then
      expanded = home_dir
    else
      expanded = trim(home_dir) // path(2:plen)
    end if
  else
    expanded = path(1:plen)
  end if

  is_nav = file_is_directory(expanded)
end function is_navigable_path
408
409 ! Check if a path containing / points to an executable file
function is_path_executable(path) result(is_exec)
  ! True when the (blank-trimmed) path is reported executable by
  ! file_is_executable; a blank path is never executable.
  use system_interface, only: file_is_executable
  character(len=*), intent(in) :: path
  logical :: is_exec

  if (len_trim(path) > 0) then
    is_exec = file_is_executable(trim(path))
  else
    is_exec = .false.
  end if
end function is_path_executable
419
420 ! Generate ANSI color code
function color_code(color) result(code)
  ! Build the ANSI SGR escape sequence ESC[<color>m, blank padded to 32
  ! characters. COLOR_RESET yields ESC[0m.
  integer, intent(in) :: color
  character(len=32) :: code

  select case (color)
  case (COLOR_RESET)
    code = char(27) // '[0m'
  case default
    write(code, '(a,i0,a)') char(27) // '[', color, 'm'
  end select
end function
432
433 ! Cache management functions
function find_in_cache(command) result(idx)
  ! Look `command` up in the cache. Returns its slot index, or 0 when it
  ! is not cached. A hit also refreshes the slot's LRU timestamp.
  character(len=*), intent(in) :: command
  integer :: idx

  integer :: slot

  idx = 0
  do slot = 1, min(cache_count, CACHE_SIZE)
    ! Free slots never match
    if (len_trim(command_cache(slot)%command) == 0) cycle
    ! Character comparison blank-pads the shorter operand, so this is the
    ! same as comparing the trimmed strings.
    if (command_cache(slot)%command /= command) cycle

    command_cache(slot)%timestamp = current_timestamp
    current_timestamp = current_timestamp + 1
    idx = slot
    exit
  end do
end function
454
subroutine add_to_cache(command, is_valid)
  ! Record the validation result for `command`. An existing entry is
  ! updated in place; otherwise the first free slot is used, or the entry
  ! with the smallest timestamp is evicted when the cache is full.
  character(len=*), intent(in) :: command
  logical, intent(in) :: is_valid

  integer :: slot, k

  ! Already cached: just refresh the verdict
  slot = find_in_cache(command)
  if (slot > 0) then
    command_cache(slot)%is_valid = is_valid
    return
  end if

  ! First free slot, if any
  do k = 1, CACHE_SIZE
    if (len_trim(command_cache(k)%command) == 0) then
      slot = k
      exit
    end if
  end do

  ! Cache full: evict the least recently used entry (first minimum)
  if (slot == 0) slot = minloc(command_cache%timestamp, dim=1)

  command_cache(slot)%command = trim(command)
  command_cache(slot)%is_valid = is_valid
  command_cache(slot)%timestamp = current_timestamp
  current_timestamp = current_timestamp + 1

  cache_count = max(cache_count, slot)
end subroutine
496
497 ! ============================================================================
498 ! v2 Highlighting Pipeline
499 ! Position-based tokenizer with keyword, operator, and context awareness.
500 ! ============================================================================
501
502 ! Color lookup for v2 token types
function hl_token_color(tok_type) result(color)
  ! Map an HTOK_* token type to its ANSI colour number. Unknown types and
  ! HTOK_DEFAULT map to COLOR_RESET (no colouring).
  integer, intent(in) :: tok_type
  integer :: color

  select case (tok_type)
  case (HTOK_COMMAND_VALID)
    color = COLOR_COMMAND_VALID
  case (HTOK_COMMAND_INVALID)
    color = COLOR_COMMAND_INVALID
  case (HTOK_KEYWORD)
    color = COLOR_KEYWORD
  case (HTOK_BUILTIN)
    color = COLOR_GREEN
  case (HTOK_OPTION)
    color = COLOR_OPTION
  case (HTOK_STRING_SINGLE, HTOK_STRING_DOUBLE)
    color = COLOR_STRING
  case (HTOK_VARIABLE)
    color = COLOR_VARIABLE
  case (HTOK_COMMENT)
    color = COLOR_COMMENT
  case (HTOK_OPERATOR, HTOK_REDIRECT)
    color = COLOR_OPERATOR
  case (HTOK_NUMBER)
    color = COLOR_NUMBER
  case (HTOK_PATH)
    color = COLOR_PATH
  case (HTOK_GLOB)
    color = COLOR_GLOB
  case (HTOK_ASSIGNMENT)
    color = COLOR_ASSIGNMENT
  case default
    color = COLOR_RESET
  end select
end function hl_token_color
525
526 ! Check if word is a shell keyword (for highlighting)
function is_keyword_for_highlight(word, wlen) result(is_kw)
  ! True when word(1:wlen) is a shell reserved word.
  !   word - buffer starting at the word (may extend past it)
  !   wlen - length of the word within `word`
  character(len=*), intent(in) :: word
  integer, intent(in) :: wlen
  logical :: is_kw

  ! Blank-padded table; character equality ignores the padding.
  character(len=8), parameter :: reserved(*) = [character(len=8) :: &
      'if', 'then', 'else', 'elif', 'fi', &
      'for', 'in', 'do', 'done', &
      'while', 'until', &
      'case', 'esac', &
      'function', 'select', 'time', &
      '{', '}', '!', '[[', ']]']

  is_kw = .false.
  ! No reserved word is longer than 8 characters
  if (wlen < 1 .or. wlen > 8) return

  is_kw = any(reserved == word(1:wlen))
end function is_keyword_for_highlight
552
553 ! Check if keyword resets command position (followed by a command)
function keyword_starts_command(word, wlen) result(starts)
  ! True when the keyword word(1:wlen) is followed by a command, i.e. the
  ! next word should be validated as a command name.
  character(len=*), intent(in) :: word
  integer, intent(in) :: wlen
  logical :: starts

  starts = .false.
  ! Longest candidate ('while', 'until') has 5 characters
  if (wlen >= 1 .and. wlen <= 5) then
    select case (word(1:wlen))
    case ('if', 'elif', 'while', 'until', '!', 'then', 'else', 'do')
      starts = .true.
    end select
  end if
end function keyword_starts_command
571
572 ! Check if word is a shell builtin (synced with builtins.f90)
function is_builtin_v2(word, wlen) result(is_b)
  ! True when word(1:wlen) names a shell builtin.
  ! The list is meant to mirror builtins.f90 (not visible from this file).
  !   word - buffer starting at the word (may extend past it)
  !   wlen - length of the word within `word`
  character(len=*), intent(in) :: word
  integer, intent(in) :: wlen
  logical :: is_b

  ! Blank-padded table; character equality ignores the padding.
  character(len=8), parameter :: builtins(*) = [character(len=8) :: &
      'cd', 'echo', 'pwd', 'exit', 'export', 'set', 'unset', &
      'alias', 'unalias', 'source', '.', ':', &
      'history', 'jobs', 'fg', 'bg', 'kill', 'wait', &
      'read', 'printf', 'test', '[', &
      'type', 'which', 'command', 'builtin', &
      'declare', 'local', 'readonly', 'return', 'shift', &
      'break', 'continue', &
      'coproc', 'let', 'eval', 'exec', &
      'trap', 'ulimit', 'umask', 'getopts', 'hash', &
      'help', 'fc', 'complete', 'compgen', &
      'pushd', 'popd', 'dirs', 'prevd', 'nextd', 'dirh', &
      'abbr', 'shopt', 'printenv', 'times']

  is_b = .false.
  if (wlen < 1 .or. wlen > 10) return

  is_b = any(builtins == word(1:wlen))
end function is_builtin_v2
610
611 ! v2 tokenizer — state machine with multi-char operators and command position
subroutine tokenize_v2(input, input_len, tokens, num_tokens)
  ! Split a command line into position-based highlight tokens.
  !   input      - text to scan
  !   input_len  - number of characters of `input` to scan
  !   tokens     - receives (start_pos, end_pos, token_type) spans in
  !                ascending order; whitespace produces no token
  !   num_tokens - number of entries filled (at most MAX_TOKENS; scanning
  !                stops when the array is full)
  !
  ! The scanner tracks "command position": the first word of the line and
  ! the first word after | || & && ; ;; ( (( )) or a command-introducing
  ! keyword is validated as a command; other words are classified by content.
  !
  ! Differences from a naive scanner:
  !   * '#' starts a comment only at the beginning of a word, so foo#bar and
  !     "$#" are not cut into comments.
  !   * $? $# $! $$ $@ $* $- are consumed as variables.
  !   * NAME=value in command position is an assignment (the command may
  !     still follow), not an invalid command.
  !   * No test relies on .and./.or. short-circuiting to stay in bounds.
  character(len=*), intent(in) :: input
  integer, intent(in) :: input_len
  type(hl_token_t), intent(out) :: tokens(MAX_TOKENS)
  integer, intent(out) :: num_tokens

  character(len=1), parameter :: tab = achar(9)
  character(len=1), parameter :: backslash = achar(92)
  character(len=*), parameter :: operator_chars = '|&;><()'
  ! Characters that end an unquoted word ('#' is deliberately absent)
  character(len=*), parameter :: word_breaks = ' ' // tab // operator_chars // '"' // "'" // '$'
  ! Single-character special parameters that may follow '$'
  character(len=*), parameter :: special_params = '?#!$@*-'

  integer :: i, tok_start, wlen, tok_type
  logical :: in_cmd_pos, has_slash, has_glob, has_equals
  character(len=1) :: ch, next_ch

  num_tokens = 0
  if (input_len <= 0) return

  in_cmd_pos = .true.
  i = 1

  scan: do while (i <= input_len .and. num_tokens < MAX_TOKENS)
    ch = input(i:i)

    ! Whitespace separates tokens but is not a token itself
    if (ch == ' ' .or. ch == tab) then
      i = i + 1
      cycle scan
    end if

    ! Comment: '#' at the start of a word runs to the end of the line
    if (ch == '#') then
      if (at_word_start(i)) then
        call push_token(i, input_len, HTOK_COMMENT)
        return
      end if
    end if

    ! Single-quoted string: no escapes, ends at the next quote (or EOL)
    if (ch == "'") then
      tok_start = i
      i = i + 1
      do while (i <= input_len)
        i = i + 1
        if (input(i-1:i-1) == "'") exit
      end do
      call push_token(tok_start, i - 1, HTOK_STRING_SINGLE)
      in_cmd_pos = .false.
      cycle scan
    end if

    ! Double-quoted string: backslash escapes the next character
    if (ch == '"') then
      tok_start = i
      i = i + 1
      do while (i <= input_len)
        if (input(i:i) == backslash .and. i < input_len) then
          i = i + 2
        else if (input(i:i) == '"') then
          i = i + 1
          exit
        else
          i = i + 1
        end if
      end do
      call push_token(tok_start, i - 1, HTOK_STRING_DOUBLE)
      in_cmd_pos = .false.
      cycle scan
    end if

    ! Variable / substitution
    if (ch == '$') then
      tok_start = i
      i = i + 1
      next_ch = char_at(i)
      if (next_ch == '{') then
        ! ${...}: consume through the closing brace
        i = i + 1
        do while (i <= input_len)
          i = i + 1
          if (input(i-1:i-1) == '}') exit
        end do
      else if (next_ch == '(') then
        i = i + 1
        if (char_at(i) == '(') then
          ! $(( ... )) arithmetic
          i = i + 1
          call skip_past_double_paren()
        else
          ! $( ... ): first closing paren, nesting is not tracked
          do while (i <= input_len)
            i = i + 1
            if (input(i-1:i-1) == ')') exit
          end do
        end if
      else if (index(special_params, next_ch) > 0) then
        ! $? $# $! $$ $@ $* $-
        i = i + 1
      else
        ! $NAME
        do while (i <= input_len)
          if (.not. is_alnum(input(i:i))) exit
          i = i + 1
        end do
      end if
      call push_token(tok_start, i - 1, HTOK_VARIABLE)
      in_cmd_pos = .false.
      cycle scan
    end if

    ! Operators and redirections
    if (index(operator_chars, ch) > 0) then
      tok_start = i
      next_ch = char_at(i + 1)

      select case (ch)
      case ('|')
        ! | or ||
        i = i + merge(2, 1, next_ch == '|')
        call push_token(tok_start, i - 1, HTOK_OPERATOR)
        in_cmd_pos = .true.

      case ('&')
        if (next_ch == '>') then
          ! &> redirect: command position is left unchanged
          i = i + 2
          call push_token(tok_start, i - 1, HTOK_REDIRECT)
        else
          ! && or & (background)
          i = i + merge(2, 1, next_ch == '&')
          call push_token(tok_start, i - 1, HTOK_OPERATOR)
          in_cmd_pos = .true.
        end if

      case (';')
        ! ; or ;;
        i = i + merge(2, 1, next_ch == ';')
        call push_token(tok_start, i - 1, HTOK_OPERATOR)
        in_cmd_pos = .true.

      case ('>')
        ! > >> >& >|
        if (next_ch == '>' .or. next_ch == '&' .or. next_ch == '|') then
          i = i + 2
        else
          i = i + 1
        end if
        call push_token(tok_start, i - 1, HTOK_REDIRECT)

      case ('<')
        if (next_ch == '<') then
          ! << <<< <<-
          i = i + 2
          if (char_at(i) == '<') i = i + 1
          if (char_at(i) == '-') i = i + 1
        else if (next_ch == '&' .or. next_ch == '>') then
          ! <& <>
          i = i + 2
        else
          i = i + 1
        end if
        call push_token(tok_start, i - 1, HTOK_REDIRECT)

      case ('(')
        if (next_ch == '(') then
          ! (( ... )) arithmetic context, one token
          i = i + 2
          call skip_past_double_paren()
          call push_token(tok_start, i - 1, HTOK_NUMBER)
        else
          i = i + 1
          call push_token(tok_start, i - 1, HTOK_OPERATOR)
        end if
        in_cmd_pos = .true.

      case (')')
        i = i + 1
        call push_token(tok_start, i - 1, HTOK_OPERATOR)
      end select
      cycle scan
    end if

    ! Word: scan to its end, noting features used for classification
    tok_start = i
    has_slash = .false.
    has_glob = .false.
    has_equals = .false.

    do while (i <= input_len)
      ch = input(i:i)
      if (ch == backslash .and. i < input_len) then
        ! Escaped character stays part of the word
        i = i + 2
      else if (index(word_breaks, ch) > 0) then
        exit
      else
        if (ch == '/') has_slash = .true.
        if (ch == '*' .or. ch == '?' .or. ch == '[') has_glob = .true.
        if (ch == '=' .and. i > tok_start) has_equals = .true.
        i = i + 1
      end if
    end do

    wlen = i - tok_start
    if (wlen == 0) then
      ! Defensive: every word-break character is handled above
      i = i + 1
      cycle scan
    end if

    ! fd-prefixed redirect (e.g. 2>, 2>>, 0<&): all-digit word directly
    ! followed by > or <, in any position
    next_ch = char_at(i)
    if (next_ch == '>' .or. next_ch == '<') then
      if (is_all_digits(input(tok_start:i-1), wlen)) then
        i = i + 1
        ch = char_at(i)
        if (next_ch == '>') then
          if (ch == '>' .or. ch == '&' .or. ch == '|') i = i + 1
        else
          if (ch == '<' .or. ch == '&' .or. ch == '>') i = i + 1
        end if
        call push_token(tok_start, i - 1, HTOK_REDIRECT)
        cycle scan
      end if
    end if

    ! Classify the word
    if (is_keyword_for_highlight(input(tok_start:), wlen)) then
      ! Keywords are structural: recognised in any position
      tok_type = HTOK_KEYWORD
      in_cmd_pos = keyword_starts_command(input(tok_start:), wlen)
    else if (in_cmd_pos) then
      if (has_equals .and. is_valid_identifier(input(tok_start:), tok_start, i - 1)) then
        ! NAME=value prefix: the command word may still follow, so
        ! command position is kept
        tok_type = HTOK_ASSIGNMENT
      else
        if (is_builtin_v2(input(tok_start:), wlen)) then
          tok_type = HTOK_BUILTIN
        else if (is_valid_command(input(tok_start:i-1))) then
          tok_type = HTOK_COMMAND_VALID
        else if (has_slash .and. is_path_executable(input(tok_start:i-1))) then
          ! Path to an executable (e.g. ./bin/tool)
          tok_type = HTOK_COMMAND_VALID
        else if (has_slash .and. is_navigable_path(input(tok_start:i-1))) then
          ! Directory typed as a command (cd-on-path-entry)
          tok_type = HTOK_COMMAND_VALID
        else
          tok_type = HTOK_COMMAND_INVALID
        end if
        in_cmd_pos = .false.
      end if
    else
      if (has_equals .and. is_valid_identifier(input(tok_start:), tok_start, i - 1)) then
        tok_type = HTOK_ASSIGNMENT
      else if (input(tok_start:tok_start) == '-') then
        tok_type = HTOK_OPTION
      else if (has_glob) then
        tok_type = HTOK_GLOB
      else if (has_slash) then
        tok_type = HTOK_PATH
      else if (is_all_digits(input(tok_start:i-1), wlen)) then
        tok_type = HTOK_NUMBER
      else
        tok_type = HTOK_DEFAULT
      end if
    end if

    call push_token(tok_start, i - 1, tok_type)
  end do scan

contains

  ! Append one token; the scan loop guarantees there is room.
  subroutine push_token(first, last, kind)
    integer, intent(in) :: first, last, kind
    num_tokens = num_tokens + 1
    tokens(num_tokens)%start_pos = first
    tokens(num_tokens)%end_pos = last
    tokens(num_tokens)%token_type = kind
  end subroutine push_token

  ! Character at `pos`, or NUL when `pos` lies outside 1..input_len.
  pure function char_at(pos) result(c)
    integer, intent(in) :: pos
    character(len=1) :: c
    if (pos >= 1 .and. pos <= input_len) then
      c = input(pos:pos)
    else
      c = achar(0)
    end if
  end function char_at

  ! True when `pos` begins a new word: start of line, or preceded by
  ! whitespace or an operator character.
  pure function at_word_start(pos) result(starts)
    integer, intent(in) :: pos
    logical :: starts
    if (pos <= 1) then
      starts = .true.
    else
      starts = index(' ' // tab // operator_chars, input(pos-1:pos-1)) > 0
    end if
  end function at_word_start

  ! Advance i just past the next "))", or to input_len + 1 if none.
  subroutine skip_past_double_paren()
    do while (i <= input_len)
      if (i < input_len) then
        if (input(i:i+1) == '))') then
          i = i + 2
          return
        end if
      end if
      i = i + 1
    end do
  end subroutine skip_past_double_paren

end subroutine tokenize_v2
961
962 ! Check if character is alphanumeric or underscore
pure function is_alnum(ch) result(res)
  ! True for ASCII letters, decimal digits and the underscore.
  character(len=1), intent(in) :: ch
  logical :: res

  select case (ch)
  case ('a':'z', 'A':'Z', '0':'9', '_')
    res = .true.
  case default
    res = .false.
  end select
end function is_alnum
969
970 ! Check if string is all digits
pure function is_all_digits(str, slen) result(res)
  ! True when str(1:slen) is non-empty and consists only of '0'..'9'.
  character(len=*), intent(in) :: str
  integer, intent(in) :: slen
  logical :: res

  if (slen > 0) then
    ! verify returns 0 when every character belongs to the set
    res = verify(str(1:slen), '0123456789') == 0
  else
    res = .false.
  end if
end function is_all_digits
984
985 ! Check if word up to = is a valid identifier (for VAR=value detection)
pure function is_valid_identifier(word, wstart, wend) result(res)
  ! True when the text before the first '=' in the word is a valid
  ! variable name: a letter or underscore followed by letters, digits or
  ! underscores (used for NAME=value detection).
  !   word         - buffer that starts at the word's first character
  !   wstart, wend - the word's position in the caller's buffer; only their
  !                  difference (the word length) is used
  character(len=*), intent(in) :: word
  integer, intent(in) :: wstart, wend
  logical :: res

  integer :: k, eq_at, span

  res = .false.
  span = wend - wstart + 1

  ! Position of the first '=' inside the word (0 when absent)
  eq_at = 0
  if (span > 0) eq_at = index(word(1:span), '=')
  if (eq_at < 2) return          ! need at least one character before '='

  ! A name may not start with a digit
  select case (word(1:1))
  case ('a':'z', 'A':'Z', '_')
    continue
  case default
    return
  end select

  do k = 2, eq_at - 1
    if (.not. is_alnum(word(k:k))) return
  end do
  res = .true.
end function is_valid_identifier
1011
1012 ! v2 renderer — builds highlighted string from position-based tokens
subroutine render_highlighted_v2(input, input_len, tokens, num_tokens, highlighted, actual_len)
  ! Build the coloured output string from position-based tokens.
  !   input, input_len   - source text and the number of characters to render
  !   tokens, num_tokens - spans in ascending order, as filled by tokenize_v2
  !   highlighted        - output buffer, blank padded
  !   actual_len         - number of characters written to `highlighted`
  ! Characters outside any token are copied as-is. Each coloured token is
  ! emitted as <colour code><text><reset>. If the buffer fills up, the token
  ! text is truncated but its reset sequence is always written, so a
  ! truncated line cannot leave the terminal in a coloured state.
  character(len=*), intent(in) :: input
  integer, intent(in) :: input_len
  type(hl_token_t), intent(in) :: tokens(MAX_TOKENS)
  integer, intent(in) :: num_tokens
  character(len=MAX_HIGHLIGHT_LEN), intent(out) :: highlighted
  integer, intent(out) :: actual_len

  integer :: pos, ipos, tidx, color, n, last, room, reset_len
  character(len=32) :: color_str, reset_str
  logical :: at_token

  highlighted = ' '
  reset_str = color_code(COLOR_RESET)
  reset_len = len_trim(reset_str)

  pos = 1      ! next write position in `highlighted`
  ipos = 1     ! next read position in `input`
  tidx = 1     ! next token to emit

  ! The 20-character margin leaves room for a colour code at a token start.
  do while (ipos <= input_len .and. pos < MAX_HIGHLIGHT_LEN - 20)
    ! Nested test: tokens(tidx) must not be read once tidx > num_tokens
    ! (Fortran .and. need not short-circuit).
    at_token = .false.
    if (tidx <= num_tokens) at_token = (ipos == tokens(tidx)%start_pos)

    if (at_token) then
      color = hl_token_color(tokens(tidx)%token_type)
      last = min(tokens(tidx)%end_pos, input_len)

      if (color /= COLOR_RESET) then
        color_str = color_code(color)
        n = len_trim(color_str)
        highlighted(pos:pos+n-1) = color_str(1:n)
        pos = pos + n
        ! Keep space for the reset sequence after the text
        room = MAX_HIGHLIGHT_LEN - reset_len - pos + 1
      else
        room = MAX_HIGHLIGHT_LEN - pos + 1
      end if

      ! Token text, truncated to the space available
      n = max(0, min(last - ipos + 1, room))
      if (n > 0) then
        highlighted(pos:pos+n-1) = input(ipos:ipos+n-1)
        pos = pos + n
      end if

      if (color /= COLOR_RESET) then
        highlighted(pos:pos+reset_len-1) = reset_str(1:reset_len)
        pos = pos + reset_len
      end if

      ! Always move forward, even for a malformed (empty) span
      ipos = max(tokens(tidx)%end_pos, ipos) + 1
      tidx = tidx + 1
    else
      ! Character between tokens (whitespace)
      highlighted(pos:pos) = input(ipos:ipos)
      pos = pos + 1
      ipos = ipos + 1
    end if
  end do

  actual_len = pos - 1
end subroutine render_highlighted_v2
1078
1079 end module syntax_highlight