Fortran · 17299 bytes Raw Blame History
1 ! ==============================================================================
2 ! Test: syntax_highlight v2 tokenizer
3 ! Unit tests for position-based tokenization and token classification.
4 ! ==============================================================================
5 program test_syntax_highlight
6 use syntax_highlight
7 implicit none
8
! Capacity of the token buffer declared by every test (MAX_TOKENS).
integer, parameter :: MT = 100

! Running tallies; assert_eq updates them through host association.
integer :: passed, failed, total

total = 0
passed = 0
failed = 0

print '(a)', '=========================================='
print '(a)', 'Testing Syntax Highlight v2 Tokenizer'
print '(a)', '=========================================='

! --- Commands, builtins, keywords and command-position resets ---
call test_simple_command()
call test_invalid_command()
call test_builtin_echo()
call test_keyword_if_then_fi()
call test_pipe_resets_cmd_pos()
call test_and_or_resets_cmd_pos()
call test_semicolon_resets_cmd_pos()

! --- Options, strings, variables and comments ---
call test_option_token()
call test_single_quoted_string()
call test_double_quoted_string()
call test_variable_simple()
call test_variable_brace()
call test_variable_subshell()
call test_comment()

! --- Redirections and operators ---
call test_redirect_gt()
call test_redirect_append()
call test_redirect_heredoc()
call test_redirect_fd_prefix()
call test_redirect_amp_gt()
call test_operator_background()
call test_operator_parens()

! --- Assignments, globs, paths and numbers ---
call test_assignment()
call test_glob_star()
call test_glob_question()
call test_path_token()
call test_number_token()

! --- Degenerate input and composite command lines ---
call test_empty_input()
call test_whitespace_only()
call test_complex_pipeline()
call test_keyword_resets_cmd_pos()
call test_case_esac()
call test_c_style_for_loop()
call test_keyword_outside_cmd_pos()

! Summary banner with the final tallies.
print '(a)', ''
print '(a)', '=========================================='
print '(a,i0,a,i0,a,i0,a)', 'Results: ', passed, ' passed, ', failed, ' failed (', total, ' total)'
print '(a)', '=========================================='

! Any recorded failure ends the run through error stop with code 1.
if (failed <= 0) then
  print '(a)', 'All tests passed!'
else
  print '(a)', 'SOME TESTS FAILED!'
  error stop 1
end if
66
67 contains
68
subroutine assert_eq(test_name, expected, actual)
  ! Compare two integers, print one PASS/FAIL line and update the host
  ! program's counters (total always, then either passed or failed).
  !   test_name - label printed with the result
  !   expected  - value the test wants
  !   actual    - value the code under test produced
  character(len=*), intent(in) :: test_name
  integer, intent(in) :: expected, actual

  total = total + 1

  ! Mismatch: report both values and leave early.
  if (expected /= actual) then
    failed = failed + 1
    write(*, '(a,a,a,i0,a,i0,a)') ' FAIL: ', test_name, ' (expected=', expected, ', got=', actual, ')'
    return
  end if

  passed = passed + 1
  write(*, '(a,a)') ' PASS: ', test_name
end subroutine assert_eq
81
subroutine assert_token(test_name, tok, exp_start, exp_end, exp_type)
  ! Check one token against its expected start position, end position and
  ! type. Each of the three fields is reported as a separate assert_eq
  ! check labelled '<test_name>: start', ': end' and ': type'.
  character(len=*), intent(in) :: test_name
  type(hl_token_t), intent(in) :: tok
  integer, intent(in) :: exp_start, exp_end, exp_type

  ! Trim once so padded labels do not leave a gap before the suffix.
  associate (label => trim(test_name))
    call assert_eq(label // ': start', exp_start, tok%start_pos)
    call assert_eq(label // ': end', exp_end, tok%end_pos)
    call assert_eq(label // ': type', exp_type, tok%token_type)
  end associate
end subroutine assert_token
90
91 ! ======================== Tests ========================
92
subroutine test_simple_command()
  ! 'ls -la /' is expected to yield three tokens: a valid command,
  ! an option and a path, at the asserted character positions.
  character(len=8) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'ls -la /'
  ! The text fills the buffer exactly, so len(line) is the input length (8).
  call tokenize_v2(line, len(line), toks, ntok)

  call assert_eq('simple_cmd: count', 3, ntok)
  ! 'ls' occupies positions 1-2; the test expects it to be COMMAND_VALID
  ! (the original author's note: ls exists).
  call assert_token('simple_cmd/ls', toks(1), 1, 2, HTOK_COMMAND_VALID)
  call assert_token('simple_cmd/-la', toks(2), 4, 6, HTOK_OPTION)
  call assert_token('simple_cmd//', toks(3), 8, 8, HTOK_PATH)
end subroutine test_simple_command
105
subroutine test_invalid_command()
  ! A first word expected to classify as COMMAND_INVALID, followed by a
  ! plain argument expected to classify as DEFAULT.
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'xyznotfound foo '
  ! The trailing blank is excluded: len_trim gives 15.
  call tokenize_v2(line, len_trim(line), toks, ntok)

  call assert_eq('invalid_cmd: count', 2, ntok)
  call assert_token('invalid_cmd/xyz', toks(1), 1, 11, HTOK_COMMAND_INVALID)
  call assert_token('invalid_cmd/foo', toks(2), 13, 15, HTOK_DEFAULT)
end subroutine test_invalid_command
116
subroutine test_builtin_echo()
  ! 'echo' is expected to classify as BUILTIN and its two arguments as DEFAULT.
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo hello world'
  ! The text fills the 16-character buffer exactly.
  call tokenize_v2(line, len(line), toks, ntok)

  call assert_eq('builtin_echo: count', 3, ntok)
  call assert_token('builtin_echo/echo', toks(1), 1, 4, HTOK_BUILTIN)
  call assert_token('builtin_echo/hello', toks(2), 6, 10, HTOK_DEFAULT)
  call assert_token('builtin_echo/world', toks(3), 12, 16, HTOK_DEFAULT)
end subroutine test_builtin_echo
128
subroutine test_keyword_if_then_fi()
  ! A one-line if/then/fi statement. Expected token sequence:
  !   if=KEYWORD, true=CMD, ;=OP, then=KEYWORD, echo=BUILTIN,
  !   hi=DEFAULT, ;=OP, fi=KEYWORD
  ! Tokens 2 ('true') and 7 (second ';') are counted but not inspected.
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'if true; then echo hi; fi'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 25 significant characters

  call assert_eq('if_then_fi: count', 8, ntok)
  call assert_token('if_then_fi/if', toks(1), 1, 2, HTOK_KEYWORD)
  call assert_token('if_then_fi/;', toks(3), 8, 8, HTOK_OPERATOR)
  call assert_token('if_then_fi/then', toks(4), 10, 13, HTOK_KEYWORD)
  call assert_token('if_then_fi/echo', toks(5), 15, 18, HTOK_BUILTIN)
  call assert_token('if_then_fi/hi', toks(6), 20, 21, HTOK_DEFAULT)
  call assert_token('if_then_fi/fi', toks(8), 24, 25, HTOK_KEYWORD)
end subroutine test_keyword_if_then_fi
144
subroutine test_pipe_resets_cmd_pos()
  ! The word after '|' is expected to be classified as a command again
  ! (COMMAND_VALID for 'wc'), and '-l' after it as an option.
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'cat file | wc -l'
  ! The text fills the 16-character buffer exactly.
  call tokenize_v2(line, len(line), toks, ntok)

  call assert_eq('pipe: count', 5, ntok)
  call assert_token('pipe/|', toks(3), 10, 10, HTOK_OPERATOR)
  call assert_token('pipe/wc', toks(4), 12, 13, HTOK_COMMAND_VALID)
  call assert_token('pipe/-l', toks(5), 15, 16, HTOK_OPTION)
end subroutine test_pipe_resets_cmd_pos
156
subroutine test_and_or_resets_cmd_pos()
  ! '&&' is expected to be a two-character OPERATOR, and the word after it
  ! ('pwd') is expected to be classified in command position as BUILTIN.
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'ls && pwd'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 9 significant characters

  call assert_eq('and_or: count', 3, ntok)
  call assert_token('and_or/&&', toks(2), 4, 5, HTOK_OPERATOR)
  call assert_token('and_or/pwd', toks(3), 7, 9, HTOK_BUILTIN)
end subroutine test_and_or_resets_cmd_pos
167
subroutine test_semicolon_resets_cmd_pos()
  ! ';' is expected to be an OPERATOR, and the second 'echo' (after it)
  ! is expected to be classified as BUILTIN again.
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo a; echo b'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 14 significant characters

  call assert_eq('semicolon: count', 5, ntok)
  call assert_token('semicolon/;', toks(3), 7, 7, HTOK_OPERATOR)
  call assert_token('semicolon/echo2', toks(4), 9, 12, HTOK_BUILTIN)
end subroutine test_semicolon_resets_cmd_pos
178
subroutine test_option_token()
  ! A long option with '=value' is expected to be a single OPTION token
  ! spanning positions 4-15.
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'ls --color=auto '
  ! The trailing blank is excluded: len_trim gives 15.
  call tokenize_v2(line, len_trim(line), toks, ntok)

  call assert_eq('option: count', 2, ntok)
  call assert_token('option/--color', toks(2), 4, 15, HTOK_OPTION)
end subroutine test_option_token
188
subroutine test_single_quoted_string()
  ! A single-quoted argument containing a blank is expected to be one
  ! STRING_SINGLE token that includes both quote characters (6-18).
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = "echo 'hello world'"
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 18 significant characters

  call assert_eq('sq_string: count', 2, ntok)
  call assert_token('sq_string/str', toks(2), 6, 18, HTOK_STRING_SINGLE)
end subroutine test_single_quoted_string
198
subroutine test_double_quoted_string()
  ! A double-quoted argument containing a blank is expected to be one
  ! STRING_DOUBLE token that includes both quote characters (6-18).
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo "hello world"'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 18 significant characters

  call assert_eq('dq_string: count', 2, ntok)
  call assert_token('dq_string/str', toks(2), 6, 18, HTOK_STRING_DOUBLE)
end subroutine test_double_quoted_string
208
subroutine test_variable_simple()
  ! '$HOME' is expected to be a single VARIABLE token at positions 6-10.
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo $HOME'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 10 significant characters

  call assert_eq('var_simple: count', 2, ntok)
  call assert_token('var_simple/$HOME', toks(2), 6, 10, HTOK_VARIABLE)
end subroutine test_variable_simple
218
subroutine test_variable_brace()
  ! '${PATH}' is expected to be a single VARIABLE token covering the
  ! braces as well (positions 6-12).
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo ${PATH}'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 12 significant characters

  call assert_eq('var_brace: count', 2, ntok)
  call assert_token('var_brace/${PATH}', toks(2), 6, 12, HTOK_VARIABLE)
end subroutine test_variable_brace
228
subroutine test_variable_subshell()
  ! '$(whoami)' is expected to be a single VARIABLE token covering the
  ! whole substitution, parentheses included (positions 6-14).
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo $(whoami)'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 14 significant characters

  call assert_eq('var_subshell: count', 2, ntok)
  call assert_token('var_subshell/$()', toks(2), 6, 14, HTOK_VARIABLE)
end subroutine test_variable_subshell
238
subroutine test_comment()
  ! A line starting with '#' is expected to be one COMMENT token that
  ! spans the whole line (positions 1-16).
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = '# this is a test'
  ! The text fills the 16-character buffer exactly.
  call tokenize_v2(line, len(line), toks, ntok)

  call assert_eq('comment: count', 1, ntok)
  call assert_token('comment/#...', toks(1), 1, 16, HTOK_COMMENT)
end subroutine test_comment
248
subroutine test_redirect_gt()
  ! '>' is expected to be a REDIRECT token and its target a DEFAULT token.
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo hi > out'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 13 significant characters

  call assert_eq('redir_gt: count', 4, ntok)
  call assert_token('redir_gt/>', toks(3), 9, 9, HTOK_REDIRECT)
  call assert_token('redir_gt/out', toks(4), 11, 13, HTOK_DEFAULT)
end subroutine test_redirect_gt
259
subroutine test_redirect_append()
  ! '>>' is expected to be one two-character REDIRECT token (positions 9-10).
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo hi >> out'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 14 significant characters

  call assert_eq('redir_append: count', 4, ntok)
  call assert_token('redir_append/>>', toks(3), 9, 10, HTOK_REDIRECT)
end subroutine test_redirect_append
269
subroutine test_redirect_heredoc()
  ! '<<' is expected to be one two-character REDIRECT token (positions 5-6).
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'cat << EOF'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 10 significant characters

  call assert_eq('redir_heredoc: count', 3, ntok)
  call assert_token('redir_heredoc/<<', toks(2), 5, 6, HTOK_REDIRECT)
end subroutine test_redirect_heredoc
279
subroutine test_redirect_fd_prefix()
  ! A file-descriptor prefix is expected to be part of the redirect token:
  ! 'cmd' = command_invalid, '2>' = redirect, '/dev/null' = path.
  ! Only the redirect and the path are inspected; 'cmd' is just counted.
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'cmd 2>/dev/null'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 15 significant characters

  call assert_eq('redir_fd: count', 3, ntok)
  call assert_token('redir_fd/2>', toks(2), 5, 6, HTOK_REDIRECT)
  call assert_token('redir_fd//dev/null', toks(3), 7, 15, HTOK_PATH)
end subroutine test_redirect_fd_prefix
291
subroutine test_redirect_amp_gt()
  ! '&>' is expected to be one REDIRECT token (positions 5-6) rather than
  ! a background operator followed by '>'.
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'cmd &> /dev/null'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 16 significant characters

  call assert_eq('redir_amp_gt: count', 3, ntok)
  call assert_token('redir_amp_gt/&>', toks(2), 5, 6, HTOK_REDIRECT)
end subroutine test_redirect_amp_gt
301
subroutine test_operator_background()
  ! A trailing '&' is expected to be an OPERATOR token at position 10.
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'sleep 10 &'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 10 significant characters

  call assert_eq('bg: count', 3, ntok)
  call assert_token('bg/&', toks(3), 10, 10, HTOK_OPERATOR)
end subroutine test_operator_background
311
subroutine test_operator_parens()
  ! Parentheses are expected to be separate OPERATOR tokens even without
  ! surrounding blanks, and 'echo' right after '(' to be a BUILTIN.
  ! Token 3 ('hi') is counted but not inspected.
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = '(echo hi)'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 9 significant characters

  call assert_eq('parens: count', 4, ntok)
  call assert_token('parens/(', toks(1), 1, 1, HTOK_OPERATOR)
  call assert_token('parens/echo', toks(2), 2, 5, HTOK_BUILTIN)
  call assert_token('parens/)', toks(4), 9, 9, HTOK_OPERATOR)
end subroutine test_operator_parens
323
subroutine test_assignment()
  ! 'FOO=bar' as the only word on the line is expected to be one token.
  ! Only the count is checked; the token type is deliberately left open.
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'FOO=bar'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 7 significant characters

  ! Original author's note on why the type is not asserted: in command
  ! position the tokenizer checks keyword, then builtin, then valid command,
  ! then invalid command; '=' assignments are detected in non-command
  ! position only, so FOO=bar here is treated as a command.
  call assert_eq('assign: count', 1, ntok)
end subroutine test_assignment
336
subroutine test_glob_star()
  ! An argument containing '*' is expected to be a GLOB token (6-10).
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo *.txt'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 10 significant characters

  call assert_eq('glob_star: count', 2, ntok)
  call assert_token('glob_star/*.txt', toks(2), 6, 10, HTOK_GLOB)
end subroutine test_glob_star
346
subroutine test_glob_question()
  ! An argument containing '?' is expected to be a GLOB token (6-14).
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo file?.log'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 14 significant characters

  call assert_eq('glob_question: count', 2, ntok)
  call assert_token('glob_question/file?.log', toks(2), 6, 14, HTOK_GLOB)
end subroutine test_glob_question
356
subroutine test_path_token()
  ! An absolute path argument is expected to be a PATH token (6-13).
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo /usr/bin'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 13 significant characters

  call assert_eq('path: count', 2, ntok)
  call assert_token('path//usr/bin', toks(2), 6, 13, HTOK_PATH)
end subroutine test_path_token
366
subroutine test_number_token()
  ! An all-digit argument is expected to be a NUMBER token (6-7).
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'echo 42'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 7 significant characters

  call assert_eq('number: count', 2, ntok)
  call assert_token('number/42', toks(2), 6, 7, HTOK_NUMBER)
end subroutine test_number_token
376
subroutine test_empty_input()
  ! A zero-length line is expected to produce zero tokens.
  character(len=*), parameter :: nothing = ''
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  call tokenize_v2(nothing, len(nothing), toks, ntok)   ! length argument is 0
  call assert_eq('empty: count', 0, ntok)
end subroutine test_empty_input
383
subroutine test_whitespace_only()
  ! A line made only of blanks is expected to produce zero tokens.
  character(len=4) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  ! Assignment blank-pads to the declared length, so line is four blanks.
  line = ' '
  ! len, not len_trim: all four blanks are handed to the tokenizer.
  call tokenize_v2(line, len(line), toks, ntok)

  call assert_eq('whitespace: count', 0, ntok)
end subroutine test_whitespace_only
392
subroutine test_complex_pipeline()
  ! Three commands joined by two pipes: ls -la | grep foo | wc -l
  ! Checks the token count and then the type of every token in order.
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok, k
  character(len=18) :: label(8)
  integer :: want(8)

  line = 'ls -la | grep foo | wc -l'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 25 significant characters
  call assert_eq('pipeline: count', 8, ntok)

  ! Label and expected type for token k, in input order.
  label = [character(len=18) :: &
           'pipeline/ls type', 'pipeline/-la type', 'pipeline/|1 type', &
           'pipeline/grep type', 'pipeline/foo type', 'pipeline/|2 type', &
           'pipeline/wc type', 'pipeline/-l type']
  want = [HTOK_COMMAND_VALID, HTOK_OPTION, HTOK_OPERATOR, &
          HTOK_COMMAND_VALID, HTOK_DEFAULT, HTOK_OPERATOR, &
          HTOK_COMMAND_VALID, HTOK_OPTION]

  do k = 1, size(want)
    ! trim removes the padding added to make all labels the same length.
    call assert_eq(trim(label(k)), want(k), toks(k)%token_type)
  end do
end subroutine test_complex_pipeline
410
subroutine test_keyword_resets_cmd_pos()
  ! 'then' and 'else' should reset command position so that the word
  ! following each of them is classified as a command.
  ! Expected sequence:
  !   if=KW true=CMD ;=OP then=KW ls=CMD ;=OP else=KW pwd=BUILTIN ;=OP fi=KW
  character(len=32) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok, k
  integer :: pos(6), want(6)
  character(len=13) :: label(6)

  line = 'if true; then ls; else pwd; fi'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 30 significant characters
  call assert_eq('kw_reset: count', 10, ntok)

  ! Token index, label and expected type of each inspected token; 'true'
  ! and the three ';' tokens are counted but not inspected.
  pos = [1, 4, 5, 7, 8, 10]
  label = [character(len=13) :: 'kw_reset/if', 'kw_reset/then', 'kw_reset/ls', &
           'kw_reset/else', 'kw_reset/pwd', 'kw_reset/fi']
  want = [HTOK_KEYWORD, HTOK_KEYWORD, HTOK_COMMAND_VALID, &
          HTOK_KEYWORD, HTOK_BUILTIN, HTOK_KEYWORD]

  do k = 1, size(pos)
    ! trim removes the padding added to make all labels the same length.
    call assert_eq(trim(label(k)), want(k), toks(pos(k))%token_type)
  end do
end subroutine test_keyword_resets_cmd_pos
427
subroutine test_case_esac()
  ! Both 'case' and 'esac' are expected to classify as KEYWORD.
  character(len=16) :: line
  type(hl_token_t) :: toks(MT)
  integer :: ntok

  line = 'case esac'
  call tokenize_v2(line, len_trim(line), toks, ntok)   ! 9 significant characters

  call assert_eq('case_esac: count', 2, ntok)
  call assert_eq('case_esac/case', HTOK_KEYWORD, toks(1)%token_type)
  ! 'esac' is the second word here, yet it is still expected to be a
  ! keyword: it is recognized regardless of command position.
  call assert_eq('case_esac/esac', HTOK_KEYWORD, toks(2)%token_type)
end subroutine test_case_esac
439
440 ! C-style for loop: for ((i=0; i<3; i++)) do echo $i; done
subroutine test_c_style_for_loop()
  ! C-style for loop on one line. Expected token sequence (7 tokens):
  !   for = KEYWORD, ((...)) = NUMBER (arithmetic), do = KEYWORD,
  !   echo = BUILTIN, $i = VARIABLE, ; = OPERATOR, done = KEYWORD
  character(len=256) :: input
  type(hl_token_t) :: tokens(MT)
  integer :: n

  input = 'for ((i=0; i<3; i++)) do echo $i; done'
  call tokenize_v2(input, 38, tokens, n)

  ! Assert the count first, as the other tests do. Without it a short
  ! result would go unreported, and the type checks below would compare
  ! against entries beyond n that the tokenizer may never have set.
  call assert_eq('cfor: count', 7, n)

  call assert_eq('cfor: for', HTOK_KEYWORD, tokens(1)%token_type)
  call assert_eq('cfor: (())', HTOK_NUMBER, tokens(2)%token_type)
  call assert_eq('cfor: do', HTOK_KEYWORD, tokens(3)%token_type)
  call assert_eq('cfor: echo', HTOK_BUILTIN, tokens(4)%token_type)
  call assert_eq('cfor: $i', HTOK_VARIABLE, tokens(5)%token_type)
  call assert_eq('cfor: ;', HTOK_OPERATOR, tokens(6)%token_type)
  call assert_eq('cfor: done', HTOK_KEYWORD, tokens(7)%token_type)
end subroutine test_c_style_for_loop
458
459 ! Keywords recognized even outside command position
subroutine test_keyword_outside_cmd_pos()
  ! 'echo done': 'done' follows a builtin, so it is not in command
  ! position, but it is still expected to classify as KEYWORD.
  character(len=256) :: input
  type(hl_token_t) :: tokens(MT)
  integer :: n

  input = 'echo done'
  call tokenize_v2(input, 9, tokens, n)

  ! Assert the count first, as the other tests do. Without it a short
  ! result would go unreported, and the type checks below would compare
  ! against entries beyond n that the tokenizer may never have set.
  call assert_eq('kw_nonpos: count', 2, n)

  call assert_eq('kw_nonpos: echo', HTOK_BUILTIN, tokens(1)%token_type)
  call assert_eq('kw_nonpos: done', HTOK_KEYWORD, tokens(2)%token_type)
end subroutine test_keyword_outside_cmd_pos
471
472 end program test_syntax_highlight
473