fortrangoingonforty/facsimile / d12ed9f

Browse files

Fix UTF-8 rendering in editor

Authored by espadonne
SHA
d12ed9fd1b1149e1fd97ef90d3685a290388137b
Parents
42f9bdb
Tree
3eeb67a

1 changed file

Status | File | + | -
M src/terminal/renderer_module.f90 111 79
src/terminal/renderer_module.f90 (modified)
@@ -316,45 +316,60 @@ contains
316316
         type(buffer_t), intent(in) :: buffer
317317
         type(editor_state_t), intent(in) :: editor
318318
         integer, intent(in) :: line_num, start_col, width
319
-        character(len=:), allocatable :: line
320
-        integer :: i, col, line_len, token_idx
319
+        character(len=:), allocatable :: line, utf8_ch
320
+        integer :: i, char_idx, byte_pos, token_idx, char_count, display_col, char_width
321321
         integer :: sel_start_line, sel_start_col, sel_end_line, sel_end_col
322322
         logical :: in_selection, is_bracket_match, is_current_line, is_search_match
323
-        character :: ch
324323
         type(token_t), allocatable :: tokens(:)
325324
         character(len=:), allocatable :: token_color
326325
         integer :: search_matches(2, 50)  ! Up to 50 matches per line (start, end pairs)
327326
         integer :: num_search_matches, match_idx
327
+        integer :: line_byte_len
328328
 
329329
         line = buffer_get_line(buffer, line_num)
330
-        line_len = len(line)
330
+        line_byte_len = len(line)
331
+        char_count = utf8_char_count(line)
331332
 
332
-        ! Get all search matches on this line
333
+        ! Get all search matches on this line (these use byte indices)
333334
         if (search_mode_active) then
334335
             call get_matches_on_line(line, line_num, search_matches, num_search_matches)
335336
         else
336337
             num_search_matches = 0
337338
         end if
338339
 
339
-        ! Get syntax tokens for this line
340
+        ! Get syntax tokens for this line (tokens use byte indices)
340341
         if (syntax_highlighter%enabled) then
341342
             call tokenize_line(syntax_highlighter, line, tokens)
342343
         else
343344
             allocate(tokens(1))
344345
             tokens(1)%type = TOKEN_PLAIN
345346
             tokens(1)%start_col = 1
346
-            tokens(1)%end_col = max(1, line_len)
347
+            tokens(1)%end_col = max(1, line_byte_len)
347348
         end if
348349
 
349350
         ! Check if this is the current line
350351
         is_current_line = (line_num == editor%cursors(editor%active_cursor)%line) .and. highlight_current_line
351352
 
352
-        ! Render each character with selection highlighting
353
-        do col = start_col, min(start_col + width - 1, line_len + 1)
353
+        ! Render each UTF-8 character with selection highlighting
354
+        ! char_idx = 1-based character index (for selection logic)
355
+        ! display_col = screen column position (for width tracking)
356
+        ! byte_pos = byte position in string (for token lookup)
357
+        display_col = 0
358
+        char_idx = start_col
359
+
360
+        do while (char_idx <= char_count .and. display_col < width)
354361
             in_selection = .false.
355362
             is_bracket_match = .false.
356363
 
364
+            ! Get the UTF-8 character at this position
365
+            utf8_ch = utf8_char_at(line, char_idx)
366
+            char_width = utf8_display_width(utf8_ch)
367
+
368
+            ! Get byte position for token lookup
369
+            byte_pos = utf8_char_to_byte_index(line, char_idx)
370
+
357371
             ! Check if this position is in any cursor's selection
372
+            ! (cursor positions are character indices, not byte indices)
358373
             do i = 1, size(editor%cursors)
359374
                 if (editor%cursors(i)%has_selection) then
360375
                     ! Determine selection bounds (handle both directions)
@@ -374,26 +389,26 @@ contains
374389
                         sel_end_col = editor%cursors(i)%column
375390
                     end if
376391
 
377
-                    ! Check if this position is selected
392
+                    ! Check if this position is selected (using char_idx)
378393
                     if (line_num > sel_start_line .and. line_num < sel_end_line) then
379394
                         ! Fully selected line (between start and end)
380395
                         in_selection = .true.
381396
                         exit
382397
                     else if (line_num == sel_start_line .and. line_num == sel_end_line) then
383398
                         ! Single-line selection
384
-                        if (col >= sel_start_col .and. col < sel_end_col) then
399
+                        if (char_idx >= sel_start_col .and. char_idx < sel_end_col) then
385400
                             in_selection = .true.
386401
                             exit
387402
                         end if
388403
                     else if (line_num == sel_start_line .and. line_num < sel_end_line) then
389404
                         ! First line of multi-line selection
390
-                        if (col >= sel_start_col) then
405
+                        if (char_idx >= sel_start_col) then
391406
                             in_selection = .true.
392407
                             exit
393408
                         end if
394409
                     else if (line_num == sel_end_line .and. line_num > sel_start_line) then
395410
                         ! Last line of multi-line selection
396
-                        if (col < sel_end_col) then
411
+                        if (char_idx < sel_end_col) then
397412
                             in_selection = .true.
398413
                             exit
399414
                         end if
@@ -401,26 +416,28 @@ contains
401416
                 end if
402417
             end do
403418
 
404
-            ! Check if this position is a bracket or its match
405
-            if ((line_num == bracket_line .and. col == bracket_col) .or. &
406
-                (line_num == matching_bracket_line .and. col == matching_bracket_col)) then
419
+            ! Check if this position is a bracket or its match (using char_idx)
420
+            if ((line_num == bracket_line .and. char_idx == bracket_col) .or. &
421
+                (line_num == matching_bracket_line .and. char_idx == matching_bracket_col)) then
407422
                 is_bracket_match = .true.
408423
             end if
409424
 
410
-            ! Check if this position is part of a search match
425
+            ! Check if this position is part of a search match (search uses byte indices)
411426
             is_search_match = .false.
412
-            do match_idx = 1, num_search_matches
413
-                if (col >= search_matches(1, match_idx) .and. col <= search_matches(2, match_idx)) then
414
-                    is_search_match = .true.
415
-                    exit
416
-                end if
417
-            end do
427
+            if (byte_pos > 0) then
428
+                do match_idx = 1, num_search_matches
429
+                    if (byte_pos >= search_matches(1, match_idx) .and. byte_pos <= search_matches(2, match_idx)) then
430
+                        is_search_match = .true.
431
+                        exit
432
+                    end if
433
+                end do
434
+            end if
418435
 
419
-            ! Find which token this column belongs to
436
+            ! Find which token this column belongs to (tokens use byte indices)
420437
             token_color = ""
421
-            if (syntax_highlighter%enabled) then
438
+            if (syntax_highlighter%enabled .and. byte_pos > 0) then
422439
                 do token_idx = 1, size(tokens)
423
-                    if (col >= tokens(token_idx)%start_col .and. col <= tokens(token_idx)%end_col) then
440
+                    if (byte_pos >= tokens(token_idx)%start_col .and. byte_pos <= tokens(token_idx)%end_col) then
424441
                         token_color = get_token_color(tokens(token_idx)%type)
425442
                         exit
426443
                     end if
@@ -428,42 +445,39 @@ contains
428445
             end if
429446
 
430447
             ! Render character with or without highlighting
431
-            if (col <= line_len) then
432
-                ch = line(col:col)
433
-            else
434
-                ch = ' '
435
-            end if
436
-
437448
             if (in_selection) then
438449
                 ! Highlight selected text with reverse video (highest priority)
439
-                call terminal_write(char(27) // '[7m' // ch // char(27) // '[0m')
450
+                call terminal_write(char(27) // '[7m' // utf8_ch // char(27) // '[0m')
440451
             else if (is_bracket_match) then
441452
                 ! Highlight matching brackets with cyan background
442
-                call terminal_write(char(27) // '[46m' // ch // char(27) // '[0m')
453
+                call terminal_write(char(27) // '[46m' // utf8_ch // char(27) // '[0m')
443454
             else if (is_search_match) then
444455
                 ! Highlight search matches with yellow background
445456
                 if (len(token_color) > 0) then
446
-                    call terminal_write(token_color // char(27) // '[43m' // ch // char(27) // '[0m')
457
+                    call terminal_write(token_color // char(27) // '[43m' // utf8_ch // char(27) // '[0m')
447458
                 else
448
-                    call terminal_write(char(27) // '[43m' // ch // char(27) // '[0m')
459
+                    call terminal_write(char(27) // '[43m' // utf8_ch // char(27) // '[0m')
449460
                 end if
450461
             else if (is_current_line) then
451462
                 ! Subtle background for current line with syntax color
452463
                 if (len(token_color) > 0) then
453
-                    call terminal_write(token_color // char(27) // '[48;5;236m' // ch // char(27) // '[0m')
464
+                    call terminal_write(token_color // char(27) // '[48;5;236m' // utf8_ch // char(27) // '[0m')
454465
                 else
455
-                    call terminal_write(char(27) // '[48;5;236m' // ch // char(27) // '[0m')
466
+                    call terminal_write(char(27) // '[48;5;236m' // utf8_ch // char(27) // '[0m')
456467
                 end if
457468
             else if (len(token_color) > 0) then
458469
                 ! Apply syntax highlighting
459
-                call terminal_write(token_color // ch // char(27) // '[0m')
470
+                call terminal_write(token_color // utf8_ch // char(27) // '[0m')
460471
             else
461
-                call terminal_write(ch)
472
+                call terminal_write(utf8_ch)
462473
             end if
474
+
475
+            display_col = display_col + char_width
476
+            char_idx = char_idx + 1
463477
         end do
464478
 
465479
         ! Fill remaining width with spaces
466
-        do col = max(line_len + 1, start_col), start_col + width - 1
480
+        do while (display_col < width)
467481
             in_selection = .false.
468482
 
469483
             ! Check if end of line position is in selection
@@ -485,25 +499,26 @@ contains
485499
                     end if
486500
 
487501
                     ! Check if this position is selected (multi-line aware)
502
+                    ! Use char_idx which is now past end of line content
488503
                     if (line_num > sel_start_line .and. line_num < sel_end_line) then
489504
                         ! Fully selected line
490505
                         in_selection = .true.
491506
                         exit
492507
                     else if (line_num == sel_start_line .and. line_num == sel_end_line) then
493508
                         ! Single-line selection
494
-                        if (col >= sel_start_col .and. col < sel_end_col) then
509
+                        if (char_idx >= sel_start_col .and. char_idx < sel_end_col) then
495510
                             in_selection = .true.
496511
                             exit
497512
                         end if
498513
                     else if (line_num == sel_start_line .and. line_num < sel_end_line) then
499514
                         ! First line of multi-line selection
500
-                        if (col >= sel_start_col) then
515
+                        if (char_idx >= sel_start_col) then
501516
                             in_selection = .true.
502517
                             exit
503518
                         end if
504519
                     else if (line_num == sel_end_line .and. line_num > sel_start_line) then
505520
                         ! Last line of multi-line selection
506
-                        if (col < sel_end_col) then
521
+                        if (char_idx < sel_end_col) then
507522
                             in_selection = .true.
508523
                             exit
509524
                         end if
@@ -518,9 +533,12 @@ contains
518533
             else
519534
                 call terminal_write(' ')
520535
             end if
536
+            display_col = display_col + 1
537
+            char_idx = char_idx + 1
521538
         end do
522539
 
523540
         if (allocated(line)) deallocate(line)
541
+        if (allocated(utf8_ch)) deallocate(utf8_ch)
524542
     end subroutine render_line_with_selections
525543
 
526544
     subroutine render_status_bar(editor, buffer, match_mode_active, match_case_sens)
@@ -1319,14 +1337,13 @@ contains
13191337
         type(buffer_t), intent(in) :: buffer
13201338
         type(editor_state_t), intent(in) :: editor
13211339
         integer, intent(in) :: pane_idx, line_num, screen_row, col, width
1322
-        character(len=:), allocatable :: line
1340
+        character(len=:), allocatable :: line, utf8_ch
13231341
         type(pane_t) :: pane
1324
-        integer :: tab_idx, start_col, end_col, i, char_col
1342
+        integer :: tab_idx, i, char_idx, char_count, display_col, char_width
13251343
         integer :: content_width, content_col
13261344
         character(len=5) :: line_num_str
13271345
         logical :: is_current_line, in_selection, is_bracket_match
13281346
         integer :: sel_start_line, sel_start_col, sel_end_line, sel_end_col
1329
-        character(len=1) :: ch
13301347
 
13311348
         tab_idx = editor%active_tab_index
13321349
         pane = editor%tabs(tab_idx)%panes(pane_idx)
@@ -1377,9 +1394,8 @@ contains
13771394
         line = buffer_get_line(buffer, line_num)
13781395
         if (.not. allocated(line)) return
13791396
 
1380
-        ! Calculate visible portion based on horizontal scroll
1381
-        start_col = pane%viewport_column
1382
-        end_col = min(start_col + content_width - 1, len(line))
1397
+        ! Get character count for UTF-8 iteration
1398
+        char_count = utf8_char_count(line)
13831399
 
13841400
         ! Check if this is the current line with a cursor
13851401
         is_current_line = .false.
@@ -1393,9 +1409,17 @@ contains
13931409
         end if
13941410
 
13951411
         ! Render the line character by character with selection highlighting
1396
-        do char_col = start_col, start_col + content_width - 1
1412
+        ! Using UTF-8 aware iteration
1413
+        display_col = 0
1414
+        char_idx = pane%viewport_column  ! Start from viewport column (character index)
1415
+
1416
+        do while (char_idx <= char_count .and. display_col < content_width)
13971417
             in_selection = .false.
13981418
 
1419
+            ! Get the UTF-8 character at this position
1420
+            utf8_ch = utf8_char_at(line, char_idx)
1421
+            char_width = utf8_display_width(utf8_ch)
1422
+
13991423
             ! Check if this position is in any cursor's selection (use pane's cursors)
14001424
             if (allocated(pane%cursors)) then
14011425
                 do i = 1, size(pane%cursors)
@@ -1417,26 +1441,26 @@ contains
14171441
                             sel_end_col = pane%cursors(i)%column
14181442
                         end if
14191443
 
1420
-                        ! Check if this position is selected
1444
+                        ! Check if this position is selected (using char_idx)
14211445
                         if (line_num > sel_start_line .and. line_num < sel_end_line) then
14221446
                             ! Fully selected line (between start and end)
14231447
                             in_selection = .true.
14241448
                             exit
14251449
                         else if (line_num == sel_start_line .and. line_num == sel_end_line) then
14261450
                             ! Single-line selection
1427
-                            if (char_col >= sel_start_col .and. char_col < sel_end_col) then
1451
+                            if (char_idx >= sel_start_col .and. char_idx < sel_end_col) then
14281452
                                 in_selection = .true.
14291453
                                 exit
14301454
                             end if
14311455
                         else if (line_num == sel_start_line .and. line_num < sel_end_line) then
14321456
                             ! First line of multi-line selection
1433
-                            if (char_col >= sel_start_col) then
1457
+                            if (char_idx >= sel_start_col) then
14341458
                                 in_selection = .true.
14351459
                                 exit
14361460
                             end if
14371461
                         else if (line_num == sel_end_line .and. line_num > sel_start_line) then
14381462
                             ! Last line of multi-line selection
1439
-                            if (char_col < sel_end_col) then
1463
+                            if (char_idx < sel_end_col) then
14401464
                                 in_selection = .true.
14411465
                                 exit
14421466
                             end if
@@ -1448,40 +1472,50 @@ contains
14481472
             ! Check if this position is a bracket or its match (only for active pane)
14491473
             is_bracket_match = .false.
14501474
             if (pane%is_active) then
1451
-                if ((line_num == bracket_line .and. char_col == bracket_col) .or. &
1452
-                    (line_num == matching_bracket_line .and. char_col == matching_bracket_col)) then
1475
+                if ((line_num == bracket_line .and. char_idx == bracket_col) .or. &
1476
+                    (line_num == matching_bracket_line .and. char_idx == matching_bracket_col)) then
14531477
                     is_bracket_match = .true.
14541478
                 end if
14551479
             end if
14561480
 
1457
-            ! Get the character at this position
1458
-            if (char_col <= len(line)) then
1459
-                ch = line(char_col:char_col)
1460
-            else
1461
-                ch = ' '
1462
-            end if
1463
-
14641481
             ! Render the character with appropriate highlighting
14651482
             if (in_selection) then
14661483
                 ! Highlight selected text with reverse video
1467
-                call terminal_write(char(27) // '[7m' // ch // char(27) // '[0m')
1484
+                call terminal_write(char(27) // '[7m' // utf8_ch // char(27) // '[0m')
14681485
             else if (is_bracket_match) then
14691486
                 ! Highlight matching brackets with cyan background
1470
-                call terminal_write(char(27) // '[46m' // ch // char(27) // '[0m')
1487
+                call terminal_write(char(27) // '[46m' // utf8_ch // char(27) // '[0m')
14711488
             else if (pane%is_active .and. is_current_line) then
14721489
                 ! Subtle background for current line in active pane
1473
-                call terminal_write(char(27) // '[48;5;237m' // ch // char(27) // '[0m')
1490
+                call terminal_write(char(27) // '[48;5;237m' // utf8_ch // char(27) // '[0m')
14741491
             else if (.not. pane%is_active) then
14751492
                 ! Inactive pane background
1476
-                call terminal_write(char(27) // '[48;5;234m' // ch // char(27) // '[0m')
1493
+                call terminal_write(char(27) // '[48;5;234m' // utf8_ch // char(27) // '[0m')
14771494
             else
14781495
                 ! Normal text
1479
-                call terminal_write(ch)
1496
+                call terminal_write(utf8_ch)
1497
+            end if
1498
+
1499
+            display_col = display_col + char_width
1500
+            char_idx = char_idx + 1
1501
+        end do
1502
+
1503
+        ! Fill remaining width with spaces
1504
+        do while (display_col < content_width)
1505
+            if (.not. pane%is_active) then
1506
+                call terminal_write(char(27) // '[48;5;234m ' // char(27) // '[0m')
1507
+            else if (is_current_line) then
1508
+                call terminal_write(char(27) // '[48;5;237m ' // char(27) // '[0m')
1509
+            else
1510
+                call terminal_write(' ')
14801511
             end if
1512
+            display_col = display_col + 1
14811513
         end do
14821514
 
14831515
         ! Reset attributes
14841516
         call terminal_write(char(27) // '[0m')
1517
+
1518
+        if (allocated(utf8_ch)) deallocate(utf8_ch)
14851519
     end subroutine render_buffer_line_in_pane
14861520
 
14871521
     subroutine render_pane_separator(col, start_row, height)
@@ -1879,13 +1913,11 @@ contains
18791913
             end if
18801914
         case ("symbols")
18811915
             if (is_symbols_panel_visible(editor%symbols_panel)) then
1882
-                call render_lsp_symbols_panel(editor%symbols_panel, panel_start_col, panel_width, &
1883
-                                              2, editor%screen_rows - 1)
1916
+                call render_lsp_symbols_panel(editor%symbols_panel, editor%screen_rows - 1)
18841917
             end if
18851918
         case ("workspace_symbols")
18861919
             if (is_workspace_symbols_panel_visible(editor%workspace_symbols_panel)) then
1887
-                call render_lsp_workspace_symbols_panel(editor%workspace_symbols_panel, panel_start_col, &
1888
-                                                        panel_width, 2, editor%screen_rows - 1)
1920
+                call render_lsp_workspace_symbols_panel(editor%workspace_symbols_panel, editor%screen_rows - 1)
18891921
             end if
18901922
         end select
18911923
 
@@ -2130,25 +2162,25 @@ contains
21302162
     end subroutine render_lsp_references_panel
21312163
 
21322164
     ! Render symbols panel in offcanvas mode (right side, full height)
2133
-    subroutine render_lsp_symbols_panel(panel, start_col, width, start_row, end_row)
2165
+    subroutine render_lsp_symbols_panel(panel, screen_height)
21342166
         use symbols_panel_module, only: symbols_panel_t, render_symbols_panel
21352167
         type(symbols_panel_t), intent(in) :: panel
2136
-        integer, intent(in) :: start_col, width, start_row, end_row
2168
+        integer, intent(in) :: screen_height
21372169
 
21382170
         ! Delegate to the real symbols panel renderer
21392171
         ! The panel manages its own positioning via panel_start_col and panel_width
2140
-        call render_symbols_panel(panel, end_row)
2172
+        call render_symbols_panel(panel, screen_height)
21412173
     end subroutine render_lsp_symbols_panel
21422174
 
21432175
     ! Render workspace symbols panel in offcanvas mode (right side, full height)
2144
-    subroutine render_lsp_workspace_symbols_panel(panel, start_col, width, start_row, end_row)
2176
+    subroutine render_lsp_workspace_symbols_panel(panel, screen_height)
21452177
         use workspace_symbols_panel_module, only: workspace_symbols_panel_t, render_workspace_symbols_panel
21462178
         type(workspace_symbols_panel_t), intent(in) :: panel
2147
-        integer, intent(in) :: start_col, width, start_row, end_row
2179
+        integer, intent(in) :: screen_height
21482180
 
21492181
         ! Delegate to the real workspace symbols panel renderer
21502182
         ! The panel manages its own positioning via panel_start_col and panel_width
2151
-        call render_workspace_symbols_panel(panel, end_row)
2183
+        call render_workspace_symbols_panel(panel, screen_height)
21522184
     end subroutine render_lsp_workspace_symbols_panel
21532185
 
21542186
     ! Helper function to extract basename from path