Rust · 1017 bytes Raw Blame History
1 #![allow(dead_code)]
2
3 use unicode_segmentation::UnicodeSegmentation;
4 use unicode_width::UnicodeWidthStr;
5
6 /// Get the display width of a string (handling wide chars like CJK)
7 pub fn display_width(s: &str) -> usize {
8 UnicodeWidthStr::width(s)
9 }
10
11 /// Count grapheme clusters in a string
12 pub fn grapheme_count(s: &str) -> usize {
13 s.graphemes(true).count()
14 }
15
16 /// Get the nth grapheme from a string
17 pub fn nth_grapheme(s: &str, n: usize) -> Option<&str> {
18 s.graphemes(true).nth(n)
19 }
20
21 /// Convert a grapheme index to byte offset
22 pub fn grapheme_to_byte_offset(s: &str, grapheme_idx: usize) -> usize {
23 s.graphemes(true)
24 .take(grapheme_idx)
25 .map(|g| g.len())
26 .sum()
27 }
28
29 /// Convert a byte offset to grapheme index
30 pub fn byte_to_grapheme_offset(s: &str, byte_idx: usize) -> usize {
31 let mut count = 0;
32 let mut bytes = 0;
33 for g in s.graphemes(true) {
34 if bytes >= byte_idx {
35 break;
36 }
37 bytes += g.len();
38 count += 1;
39 }
40 count
41 }
42