Rust · 995 bytes Raw Blame History
1 use unicode_segmentation::UnicodeSegmentation;
2 use unicode_width::UnicodeWidthStr;
3
4 /// Get the display width of a string (handling wide chars like CJK)
5 pub fn display_width(s: &str) -> usize {
6 UnicodeWidthStr::width(s)
7 }
8
9 /// Count grapheme clusters in a string
10 pub fn grapheme_count(s: &str) -> usize {
11 s.graphemes(true).count()
12 }
13
14 /// Get the nth grapheme from a string
15 pub fn nth_grapheme(s: &str, n: usize) -> Option<&str> {
16 s.graphemes(true).nth(n)
17 }
18
19 /// Convert a grapheme index to byte offset
20 pub fn grapheme_to_byte_offset(s: &str, grapheme_idx: usize) -> usize {
21 s.graphemes(true)
22 .take(grapheme_idx)
23 .map(|g| g.len())
24 .sum()
25 }
26
27 /// Convert a byte offset to grapheme index
28 pub fn byte_to_grapheme_offset(s: &str, byte_idx: usize) -> usize {
29 let mut count = 0;
30 let mut bytes = 0;
31 for g in s.graphemes(true) {
32 if bytes >= byte_idx {
33 break;
34 }
35 bytes += g.len();
36 count += 1;
37 }
38 count
39 }
40