Fortran · 6259 bytes Raw Blame History
!> Terminal display width of Unicode code points (a small wcwidth-style lookup).
!!
!! The tables cover a selected set of zero-width and East Asian Wide/Fullwidth
!! ranges rather than the complete Unicode database; any code point that is not
!! listed is reported as one column wide.
module wcwidth_mod
  implicit none
  private

  public :: codepoint_width

contains

  !> Number of terminal columns occupied by the Unicode code point `cp`.
  !!
  !! @param cp  Unicode scalar value (e.g. 65 for 'A', int(z'4E2D') for a CJK
  !!            ideograph).
  !! @return w  0 for NUL, C0/C1 controls and the listed zero-width/combining
  !!            ranges; 2 for the listed Wide/Fullwidth ranges; 1 otherwise.
  !!
  !! Negative values are treated like control characters (width 0). Values
  !! above U+10FFFF are not rejected; they fall through to the default of 1.
  !!
  !! The ranges inside the select-case are kept sorted by code point. Fortran
  !! rejects overlapping case ranges at compile time, so an accidental overlap
  !! introduced by a later edit is caught by the compiler.
  pure function codepoint_width(cp) result(w)
    integer, intent(in) :: cp
    integer :: w

    select case (cp)

    ! ---- Zero-width ---------------------------------------------------------
    case (:31, &                           ! negatives, NUL and C0 controls
          127:159, &                       ! DEL and C1 controls
          int(z'00AD'), &                  ! Soft Hyphen
          int(z'0300'):int(z'036F'), &     ! Combining Diacritical Marks
          int(z'1160'):int(z'11FF'), &     ! Hangul Jamo medial vowels / final consonants
          int(z'1AB0'):int(z'1AFF'), &     ! Combining Diacritical Marks Extended
          int(z'1DC0'):int(z'1DFF'), &     ! Combining Diacritical Marks Supplement
          int(z'200B'):int(z'200D'), &     ! Zero Width Space, Non-Joiner, Joiner
          int(z'20D0'):int(z'20FF'), &     ! Combining Diacritical Marks for Symbols
          int(z'D7B0'):int(z'D7FF'), &     ! Hangul Jamo Extended-B (medial / final jamo)
          int(z'FE00'):int(z'FE0F'))       ! Variation Selectors
      ! The conjoining Hangul vowels and finals attach to a preceding leading
      ! consonant and do not advance the cursor on their own, so a decomposed
      ! syllable (L + V + T) occupies the two columns of its leading consonant.
      w = 0

    ! ---- Wide / Fullwidth ---------------------------------------------------
    case (int(z'1100'):int(z'115F'), &     ! Hangul Jamo leading consonants
          int(z'2E80'):int(z'2EFF'), &     ! CJK Radicals Supplement
          int(z'2F00'):int(z'2FDF'), &     ! Kangxi Radicals
          int(z'3000'):int(z'303E'), &     ! CJK Symbols and Punctuation (U+303F is narrow)
          int(z'3040'):int(z'309F'), &     ! Hiragana
          int(z'30A0'):int(z'30FF'), &     ! Katakana
          int(z'3100'):int(z'312F'), &     ! Bopomofo
          int(z'3130'):int(z'318F'), &     ! Hangul Compatibility Jamo
          int(z'31A0'):int(z'31BF'), &     ! Bopomofo Extended
          int(z'31C0'):int(z'31EF'), &     ! CJK Strokes
          int(z'31F0'):int(z'31FF'), &     ! Katakana Phonetic Extensions
          int(z'3200'):int(z'32FF'), &     ! Enclosed CJK Letters and Months
          int(z'3300'):int(z'33FF'), &     ! CJK Compatibility
          int(z'3400'):int(z'4DBF'), &     ! CJK Unified Ideographs Extension A
          int(z'4E00'):int(z'9FFF'), &     ! CJK Unified Ideographs
          int(z'A000'):int(z'A48F'), &     ! Yi Syllables
          int(z'A490'):int(z'A4CF'), &     ! Yi Radicals
          int(z'A960'):int(z'A97F'), &     ! Hangul Jamo Extended-A
          int(z'AC00'):int(z'D7AF'), &     ! Hangul Syllables
          int(z'F900'):int(z'FAFF'), &     ! CJK Compatibility Ideographs
          int(z'FF01'):int(z'FF60'), &     ! Fullwidth ASCII variants and brackets
          int(z'FFE0'):int(z'FFE6'), &     ! Fullwidth signs (cent, pound, yen, won, ...)
          int(z'1F300'):int(z'1F5FF'), &   ! Miscellaneous Symbols and Pictographs
          int(z'1F600'):int(z'1F64F'), &   ! Emoticons
          int(z'1F680'):int(z'1F6FF'), &   ! Transport and Map Symbols
          int(z'1F900'):int(z'1F9FF'), &   ! Supplemental Symbols and Pictographs
          int(z'1FA00'):int(z'1FAFF'), &   ! Symbols and Pictographs Extended-A and -B
          int(z'20000'):int(z'2FFFD'), &   ! Plane 2: CJK Extensions B-F, Compatibility Supplement
          int(z'30000'):int(z'3FFFD'))     ! Plane 3: CJK Extension G and later
      ! Planes 2 and 3 are Wide in their entirety (including code points that
      ! are not yet assigned), so later ideograph extensions need no new entry.
      w = 2

    ! ---- Everything else ----------------------------------------------------
    case default
      ! Includes the halfwidth forms (U+FF61-U+FFDC), which are one column wide.
      w = 1

    end select
  end function codepoint_width

end module wcwidth_mod
272