/* Hard-coded block count. The value 281 equals the number of rows in the
 * block-name table visible further down in this listing (rows 201-481,
 * ending with the "No_Block" catch-all entry).
 * NOTE(review): this duplicates what UTF_BLOCKS_COUNT computes; the two must
 * be kept in sync by hand when rows are added — confirm against the users of
 * this macro. */
10 #define UTF_LAST_BLOCK (281)
/* Element count of utf_blocks, obtained through RZ_ARRAY_SIZE (defined
 * outside this view; presumably sizeof(a) / sizeof((a)[0]) — confirm). */
11 #define UTF_BLOCKS_COUNT RZ_ARRAY_SIZE(utf_blocks)
/* Element count of nonprintable_ranges, obtained the same way. */
12 #define UTF_NONPRINTABLE_RANGES_COUNT RZ_ARRAY_SIZE(nonprintable_ranges)
15 { 0x0000, 0x001F }, { 0x007F, 0x009F }, { 0x034F, 0x034F },
16 { 0x0378, 0x0379 }, { 0x037F, 0x0383 }, { 0x038B, 0x038B },
17 { 0x038D, 0x038D }, { 0x03A2, 0x03A2 }, { 0x0528, 0x0530 },
18 { 0x0557, 0x0558 }, { 0x0560, 0x0560 }, { 0x0588, 0x0588 },
19 { 0x058B, 0x058E }, { 0x0590, 0x0590 }, { 0x05C8, 0x05CF },
20 { 0x05EB, 0x05EF }, { 0x05F5, 0x0605 }, { 0x061C, 0x061D },
21 { 0x06DD, 0x06DD }, { 0x070E, 0x070F }, { 0x074B, 0x074C },
22 { 0x07B2, 0x07BF }, { 0x07FB, 0x07FF }, { 0x082E, 0x082F },
23 { 0x083F, 0x083F }, { 0x085C, 0x085D }, { 0x085F, 0x089F },
24 { 0x08A1, 0x08A1 }, { 0x08AD, 0x08E3 }, { 0x08FF, 0x08FF },
25 { 0x0978, 0x0978 }, { 0x0980, 0x0980 }, { 0x0984, 0x0984 },
26 { 0x098D, 0x098E }, { 0x0991, 0x0992 }, { 0x09A9, 0x09A9 },
27 { 0x09B1, 0x09B1 }, { 0x09B3, 0x09B5 }, { 0x09BA, 0x09BB },
28 { 0x09C5, 0x09C6 }, { 0x09C9, 0x09CA }, { 0x09CF, 0x09D6 },
29 { 0x09D8, 0x09DB }, { 0x09DE, 0x09DE }, { 0x09E4, 0x09E5 },
30 { 0x09FC, 0x0A00 }, { 0x0A04, 0x0A04 }, { 0x0A0B, 0x0A0E },
31 { 0x0A11, 0x0A12 }, { 0x0A29, 0x0A29 }, { 0x0A31, 0x0A31 },
32 { 0x0A34, 0x0A34 }, { 0x0A37, 0x0A37 }, { 0x0A3A, 0x0A3B },
33 { 0x0A3D, 0x0A3D }, { 0x0A43, 0x0A46 }, { 0x0A49, 0x0A4A },
34 { 0x0A4E, 0x0A50 }, { 0x0A52, 0x0A58 }, { 0x0A5D, 0x0A5D },
35 { 0x0A5F, 0x0A65 }, { 0x0A76, 0x0A80 }, { 0x0A84, 0x0A84 },
36 { 0x0A8E, 0x0A8E }, { 0x0A92, 0x0A92 }, { 0x0AA9, 0x0AA9 },
37 { 0x0AB1, 0x0AB1 }, { 0x0AB4, 0x0AB4 }, { 0x0ABA, 0x0ABB },
38 { 0x0AC6, 0x0AC6 }, { 0x0ACA, 0x0ACA }, { 0x0ACE, 0x0ACF },
39 { 0x0AD1, 0x0ADF }, { 0x0AE4, 0x0AE5 }, { 0x0AF2, 0x0B00 },
40 { 0x0B04, 0x0B04 }, { 0x0B0D, 0x0B0E }, { 0x0B11, 0x0B12 },
41 { 0x0B29, 0x0B29 }, { 0x0B31, 0x0B31 }, { 0x0B34, 0x0B34 },
42 { 0x0B3A, 0x0B3B }, { 0x0B45, 0x0B46 }, { 0x0B49, 0x0B4A },
43 { 0x0B4E, 0x0B55 }, { 0x0B58, 0x0B5B }, { 0x0B5E, 0x0B5E },
44 { 0x0B64, 0x0B65 }, { 0x0B78, 0x0B81 }, { 0x0B84, 0x0B84 },
45 { 0x0B8B, 0x0B8D }, { 0x0B91, 0x0B91 }, { 0x0B96, 0x0B98 },
46 { 0x0B9B, 0x0B9B }, { 0x0B9D, 0x0B9D }, { 0x0BA0, 0x0BA2 },
47 { 0x0BA5, 0x0BA7 }, { 0x0BAB, 0x0BAD }, { 0x0BBA, 0x0BBD },
48 { 0x0BC3, 0x0BC5 }, { 0x0BC9, 0x0BC9 }, { 0x0BCE, 0x0BCF },
49 { 0x0BD1, 0x0BD6 }, { 0x0BD8, 0x0BE5 }, { 0x0BFB, 0x0C00 },
50 { 0x0C04, 0x0C04 }, { 0x0C0D, 0x0C0D }, { 0x0C11, 0x0C11 },
51 { 0x0C29, 0x0C29 }, { 0x0C34, 0x0C34 }, { 0x0C3A, 0x0C3C },
52 { 0x0C45, 0x0C45 }, { 0x0C49, 0x0C49 }, { 0x0C4E, 0x0C54 },
53 { 0x0C57, 0x0C57 }, { 0x0C5A, 0x0C5F }, { 0x0C64, 0x0C65 },
54 { 0x0C70, 0x0C77 }, { 0x0C80, 0x0C81 }, { 0x0C84, 0x0C84 },
55 { 0x0C8D, 0x0C8D }, { 0x0C91, 0x0C91 }, { 0x0CA9, 0x0CA9 },
56 { 0x0CB4, 0x0CB4 }, { 0x0CBA, 0x0CBB }, { 0x0CC5, 0x0CC5 },
57 { 0x0CC9, 0x0CC9 }, { 0x0CCE, 0x0CD4 }, { 0x0CD7, 0x0CDD },
58 { 0x0CDF, 0x0CDF }, { 0x0CE4, 0x0CE5 }, { 0x0CF0, 0x0CF0 },
59 { 0x0CF3, 0x0D01 }, { 0x0D04, 0x0D04 }, { 0x0D0D, 0x0D0D },
60 { 0x0D11, 0x0D11 }, { 0x0D3B, 0x0D3C }, { 0x0D45, 0x0D45 },
61 { 0x0D49, 0x0D49 }, { 0x0D4F, 0x0D56 }, { 0x0D58, 0x0D5F },
62 { 0x0D64, 0x0D65 }, { 0x0D76, 0x0D78 }, { 0x0D80, 0x0D81 },
63 { 0x0D84, 0x0D84 }, { 0x0D97, 0x0D99 }, { 0x0DB2, 0x0DB2 },
64 { 0x0DBC, 0x0DBC }, { 0x0DBE, 0x0DBF }, { 0x0DC7, 0x0DC9 },
65 { 0x0DCB, 0x0DCE }, { 0x0DD5, 0x0DD5 }, { 0x0DD7, 0x0DD7 },
66 { 0x0DE0, 0x0DF1 }, { 0x0DF5, 0x0E00 }, { 0x0E3B, 0x0E3E },
67 { 0x0E5C, 0x0E80 }, { 0x0E83, 0x0E83 }, { 0x0E85, 0x0E86 },
68 { 0x0E89, 0x0E89 }, { 0x0E8B, 0x0E8C }, { 0x0E8E, 0x0E93 },
69 { 0x0E98, 0x0E98 }, { 0x0EA0, 0x0EA0 }, { 0x0EA4, 0x0EA4 },
70 { 0x0EA6, 0x0EA6 }, { 0x0EA8, 0x0EA9 }, { 0x0EAC, 0x0EAC },
71 { 0x0EBA, 0x0EBA }, { 0x0EBE, 0x0EBF }, { 0x0EC5, 0x0EC5 },
72 { 0x0EC7, 0x0EC7 }, { 0x0ECE, 0x0ECF }, { 0x0EDA, 0x0EDB },
73 { 0x0EE0, 0x0EFF }, { 0x0F48, 0x0F48 }, { 0x0F6D, 0x0F70 },
74 { 0x0F98, 0x0F98 }, { 0x0FBD, 0x0FBD }, { 0x0FCD, 0x0FCD },
75 { 0x0FDB, 0x0FFF }, { 0x10C6, 0x10C6 }, { 0x10C8, 0x10CC },
76 { 0x10CE, 0x10CF }, { 0x115F, 0x1160 }, { 0x1249, 0x1249 },
77 { 0x124E, 0x124F }, { 0x1257, 0x1257 }, { 0x1259, 0x1259 },
78 { 0x125E, 0x125F }, { 0x1289, 0x1289 }, { 0x128E, 0x128F },
79 { 0x12B1, 0x12B1 }, { 0x12B6, 0x12B7 }, { 0x12BF, 0x12BF },
80 { 0x12C1, 0x12C1 }, { 0x12C6, 0x12C7 }, { 0x12D7, 0x12D7 },
81 { 0x1311, 0x1311 }, { 0x1316, 0x1317 }, { 0x135B, 0x135C },
82 { 0x137D, 0x137F }, { 0x139A, 0x139F }, { 0x13F5, 0x13FF },
83 { 0x169D, 0x169F }, { 0x16F1, 0x16FF }, { 0x170D, 0x170D },
84 { 0x1715, 0x171F }, { 0x1737, 0x173F }, { 0x1754, 0x175F },
85 { 0x176D, 0x176D }, { 0x1771, 0x1771 }, { 0x1774, 0x177F },
86 { 0x17B4, 0x17B5 }, { 0x17DE, 0x17DF }, { 0x17EA, 0x17EF },
87 { 0x17FA, 0x17FF }, { 0x180B, 0x180D }, { 0x180F, 0x180F },
88 { 0x181A, 0x181F }, { 0x1878, 0x187F }, { 0x18AB, 0x18AF },
89 { 0x18F6, 0x18FF }, { 0x191D, 0x191F }, { 0x192C, 0x192F },
90 { 0x193C, 0x193F }, { 0x1941, 0x1943 }, { 0x196E, 0x196F },
91 { 0x1975, 0x197F }, { 0x19AC, 0x19AF }, { 0x19CA, 0x19CF },
92 { 0x19DB, 0x19DD }, { 0x1A1C, 0x1A1D }, { 0x1A5F, 0x1A5F },
93 { 0x1A7D, 0x1A7E }, { 0x1A8A, 0x1A8F }, { 0x1A9A, 0x1A9F },
94 { 0x1AAE, 0x1AFF }, { 0x1B4C, 0x1B4F }, { 0x1B7D, 0x1B7F },
95 { 0x1BF4, 0x1BFB }, { 0x1C38, 0x1C3A }, { 0x1C4A, 0x1C4C },
96 { 0x1C80, 0x1CBF }, { 0x1CC8, 0x1CCF }, { 0x1CF7, 0x1CFF },
97 { 0x1DE7, 0x1DFB }, { 0x1F16, 0x1F17 }, { 0x1F1E, 0x1F1F },
98 { 0x1F46, 0x1F47 }, { 0x1F4E, 0x1F4F }, { 0x1F58, 0x1F58 },
99 { 0x1F5A, 0x1F5A }, { 0x1F5C, 0x1F5C }, { 0x1F5E, 0x1F5E },
100 { 0x1F7E, 0x1F7F }, { 0x1FB5, 0x1FB5 }, { 0x1FC5, 0x1FC5 },
101 { 0x1FD4, 0x1FD5 }, { 0x1FDC, 0x1FDC }, { 0x1FF0, 0x1FF1 },
102 { 0x1FF5, 0x1FF5 }, { 0x1FFF, 0x1FFF }, { 0x200B, 0x200F },
103 { 0x202A, 0x202E }, { 0x2060, 0x206F }, { 0x2072, 0x2073 },
104 { 0x208F, 0x208F }, { 0x209D, 0x209F }, { 0x20BB, 0x20CF },
105 { 0x20F1, 0x20FF }, { 0x218A, 0x218F }, { 0x23F4, 0x23FF },
106 { 0x2427, 0x243F }, { 0x244B, 0x245F }, { 0x2700, 0x2700 },
107 { 0x2B4D, 0x2B4F }, { 0x2B5A, 0x2BFF }, { 0x2C2F, 0x2C2F },
108 { 0x2C5F, 0x2C5F }, { 0x2CF4, 0x2CF8 }, { 0x2D26, 0x2D26 },
109 { 0x2D28, 0x2D2C }, { 0x2D2E, 0x2D2F }, { 0x2D68, 0x2D6E },
110 { 0x2D71, 0x2D7E }, { 0x2D97, 0x2D9F }, { 0x2DA7, 0x2DA7 },
111 { 0x2DAF, 0x2DAF }, { 0x2DB7, 0x2DB7 }, { 0x2DBF, 0x2DBF },
112 { 0x2DC7, 0x2DC7 }, { 0x2DCF, 0x2DCF }, { 0x2DD7, 0x2DD7 },
113 { 0x2DDF, 0x2DDF }, { 0x2E3C, 0x2E7F }, { 0x2E9A, 0x2E9A },
114 { 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF },
115 { 0x3040, 0x3040 }, { 0x3097, 0x3098 }, { 0x3100, 0x3104 },
116 { 0x312E, 0x3130 }, { 0x3164, 0x3164 }, { 0x318F, 0x318F },
117 { 0x31BB, 0x31BF }, { 0x31E4, 0x31EF }, { 0x321F, 0x321F },
118 { 0x32FF, 0x32FF }, { 0x4DB6, 0x4DBF }, { 0x9FCD, 0x9FFF },
119 { 0xA48D, 0xA48F }, { 0xA4C7, 0xA4CF }, { 0xA62C, 0xA63F },
120 { 0xA698, 0xA69E }, { 0xA6F8, 0xA6FF }, { 0xA78F, 0xA78F },
121 { 0xA794, 0xA79F }, { 0xA7AB, 0xA7F7 }, { 0xA82C, 0xA82F },
122 { 0xA83A, 0xA83F }, { 0xA878, 0xA87F }, { 0xA8C5, 0xA8CD },
123 { 0xA8DA, 0xA8DF }, { 0xA8FC, 0xA8FF }, { 0xA954, 0xA95E },
124 { 0xA97D, 0xA97F }, { 0xA9CE, 0xA9CE }, { 0xA9DA, 0xA9DD },
125 { 0xA9E0, 0xA9FF }, { 0xAA37, 0xAA3F }, { 0xAA4E, 0xAA4F },
126 { 0xAA5A, 0xAA5B }, { 0xAA7C, 0xAA7F }, { 0xAAC3, 0xAADA },
127 { 0xAAF7, 0xAB00 }, { 0xAB07, 0xAB08 }, { 0xAB0F, 0xAB10 },
128 { 0xAB17, 0xAB1F }, { 0xAB27, 0xAB27 }, { 0xAB2F, 0xABBF },
129 { 0xABEE, 0xABEF }, { 0xABFA, 0xABFF }, { 0xD7A4, 0xD7AF },
130 { 0xD7C7, 0xD7CA }, { 0xD7FC, 0xDFFF }, { 0xFA6E, 0xFA6F },
131 { 0xFADA, 0xFAFF }, { 0xFB07, 0xFB12 }, { 0xFB18, 0xFB1C },
132 { 0xFB37, 0xFB37 }, { 0xFB3D, 0xFB3D }, { 0xFB3F, 0xFB3F },
133 { 0xFB42, 0xFB42 }, { 0xFB45, 0xFB45 }, { 0xFBC2, 0xFBD2 },
134 { 0xFD40, 0xFD4F }, { 0xFD90, 0xFD91 }, { 0xFDC8, 0xFDEF },
135 { 0xFDFE, 0xFE0F }, { 0xFE1A, 0xFE1F }, { 0xFE27, 0xFE2F },
136 { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 }, { 0xFE6C, 0xFE6F },
137 { 0xFE75, 0xFE75 }, { 0xFEFD, 0xFEFF }, { 0xFF00, 0xFF00 },
138 { 0xFFA0, 0xFFA0 }, { 0xFFBF, 0xFFC1 }, { 0xFFC8, 0xFFC9 },
139 { 0xFFD0, 0xFFD1 }, { 0xFFD8, 0xFFD9 }, { 0xFFDD, 0xFFDF },
140 { 0xFFE7, 0xFFE7 }, { 0xFFEF, 0xFFFB }, { 0xFFFE, 0xFFFF },
141 { 0x1000C, 0x1000C }, { 0x10027, 0x10027 }, { 0x1003B, 0x1003B },
142 { 0x1003E, 0x1003E }, { 0x1004E, 0x1004F }, { 0x1005E, 0x1007F },
143 { 0x100FB, 0x100FF }, { 0x10103, 0x10106 }, { 0x10134, 0x10136 },
144 { 0x1018B, 0x1018F }, { 0x1019C, 0x101CF }, { 0x101FE, 0x1027F },
145 { 0x1029D, 0x1029F }, { 0x102D1, 0x102FF }, { 0x1031F, 0x1031F },
146 { 0x10324, 0x1032F }, { 0x1034B, 0x1037F }, { 0x1039E, 0x1039E },
147 { 0x103C4, 0x103C7 }, { 0x103D6, 0x103FF }, { 0x1049E, 0x1049F },
148 { 0x104AA, 0x107FF }, { 0x10806, 0x10807 }, { 0x10809, 0x10809 },
149 { 0x10836, 0x10836 }, { 0x10839, 0x1083B }, { 0x1083D, 0x1083E },
150 { 0x10856, 0x10856 }, { 0x10860, 0x108FF }, { 0x1091C, 0x1091E },
151 { 0x1093A, 0x1093E }, { 0x10940, 0x1097F }, { 0x109B8, 0x109BD },
152 { 0x109C0, 0x109FF }, { 0x10A04, 0x10A04 }, { 0x10A07, 0x10A0B },
153 { 0x10A14, 0x10A14 }, { 0x10A18, 0x10A18 }, { 0x10A34, 0x10A37 },
154 { 0x10A3B, 0x10A3E }, { 0x10A48, 0x10A4F }, { 0x10A59, 0x10A5F },
155 { 0x10A80, 0x10AFF }, { 0x10B36, 0x10B38 }, { 0x10B56, 0x10B57 },
156 { 0x10B73, 0x10B77 }, { 0x10B80, 0x10BFF }, { 0x10C49, 0x10E5F },
157 { 0x10E7F, 0x10FFF }, { 0x1104E, 0x11051 }, { 0x11070, 0x1107F },
158 { 0x110BD, 0x110BD }, { 0x110C2, 0x110CF }, { 0x110E9, 0x110EF },
159 { 0x110FA, 0x110FF }, { 0x11135, 0x11135 }, { 0x11144, 0x1117F },
160 { 0x111C9, 0x111CF }, { 0x111DA, 0x1167F }, { 0x116B8, 0x116BF },
161 { 0x116CA, 0x11FFF }, { 0x1236F, 0x123FF }, { 0x12463, 0x1246F },
162 { 0x12474, 0x12FFF }, { 0x1342F, 0x167FF }, { 0x16A39, 0x16EFF },
163 { 0x16F45, 0x16F4F }, { 0x16F7F, 0x16F8E }, { 0x16FA0, 0x1AFFF },
164 { 0x1B002, 0x1CFFF }, { 0x1D0F6, 0x1D0FF }, { 0x1D127, 0x1D128 },
165 { 0x1D173, 0x1D17A }, { 0x1D1DE, 0x1D1FF }, { 0x1D246, 0x1D2FF },
166 { 0x1D357, 0x1D35F }, { 0x1D372, 0x1D3FF }, { 0x1D455, 0x1D455 },
167 { 0x1D49D, 0x1D49D }, { 0x1D4A0, 0x1D4A1 }, { 0x1D4A3, 0x1D4A4 },
168 { 0x1D4A7, 0x1D4A8 }, { 0x1D4AD, 0x1D4AD }, { 0x1D4BA, 0x1D4BA },
169 { 0x1D4BC, 0x1D4BC }, { 0x1D4C4, 0x1D4C4 }, { 0x1D506, 0x1D506 },
170 { 0x1D50B, 0x1D50C }, { 0x1D515, 0x1D515 }, { 0x1D51D, 0x1D51D },
171 { 0x1D53A, 0x1D53A }, { 0x1D53F, 0x1D53F }, { 0x1D545, 0x1D545 },
172 { 0x1D547, 0x1D549 }, { 0x1D551, 0x1D551 }, { 0x1D6A6, 0x1D6A7 },
173 { 0x1D7CC, 0x1D7CD }, { 0x1D800, 0x1EDFF }, { 0x1EE04, 0x1EE04 },
174 { 0x1EE20, 0x1EE20 }, { 0x1EE23, 0x1EE23 }, { 0x1EE25, 0x1EE26 },
175 { 0x1EE28, 0x1EE28 }, { 0x1EE33, 0x1EE33 }, { 0x1EE38, 0x1EE38 },
176 { 0x1EE3A, 0x1EE3A }, { 0x1EE3C, 0x1EE41 }, { 0x1EE43, 0x1EE46 },
177 { 0x1EE48, 0x1EE48 }, { 0x1EE4A, 0x1EE4A }, { 0x1EE4C, 0x1EE4C },
178 { 0x1EE50, 0x1EE50 }, { 0x1EE53, 0x1EE53 }, { 0x1EE55, 0x1EE56 },
179 { 0x1EE58, 0x1EE58 }, { 0x1EE5A, 0x1EE5A }, { 0x1EE5C, 0x1EE5C },
180 { 0x1EE5E, 0x1EE5E }, { 0x1EE60, 0x1EE60 }, { 0x1EE63, 0x1EE63 },
181 { 0x1EE65, 0x1EE66 }, { 0x1EE6B, 0x1EE6B }, { 0x1EE73, 0x1EE73 },
182 { 0x1EE78, 0x1EE78 }, { 0x1EE7D, 0x1EE7D }, { 0x1EE7F, 0x1EE7F },
183 { 0x1EE8A, 0x1EE8A }, { 0x1EE9C, 0x1EEA0 }, { 0x1EEA4, 0x1EEA4 },
184 { 0x1EEAA, 0x1EEAA }, { 0x1EEBC, 0x1EEEF }, { 0x1EEF2, 0x1EFFF },
185 { 0x1F02C, 0x1F02F }, { 0x1F094, 0x1F09F }, { 0x1F0AF, 0x1F0B0 },
186 { 0x1F0BF, 0x1F0C0 }, { 0x1F0D0, 0x1F0D0 }, { 0x1F0E0, 0x1F0FF },
187 { 0x1F10B, 0x1F10F }, { 0x1F12F, 0x1F12F }, { 0x1F16C, 0x1F16F },
188 { 0x1F19B, 0x1F1E5 }, { 0x1F203, 0x1F20F }, { 0x1F23B, 0x1F23F },
189 { 0x1F249, 0x1F24F }, { 0x1F252, 0x1F2FF }, { 0x1F321, 0x1F32F },
190 { 0x1F336, 0x1F336 }, { 0x1F37D, 0x1F37F }, { 0x1F394, 0x1F39F },
191 { 0x1F3C5, 0x1F3C5 }, { 0x1F3CB, 0x1F3DF }, { 0x1F3F1, 0x1F3FF },
192 { 0x1F43F, 0x1F43F }, { 0x1F441, 0x1F441 }, { 0x1F4F8, 0x1F4F8 },
193 { 0x1F4FD, 0x1F4FF }, { 0x1F53E, 0x1F53F }, { 0x1F544, 0x1F54F },
194 { 0x1F568, 0x1F5FA }, { 0x1F641, 0x1F644 }, { 0x1F650, 0x1F67F },
195 { 0x1F6C6, 0x1F6FF }, { 0x1F774, 0x1FFFF }, { 0x2A6D7, 0x2A6FF },
196 { 0x2B735, 0x2B73F }, { 0x2B81E, 0x2F7FF }, { 0x2FA1E, 0xF0000 },
197 { 0xFFFFE, 0xFFFFF }, { 0x10FFFE, 0x10FFFF }, { 0x110000, 0xFFFFFFFF }
201 { 0x0000, 0x007F,
"Basic Latin" },
202 { 0x0080, 0x00FF,
"Latin-1 Supplement" },
203 { 0x0100, 0x017F,
"Latin Extended-A" },
204 { 0x0180, 0x024F,
"Latin Extended-B" },
205 { 0x0250, 0x02AF,
"IPA Extensions" },
206 { 0x02B0, 0x02FF,
"Spacing Modifier Letters" },
207 { 0x0300, 0x036F,
"Combining Diacritical Marks" },
208 { 0x0370, 0x03FF,
"Greek and Coptic" },
209 { 0x0400, 0x04FF,
"Cyrillic" },
210 { 0x0500, 0x052F,
"Cyrillic Supplement" },
211 { 0x0530, 0x058F,
"Armenian" },
212 { 0x0590, 0x05FF,
"Hebrew" },
213 { 0x0600, 0x06FF,
"Arabic" },
214 { 0x0700, 0x074F,
"Syriac" },
215 { 0x0750, 0x077F,
"Arabic Supplement" },
216 { 0x0780, 0x07BF,
"Thaana" },
217 { 0x07C0, 0x07FF,
"NKo" },
218 { 0x0800, 0x083F,
"Samaritan" },
219 { 0x0840, 0x085F,
"Mandaic" },
220 { 0x0860, 0x086F,
"Syriac Supplement" },
221 { 0x08A0, 0x08FF,
"Arabic Extended-A" },
222 { 0x0900, 0x097F,
"Devanagari" },
223 { 0x0980, 0x09FF,
"Bengali" },
224 { 0x0A00, 0x0A7F,
"Gurmukhi" },
225 { 0x0A80, 0x0AFF,
"Gujarati" },
226 { 0x0B00, 0x0B7F,
"Oriya" },
227 { 0x0B80, 0x0BFF,
"Tamil" },
228 { 0x0C00, 0x0C7F,
"Telugu" },
229 { 0x0C80, 0x0CFF,
"Kannada" },
230 { 0x0D00, 0x0D7F,
"Malayalam" },
231 { 0x0D80, 0x0DFF,
"Sinhala" },
232 { 0x0E00, 0x0E7F,
"Thai" },
233 { 0x0E80, 0x0EFF,
"Lao" },
234 { 0x0F00, 0x0FFF,
"Tibetan" },
235 { 0x1000, 0x109F,
"Myanmar" },
236 { 0x10A0, 0x10FF,
"Georgian" },
237 { 0x1100, 0x11FF,
"Hangul Jamo" },
238 { 0x1200, 0x137F,
"Ethiopic" },
239 { 0x1380, 0x139F,
"Ethiopic Supplement" },
240 { 0x13A0, 0x13FF,
"Cherokee" },
241 { 0x1400, 0x167F,
"Unified Canadian Aboriginal Syllabics" },
242 { 0x1680, 0x169F,
"Ogham" },
243 { 0x16A0, 0x16FF,
"Runic" },
244 { 0x1700, 0x171F,
"Tagalog" },
245 { 0x1720, 0x173F,
"Hanunoo" },
246 { 0x1740, 0x175F,
"Buhid" },
247 { 0x1760, 0x177F,
"Tagbanwa" },
248 { 0x1780, 0x17FF,
"Khmer" },
249 { 0x1800, 0x18AF,
"Mongolian" },
250 { 0x18B0, 0x18FF,
"Unified Canadian Aboriginal Syllabics Extended" },
251 { 0x1900, 0x194F,
"Limbu" },
252 { 0x1950, 0x197F,
"Tai Le" },
253 { 0x1980, 0x19DF,
"New Tai Lue" },
254 { 0x19E0, 0x19FF,
"Khmer Symbols" },
255 { 0x1A00, 0x1A1F,
"Buginese" },
256 { 0x1A20, 0x1AAF,
"Tai Tham" },
257 { 0x1AB0, 0x1AFF,
"Combining Diacritical Marks Extended" },
258 { 0x1B00, 0x1B7F,
"Balinese" },
259 { 0x1B80, 0x1BBF,
"Sundanese" },
260 { 0x1BC0, 0x1BFF,
"Batak" },
261 { 0x1C00, 0x1C4F,
"Lepcha" },
262 { 0x1C50, 0x1C7F,
"Ol Chiki" },
263 { 0x1C80, 0x1C8F,
"Cyrillic Extended-C" },
264 { 0x1CC0, 0x1CCF,
"Sundanese Supplement" },
265 { 0x1CD0, 0x1CFF,
"Vedic Extensions" },
266 { 0x1D00, 0x1D7F,
"Phonetic Extensions" },
267 { 0x1D80, 0x1DBF,
"Phonetic Extensions Supplement" },
268 { 0x1DC0, 0x1DFF,
"Combining Diacritical Marks Supplement" },
269 { 0x1E00, 0x1EFF,
"Latin Extended Additional" },
270 { 0x1F00, 0x1FFF,
"Greek Extended" },
271 { 0x2000, 0x206F,
"General Punctuation" },
272 { 0x2070, 0x209F,
"Superscripts and Subscripts" },
273 { 0x20A0, 0x20CF,
"Currency Symbols" },
274 { 0x20D0, 0x20FF,
"Combining Diacritical Marks for Symbols" },
275 { 0x2100, 0x214F,
"Letterlike Symbols" },
276 { 0x2150, 0x218F,
"Number Forms" },
277 { 0x2190, 0x21FF,
"Arrows" },
278 { 0x2200, 0x22FF,
"Mathematical Operators" },
279 { 0x2300, 0x23FF,
"Miscellaneous Technical" },
280 { 0x2400, 0x243F,
"Control Pictures" },
281 { 0x2440, 0x245F,
"Optical Character Recognition" },
282 { 0x2460, 0x24FF,
"Enclosed Alphanumerics" },
283 { 0x2500, 0x257F,
"Box Drawing" },
284 { 0x2580, 0x259F,
"Block Elements" },
285 { 0x25A0, 0x25FF,
"Geometric Shapes" },
286 { 0x2600, 0x26FF,
"Miscellaneous Symbols" },
287 { 0x2700, 0x27BF,
"Dingbats" },
288 { 0x27C0, 0x27EF,
"Miscellaneous Mathematical Symbols-A" },
289 { 0x27F0, 0x27FF,
"Supplemental Arrows-A" },
290 { 0x2800, 0x28FF,
"Braille Patterns" },
291 { 0x2900, 0x297F,
"Supplemental Arrows-B" },
292 { 0x2980, 0x29FF,
"Miscellaneous Mathematical Symbols-B" },
293 { 0x2A00, 0x2AFF,
"Supplemental Mathematical Operators" },
294 { 0x2B00, 0x2BFF,
"Miscellaneous Symbols and Arrows" },
295 { 0x2C00, 0x2C5F,
"Glagolitic" },
296 { 0x2C60, 0x2C7F,
"Latin Extended-C" },
297 { 0x2C80, 0x2CFF,
"Coptic" },
298 { 0x2D00, 0x2D2F,
"Georgian Supplement" },
299 { 0x2D30, 0x2D7F,
"Tifinagh" },
300 { 0x2D80, 0x2DDF,
"Ethiopic Extended" },
301 { 0x2DE0, 0x2DFF,
"Cyrillic Extended-A" },
302 { 0x2E00, 0x2E7F,
"Supplemental Punctuation" },
303 { 0x2E80, 0x2EFF,
"CJK Radicals Supplement" },
304 { 0x2F00, 0x2FDF,
"Kangxi Radicals" },
305 { 0x2FF0, 0x2FFF,
"Ideographic Description Characters" },
306 { 0x3000, 0x303F,
"CJK Symbols and Punctuation" },
307 { 0x3040, 0x309F,
"Hiragana" },
308 { 0x30A0, 0x30FF,
"Katakana" },
309 { 0x3100, 0x312F,
"Bopomofo" },
310 { 0x3130, 0x318F,
"Hangul Compatibility Jamo" },
311 { 0x3190, 0x319F,
"Kanbun" },
312 { 0x31A0, 0x31BF,
"Bopomofo Extended" },
313 { 0x31C0, 0x31EF,
"CJK Strokes" },
314 { 0x31F0, 0x31FF,
"Katakana Phonetic Extensions" },
315 { 0x3200, 0x32FF,
"Enclosed CJK Letters and Months" },
316 { 0x3300, 0x33FF,
"CJK Compatibility" },
317 { 0x3400, 0x4DBF,
"CJK Unified Ideographs Extension A" },
318 { 0x4DC0, 0x4DFF,
"Yijing Hexagram Symbols" },
319 { 0x4E00, 0x9FFF,
"CJK Unified Ideographs" },
320 { 0xA000, 0xA48F,
"Yi Syllables" },
321 { 0xA490, 0xA4CF,
"Yi Radicals" },
322 { 0xA4D0, 0xA4FF,
"Lisu" },
323 { 0xA500, 0xA63F,
"Vai" },
324 { 0xA640, 0xA69F,
"Cyrillic Extended-B" },
325 { 0xA6A0, 0xA6FF,
"Bamum" },
326 { 0xA700, 0xA71F,
"Modifier Tone Letters" },
327 { 0xA720, 0xA7FF,
"Latin Extended-D" },
328 { 0xA800, 0xA82F,
"Syloti Nagri" },
329 { 0xA830, 0xA83F,
"Common Indic Number Forms" },
330 { 0xA840, 0xA87F,
"Phags-pa" },
331 { 0xA880, 0xA8DF,
"Saurashtra" },
332 { 0xA8E0, 0xA8FF,
"Devanagari Extended" },
333 { 0xA900, 0xA92F,
"Kayah Li" },
334 { 0xA930, 0xA95F,
"Rejang" },
335 { 0xA960, 0xA97F,
"Hangul Jamo Extended-A" },
336 { 0xA980, 0xA9DF,
"Javanese" },
337 { 0xA9E0, 0xA9FF,
"Myanmar Extended-B" },
338 { 0xAA00, 0xAA5F,
"Cham" },
339 { 0xAA60, 0xAA7F,
"Myanmar Extended-A" },
340 { 0xAA80, 0xAADF,
"Tai Viet" },
341 { 0xAAE0, 0xAAFF,
"Meetei Mayek Extensions" },
342 { 0xAB00, 0xAB2F,
"Ethiopic Extended-A" },
343 { 0xAB30, 0xAB6F,
"Latin Extended-E" },
344 { 0xAB70, 0xABBF,
"Cherokee Supplement" },
345 { 0xABC0, 0xABFF,
"Meetei Mayek" },
346 { 0xAC00, 0xD7AF,
"Hangul Syllables" },
347 { 0xD7B0, 0xD7FF,
"Hangul Jamo Extended-B" },
348 { 0xD800, 0xDB7F,
"High Surrogates" },
349 { 0xDB80, 0xDBFF,
"High Private Use Surrogates" },
350 { 0xDC00, 0xDFFF,
"Low Surrogates" },
351 { 0xE000, 0xF8FF,
"Private Use Area" },
352 { 0xF900, 0xFAFF,
"CJK Compatibility Ideographs" },
353 { 0xFB00, 0xFB4F,
"Alphabetic Presentation Forms" },
354 { 0xFB50, 0xFDFF,
"Arabic Presentation Forms-A" },
355 { 0xFE00, 0xFE0F,
"Variation Selectors" },
356 { 0xFE10, 0xFE1F,
"Vertical Forms" },
357 { 0xFE20, 0xFE2F,
"Combining Half Marks" },
358 { 0xFE30, 0xFE4F,
"CJK Compatibility Forms" },
359 { 0xFE50, 0xFE6F,
"Small Form Variants" },
360 { 0xFE70, 0xFEFF,
"Arabic Presentation Forms-B" },
361 { 0xFF00, 0xFFEF,
"Halfwidth and Fullwidth Forms" },
362 { 0xFFF0, 0xFFFF,
"Specials" },
363 { 0x10000, 0x1007F,
"Linear B Syllabary" },
364 { 0x10080, 0x100FF,
"Linear B Ideograms" },
365 { 0x10100, 0x1013F,
"Aegean Numbers" },
366 { 0x10140, 0x1018F,
"Ancient Greek Numbers" },
367 { 0x10190, 0x101CF,
"Ancient Symbols" },
368 { 0x101D0, 0x101FF,
"Phaistos Disc" },
369 { 0x10280, 0x1029F,
"Lycian" },
370 { 0x102A0, 0x102DF,
"Carian" },
371 { 0x102E0, 0x102FF,
"Coptic Epact Numbers" },
372 { 0x10300, 0x1032F,
"Old Italic" },
373 { 0x10330, 0x1034F,
"Gothic" },
374 { 0x10350, 0x1037F,
"Old Permic" },
375 { 0x10380, 0x1039F,
"Ugaritic" },
376 { 0x103A0, 0x103DF,
"Old Persian" },
377 { 0x10400, 0x1044F,
"Deseret" },
378 { 0x10450, 0x1047F,
"Shavian" },
379 { 0x10480, 0x104AF,
"Osmanya" },
380 { 0x104B0, 0x104FF,
"Osage" },
381 { 0x10500, 0x1052F,
"Elbasan" },
382 { 0x10530, 0x1056F,
"Caucasian Albanian" },
383 { 0x10600, 0x1077F,
"Linear A" },
384 { 0x10800, 0x1083F,
"Cypriot Syllabary" },
385 { 0x10840, 0x1085F,
"Imperial Aramaic" },
386 { 0x10860, 0x1087F,
"Palmyrene" },
387 { 0x10880, 0x108AF,
"Nabataean" },
388 { 0x108E0, 0x108FF,
"Hatran" },
389 { 0x10900, 0x1091F,
"Phoenician" },
390 { 0x10920, 0x1093F,
"Lydian" },
391 { 0x10980, 0x1099F,
"Meroitic Hieroglyphs" },
392 { 0x109A0, 0x109FF,
"Meroitic Cursive" },
393 { 0x10A00, 0x10A5F,
"Kharoshthi" },
394 { 0x10A60, 0x10A7F,
"Old South Arabian" },
395 { 0x10A80, 0x10A9F,
"Old North Arabian" },
396 { 0x10AC0, 0x10AFF,
"Manichaean" },
397 { 0x10B00, 0x10B3F,
"Avestan" },
398 { 0x10B40, 0x10B5F,
"Inscriptional Parthian" },
399 { 0x10B60, 0x10B7F,
"Inscriptional Pahlavi" },
400 { 0x10B80, 0x10BAF,
"Psalter Pahlavi" },
401 { 0x10C00, 0x10C4F,
"Old Turkic" },
402 { 0x10C80, 0x10CFF,
"Old Hungarian" },
403 { 0x10E60, 0x10E7F,
"Rumi Numeral Symbols" },
404 { 0x11000, 0x1107F,
"Brahmi" },
405 { 0x11080, 0x110CF,
"Kaithi" },
406 { 0x110D0, 0x110FF,
"Sora Sompeng" },
407 { 0x11100, 0x1114F,
"Chakma" },
408 { 0x11150, 0x1117F,
"Mahajani" },
409 { 0x11180, 0x111DF,
"Sharada" },
410 { 0x111E0, 0x111FF,
"Sinhala Archaic Numbers" },
411 { 0x11200, 0x1124F,
"Khojki" },
412 { 0x11280, 0x112AF,
"Multani" },
413 { 0x112B0, 0x112FF,
"Khudawadi" },
414 { 0x11300, 0x1137F,
"Grantha" },
415 { 0x11400, 0x1147F,
"Newa" },
416 { 0x11480, 0x114DF,
"Tirhuta" },
417 { 0x11580, 0x115FF,
"Siddham" },
418 { 0x11600, 0x1165F,
"Modi" },
419 { 0x11660, 0x1167F,
"Mongolian Supplement" },
420 { 0x11680, 0x116CF,
"Takri" },
421 { 0x11700, 0x1173F,
"Ahom" },
422 { 0x118A0, 0x118FF,
"Warang Citi" },
423 { 0x11A00, 0x11A4F,
"Zanabazar Square" },
424 { 0x11A50, 0x11AAF,
"Soyombo" },
425 { 0x11AC0, 0x11AFF,
"Pau Cin Hau" },
426 { 0x11C00, 0x11C6F,
"Bhaiksuki" },
427 { 0x11C70, 0x11CBF,
"Marchen" },
428 { 0x11D00, 0x11D5F,
"Masaram Gondi" },
429 { 0x12000, 0x123FF,
"Cuneiform" },
430 { 0x12400, 0x1247F,
"Cuneiform Numbers and Punctuation" },
431 { 0x12480, 0x1254F,
"Early Dynastic Cuneiform" },
432 { 0x13000, 0x1342F,
"Egyptian Hieroglyphs" },
433 { 0x14400, 0x1467F,
"Anatolian Hieroglyphs" },
434 { 0x16800, 0x16A3F,
"Bamum Supplement" },
435 { 0x16A40, 0x16A6F,
"Mro" },
436 { 0x16AD0, 0x16AFF,
"Bassa Vah" },
437 { 0x16B00, 0x16B8F,
"Pahawh Hmong" },
438 { 0x16F00, 0x16F9F,
"Miao" },
439 { 0x16FE0, 0x16FFF,
"Ideographic Symbols and Punctuation" },
440 { 0x17000, 0x187FF,
"Tangut" },
441 { 0x18800, 0x18AFF,
"Tangut Components" },
442 { 0x1B000, 0x1B0FF,
"Kana Supplement" },
443 { 0x1B100, 0x1B12F,
"Kana Extended-A" },
444 { 0x1B170, 0x1B2FF,
"Nushu" },
445 { 0x1BC00, 0x1BC9F,
"Duployan" },
446 { 0x1BCA0, 0x1BCAF,
"Shorthand Format Controls" },
447 { 0x1D000, 0x1D0FF,
"Byzantine Musical Symbols" },
448 { 0x1D100, 0x1D1FF,
"Musical Symbols" },
449 { 0x1D200, 0x1D24F,
"Ancient Greek Musical Notation" },
450 { 0x1D300, 0x1D35F,
"Tai Xuan Jing Symbols" },
451 { 0x1D360, 0x1D37F,
"Counting Rod Numerals" },
452 { 0x1D400, 0x1D7FF,
"Mathematical Alphanumeric Symbols" },
453 { 0x1D800, 0x1DAAF,
"Sutton SignWriting" },
454 { 0x1E000, 0x1E02F,
"Glagolitic Supplement" },
455 { 0x1E800, 0x1E8DF,
"Mende Kikakui" },
456 { 0x1E900, 0x1E95F,
"Adlam" },
457 { 0x1EE00, 0x1EEFF,
"Arabic Mathematical Alphabetic Symbols" },
458 { 0x1F000, 0x1F02F,
"Mahjong Tiles" },
459 { 0x1F030, 0x1F09F,
"Domino Tiles" },
460 { 0x1F0A0, 0x1F0FF,
"Playing Cards" },
461 { 0x1F100, 0x1F1FF,
"Enclosed Alphanumeric Supplement" },
462 { 0x1F200, 0x1F2FF,
"Enclosed Ideographic Supplement" },
463 { 0x1F300, 0x1F5FF,
"Miscellaneous Symbols and Pictographs" },
464 { 0x1F600, 0x1F64F,
"Emoticons" },
465 { 0x1F650, 0x1F67F,
"Ornamental Dingbats" },
466 { 0x1F680, 0x1F6FF,
"Transport and Map Symbols" },
467 { 0x1F700, 0x1F77F,
"Alchemical Symbols" },
468 { 0x1F780, 0x1F7FF,
"Geometric Shapes Extended" },
469 { 0x1F800, 0x1F8FF,
"Supplemental Arrows-C" },
470 { 0x1F900, 0x1F9FF,
"Supplemental Symbols and Pictographs" },
471 { 0x20000, 0x2A6DF,
"CJK Unified Ideographs Extension B" },
472 { 0x2A700, 0x2B73F,
"CJK Unified Ideographs Extension C" },
473 { 0x2B740, 0x2B81F,
"CJK Unified Ideographs Extension D" },
474 { 0x2B820, 0x2CEAF,
"CJK Unified Ideographs Extension E" },
475 { 0x2CEB0, 0x2EBEF,
"CJK Unified Ideographs Extension F" },
476 { 0x2F800, 0x2FA1F,
"CJK Compatibility Ideographs Supplement" },
477 { 0xE0000, 0xE007F,
"Tags" },
478 { 0xE0100, 0xE01EF,
"Variation Selectors Supplement" },
479 { 0xF0000, 0xFFFFF,
"Supplementary Private Use Area-A" },
480 { 0x100000, 0x10FFFF,
"Supplementary Private Use Area-B" },
481 { 0x110000, 0xFFFFFFFF,
"No_Block" }
501 }
else if (ptrlen > 1 && (ptr[0] & 0xe0) == 0xc0 && (ptr[1] & 0xc0) == 0x80) {
502 RzRune rune = (ptr[0] & 0x1f) << 6 | (ptr[1] & 0x3f);
506 return rune < 0x80 ? 0 : 2;
507 }
else if (ptrlen > 2 && (ptr[0] & 0xf0) == 0xe0 && (ptr[1] & 0xc0) == 0x80 && (ptr[2] & 0xc0) == 0x80) {
508 RzRune rune = (ptr[0] & 0xf) << 12 | (ptr[1] & 0x3f) << 6 | (ptr[2] & 0x3f);
512 return rune < 0x800 ? 0 : 3;
513 }
else if (ptrlen > 3 && (ptr[0] & 0xf8) == 0xf0 && (ptr[1] & 0xc0) == 0x80 && (ptr[2] & 0xc0) == 0x80 && (ptr[3] & 0xc0) == 0x80) {
514 RzRune rune = (ptr[0] & 7) << 18 | (ptr[1] & 0x3f) << 12 | (ptr[2] & 0x3f) << 6 | (ptr[3] & 0x3f);
518 return rune < 0x10000 ? 0 : 4;
525 if (ptrlen > 1 && ptr[0] == 0xc0 && ptr[1] == 0x80) {
539 }
else if (ch < 0x800) {
540 ptr[0] = 0xc0 | (ch >> 6);
541 ptr[1] = 0x80 | (ch & 0x3f);
543 }
else if (ch < 0x10000) {
544 ptr[0] = 0xe0 | (ch >> 12);
545 ptr[1] = 0x80 | ((ch >> 6) & 0x3f);
546 ptr[2] = 0x80 | (ch & 0x3f);
548 }
else if (ch < 0x200000) {
549 ptr[0] = 0xf0 | (ch >> 18);
550 ptr[1] = 0x80 | ((ch >> 12) & 0x3f);
551 ptr[2] = 0x80 | ((ch >> 6) & 0x3f);
552 ptr[3] = 0x80 | (ch & 0x3f);
565 for (
size_t i = 0;
i <
sizeof(
str) - 1 &&
str[
i] &&
pos < dst_length - 1;
i++) {
575 const int utf8_size[] = {
576 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
577 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
578 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
579 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
580 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
581 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
582 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
583 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
585 return (ptr[0] & 0x80) ? utf8_size[ptr[0] ^ 0x80] : 1;
591 for (
int i = 0;
str[
i];
i++) {
592 if ((
str[
i] & 0xc0) != 0x80) {
614 return !(
c <= 0x1F || (
c >= 0x7F &&
c <= 0x9F));
623 int mid = (low +
hi) >> 1;
639 RZ_API char *rz_utf16_to_utf8_l(
const wchar_t *wc,
int len) {
641 if (!wc || !
len ||
len < -1) {
647 if ((csize = WideCharToMultiByte(CP_UTF8, 0, wc,
len,
NULL, 0,
NULL,
NULL))) {
649 if ((rutf8 =
malloc(csize))) {
650 WideCharToMultiByte(CP_UTF8, 0, wc,
len, rutf8, csize,
NULL,
NULL);
652 rutf8[csize - 1] =
'\0';
659 RZ_API wchar_t *rz_utf8_to_utf16_l(
const char *cstring,
int len) {
661 if (!cstring || !
len ||
len < -1) {
664 wchar_t *rutf16 =
NULL;
667 if ((wcsize = MultiByteToWideChar(CP_UTF8, 0, cstring,
len,
NULL, 0))) {
669 if ((rutf16 = (
wchar_t *)
calloc(wcsize,
sizeof(
wchar_t)))) {
670 MultiByteToWideChar(CP_UTF8, 0, cstring,
len, rutf16, wcsize);
672 rutf16[wcsize - 1] =
L'\0';
679 RZ_API char *rz_utf8_to_acp_l(
const char *
str,
int len) {
685 int wcsize = 0, csize = 0;
686 if ((wcsize = MultiByteToWideChar(CP_UTF8, 0,
str,
len,
NULL, 0))) {
687 wchar_t *rutf16 =
NULL;
689 if ((rutf16 = (
wchar_t *)
calloc(wcsize,
sizeof(
wchar_t)))) {
690 MultiByteToWideChar(CP_UTF8, 0,
str,
len, rutf16, wcsize);
692 rutf16[wcsize - 1] =
L'\0';
694 if ((csize = WideCharToMultiByte(CP_ACP, 0, rutf16, wcsize,
NULL, 0,
NULL,
NULL))) {
696 if ((acp =
malloc(csize))) {
697 WideCharToMultiByte(CP_ACP, 0, rutf16, wcsize, acp, csize,
NULL,
NULL);
699 acp[csize - 1] =
'\0';
709 RZ_API char *rz_acp_to_utf8_l(
const char *
str,
int len) {
715 if ((wcsize = MultiByteToWideChar(CP_ACP, 0,
str,
len,
NULL, 0))) {
716 wchar_t *rutf16 =
NULL;
718 if ((rutf16 = (
wchar_t *)
calloc(wcsize,
sizeof(
wchar_t)))) {
719 MultiByteToWideChar(CP_ACP, 0,
str,
len, rutf16, wcsize);
721 rutf16[wcsize - 1] =
L'\0';
723 char *ret = rz_utf16_to_utf8_l(rutf16, wcsize);
735 int low = 0,
hi = last - 1, mid = 0;
738 mid = (low +
hi) >> 1;
759 len = strlen((
const char *)
str);
766 int *freq_list_ptr =
NULL;
773 freq_list_ptr = *freq_list;
775 int *list_ptr =
list;
779 while (str_ptr < str_end) {
788 if (!block_freq[block_idx]) {
789 *list_ptr = block_idx;
792 block_freq[block_idx]++;
797 for (list_ptr =
list; *list_ptr != -1; list_ptr++) {
798 *freq_list_ptr = block_freq[*list_ptr];
803 for (list_ptr =
list; *list_ptr != -1; list_ptr++) {
804 block_freq[*list_ptr] = 0;
811 if (ptr[0] == 0xff && ptr[1] == 0xfe && !ptr[2] && !ptr[3]) {
814 if (!ptr[0] && !ptr[1] && ptr[2] == 0xfe && ptr[3] == 0xff) {
819 if (ptr[0] == 0xef && ptr[1] == 0xbb && ptr[2] == 0xbf) {
824 if (ptr[0] == 0xff && ptr[1] == 0xfe) {
827 if (ptr[0] == 0xfe && ptr[1] == 0xff) {
RZ_API void Ht_() free(HtName_(Ht) *ht)
static void list(RzEgg *egg)
void * malloc(size_t size)
void * calloc(size_t number, size_t size)
RZ_API int rz_utf8_encode_str(const RzRune *str, ut8 *dst, const int dst_length)
RZ_API int * rz_utf_block_list(const ut8 *str, int len, int **freq_list)
const struct @335 nonprintable_ranges[]
RZ_API int rz_utf_block_idx(RzRune ch)
RZ_API int rz_utf8_size(const ut8 *ptr)
RZ_API const char * rz_utf_block_name(int idx)
#define UTF_NONPRINTABLE_RANGES_COUNT
RZ_API RzStrEnc rz_utf_bom_encoding(const ut8 *ptr, int ptrlen)
const RUtfBlock utf_blocks[]
RZ_API int rz_utf8_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
RZ_API bool rz_rune_is_printable(const RzRune c)
Returns true when the RzRune is a printable symbol.
RZ_API int rz_mutf8_decode(const ut8 *ptr, int ptrlen, RzRune *ch)
RZ_API int rz_utf8_encode(ut8 *ptr, const RzRune ch)
RZ_API int rz_utf8_strlen(const ut8 *str)