/**
 * Number of trail bytes that follow a UTF-8 lead byte.
 * Evaluates to 0 unless U8_IS_LEAD(leadByte) holds; otherwise to 1, plus 1 if
 * the byte is >=0xe0, plus 1 more if it is >=0xf0 (so 1, 2 or 3).
 * leadByte is expanded several times, so pass an expression without side effects.
 */
55 #define U8_COUNT_TRAIL_BYTES(leadByte) \
56 (U8_IS_LEAD(leadByte) ? \
57 ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0)
/**
 * Trail-byte count computed from three threshold comparisons only:
 * 0 below 0xc2, 1 for 0xc2..0xdf, 2 for 0xe0..0xef, 3 for 0xf0 and above.
 * Unlike U8_COUNT_TRAIL_BYTES there is no U8_IS_LEAD check, so bytes above
 * 0xf4 also yield 3. leadByte is expanded three times.
 */
70 #define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
71 (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
/**
 * Masks leadByte in place (compound assignment), keeping only its low
 * (6-countTrailBytes) bits. leadByte must therefore be a modifiable lvalue.
 */
80 #define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
/**
 * 16-entry bit-set table, written as a string literal, used by
 * U8_IS_VALID_LEAD3_AND_T1 and U8_INTERNAL_NEXT_OR_SUB.
 * It is indexed by the low 4 bits of the lead byte; the bit tested is
 * 1<<(t1>>5), i.e. bit 4 (0x10) for t1 in 0x80..0x9f and bit 5 (0x20) for
 * t1 in 0xa0..0xbf.
 * Entry 0 is 0x20 (only 0xa0..0xbf accepted), entry 0xd is 0x10 (only
 * 0x80..0x9f accepted), every other entry is 0x30 (both ranges accepted).
 */
90 #define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
/**
 * Tests a lead byte together with its first trail byte by looking up
 * U8_LEAD3_T1_BITS[lead&0xf] and testing bit (t1>>5).
 * The result is zero or a non-zero bit value, not necessarily 1.
 * Only the low 4 bits of lead are used, so the macro itself does not check
 * that lead is in the 0xe0..0xef range.
 */
97 #define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
/**
 * 16-entry bit-set table, written as a string literal, used by
 * U8_IS_VALID_LEAD4_AND_T1 and U8_INTERNAL_NEXT_OR_SUB.
 * It is indexed by the high nibble of the first trail byte (t1>>4); the bit
 * tested is selected by the low 3 bits of the lead byte.
 * Entry 8 is 0x1e (bits 1..4), entries 9, 0xa and 0xb are 0x0f (bits 0..3),
 * all other entries are 0, so only t1 in 0x80..0xbf can ever match.
 */
107 #define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
/**
 * Tests a lead byte together with its first trail byte by looking up
 * U8_LEAD4_T1_BITS[t1>>4] and testing bit (lead&7).
 * The result is zero or a non-zero bit value, not necessarily 1.
 * Only the low 3 bits of lead are used, so the macro itself does not check
 * that lead is in the 0xf0..0xf4 range.
 */
114 #define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
/**
 * True when bit 7 (0x80) of c is clear, i.e. the byte is in 0..0x7f and
 * stands for a code point on its own.
 */
172 #define U8_IS_SINGLE(c) (((c)&0x80)==0)
/**
 * True when the byte is in 0xc2..0xf4 inclusive.
 * The range test is done with one comparison: subtracting 0xc2 and truncating
 * to uint8_t wraps smaller values around to large ones, so only
 * 0xc2..(0xc2+0x32) pass.
 */
180 #define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32)
/**
 * True when c, converted to int8_t, is less than -0x40.
 * With the usual two's-complement conversion this holds exactly for byte
 * values 0x80..0xbf (the conversion of values above 0x7f to int8_t is
 * implementation-defined in C).
 */
189 #define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40)
/**
 * Number of bytes needed to encode code point c, evaluated as a chain of
 * conditionals on (uint32_t)(c):
 *   up to 0x7f -> 1, up to 0x7ff -> 2, up to 0xd7ff -> 3,
 *   0xd800..0xdfff or above 0x10ffff -> 0,
 *   otherwise up to 0xffff -> 3, else 4.
 * c is expanded several times.
 * NOTE(review): the last visible line ends with a continuation and the
 * listing then jumps from original line 203 to 214, so the closing
 * parentheses of this expression are not shown here.
 */
198 #define U8_LENGTH(c) \
199 ((uint32_t)(c)<=0x7f ? 1 : \
200 ((uint32_t)(c)<=0x7ff ? 2 : \
201 ((uint32_t)(c)<=0xd7ff ? 3 : \
202 ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
203 ((uint32_t)(c)<=0xffff ? 3 : 4)\
/** The constant 4, which equals the largest value U8_LENGTH can yield. */
214 #define U8_MAX_LENGTH 4
/**
 * Reads into c the code point that contains the byte at index i.
 * The index is copied (cast to int32_t) into a block-local variable, moved
 * back with U8_SET_CP_START_UNSAFE and then decoded with U8_NEXT_UNSAFE, so
 * the caller's i is only read, never modified.
 * Uses the _UNSAFE helpers only, i.e. no bounds or well-formedness checks are
 * applied at this level.
 */
232 #define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
233 int32_t _u8_get_unsafe_index=(int32_t)(i); \
234 U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
235 U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
236 } UPRV_BLOCK_MACRO_END
/**
 * Reads into c the code point that contains the byte at index i, using the
 * checked helpers.
 * The index is copied into a block-local int32_t, adjusted with
 * U8_SET_CP_START(s, start, ...) and decoded with U8_NEXT(..., length, c);
 * the caller's i is only read. Because U8_NEXT passes U_SENTINEL as the
 * substitute, that is the value c receives for ill-formed input.
 */
259 #define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
260 int32_t _u8_get_index=(i); \
261 U8_SET_CP_START(s, start, _u8_get_index); \
262 U8_NEXT(s, _u8_get_index, length, c); \
263 } UPRV_BLOCK_MACRO_END
/**
 * Same sequence as U8_GET, but decodes with U8_NEXT_OR_FFFD, which passes
 * 0xfffd instead of U_SENTINEL as the substitute value.
 * The caller's i is copied into a block-local int32_t and is only read.
 */
290 #define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
291 int32_t _u8_get_index=(i); \
292 U8_SET_CP_START(s, start, _u8_get_index); \
293 U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
294 } UPRV_BLOCK_MACRO_END
/**
 * Decodes one code point starting at s[i] into c without any validation.
 * The first byte is read with post-increment of i. For a non-single byte the
 * visible assignments assemble:
 *   - two bytes:   low 5 bits of the lead, then 6 bits of s[i++];
 *   - below 0xf0:  lead<<12 with two trail bytes at s[i], s[i+1]; the result
 *                  is cast to UChar;
 *   - otherwise:   low 3 bits of the lead with three trail bytes at
 *                  s[i]..s[i+2].
 * NOTE(review): the listing skips original lines 318, 321, 323-324 and
 * 326-328 (the first branch condition, the index advances for the 3- and
 * 4-byte cases and closing braces are not shown); confirm against the full
 * source before relying on the control flow.
 */
315 #define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
316 (c)=(uint8_t)(s)[(i)++]; \
317 if(!U8_IS_SINGLE(c)) { \
319 (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
320 } else if((c)<0xf0) { \
322 (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
325 (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
329 } UPRV_BLOCK_MACRO_END
/**
 * Checked forward decode: forwards all arguments to U8_INTERNAL_NEXT_OR_SUB
 * with U_SENTINEL as the substitute value.
 */
351 #define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
/**
 * Checked forward decode: forwards all arguments to U8_INTERNAL_NEXT_OR_SUB
 * with 0xfffd as the substitute value.
 */
377 #define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
/**
 * Shared body of U8_NEXT and U8_NEXT_OR_FFFD: decodes one code point from
 * s[i] into c with validation.
 * Visible behavior:
 *   - the first byte is read with post-increment of i;
 *   - for a non-single byte, (i)!=(length) is required before a trail byte
 *     is read;
 *   - the first trail byte is checked against U8_LEAD3_T1_BITS (with c
 *     reduced to its low 4 bits) or U8_LEAD4_T1_BITS;
 *   - a two-byte lead must be >=0xc2 and is masked to its low 5 bits;
 *   - later trail bytes are accepted when (byte-0x80)<=0x3f and are merged
 *     as (c<<6)|t, with i pre-incremented between bytes.
 * The whole validation is one short-circuit && chain, so evaluation order is
 * significant and i stops advancing at the first failing test.
 * NOTE(review): the listing skips original lines 383, 385-387, 389-391, 395,
 * 397, 399 and 402-405. The declaration of __t, the conditional operators
 * that select the 2/3/4-byte paths and the place where sub is used are not
 * visible here; presumably sub is assigned to c on failure - confirm.
 */
380 #define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
381 (c)=(uint8_t)(s)[(i)++]; \
382 if(!U8_IS_SINGLE(c)) { \
384 if((i)!=(length) && \
388 U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
392 U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
393 ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
394 (__t=(s)[i]-0x80)<=0x3f) && \
396 ((c)=((c)<<6)|__t, ++(i)!=(length)) \
398 (c)>=0xc2 && ((c)&=0x1f, 1)) && \
400 (__t=(s)[i]-0x80)<=0x3f && \
401 ((c)=((c)<<6)|__t, ++(i), 1)) { \
406 } UPRV_BLOCK_MACRO_END
/**
 * Writes the UTF-8 bytes for a code point to s, post-incrementing i for each
 * byte stored; no capacity argument exists, so no bounds check is possible.
 * Visible stores: a single byte equal to the value; lead bytes built as
 * 0xc0|(v>>6), 0xe0|(v>>12) and 0xf0|(v>>18); trail bytes built as 0x80 ored
 * with a 6-bit slice of the value.
 * NOTE(review): the listing skips original lines 422-423, 425-426, 428-429,
 * 431, 434, 436 and 438. The declaration of __uc and the conditions that
 * choose between the 1/2/3/4-byte forms are not visible here.
 */
421 #define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
424 (s)[(i)++]=(uint8_t)__uc; \
427 (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
430 (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
432 (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
433 (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
435 (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
437 (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
439 } UPRV_BLOCK_MACRO_END
/**
 * Writes the UTF-8 bytes for a code point to s with capacity checks,
 * post-incrementing i for each byte stored.
 * Visible branches:
 *   - two bytes when the value is <=0x7ff and (i)+1<(capacity);
 *   - three bytes when the value is <=0xd7ff or in 0xe000..0xffff (the range
 *     0xd800..0xdfff is excluded) and (i)+2<(capacity);
 *   - four bytes when the value is in 0x10000..0x10ffff and (i)+3<(capacity).
 * i and capacity are expanded several times.
 * NOTE(review): the listing skips original lines 459-460 and 474-476. The
 * declaration of __uc, the condition guarding the single-byte store and the
 * final else branch (where isError would be used) are not visible here.
 */
458 #define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
461 (s)[(i)++]=(uint8_t)__uc; \
462 } else if(__uc<=0x7ff && (i)+1<(capacity)) { \
463 (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
464 (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
465 } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
466 (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
467 (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
468 (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
469 } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
470 (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
471 (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
472 (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
473 (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
477 } UPRV_BLOCK_MACRO_END
/**
 * Advances i past one code point without validation: adds 1 plus
 * U8_COUNT_TRAIL_BYTES_UNSAFE of the byte at s[i].
 * No length is consulted, and the trail bytes themselves are not inspected.
 */
489 #define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
490 (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \
491 } UPRV_BLOCK_MACRO_END
/**
 * Advances i past one code point with validation.
 * The first byte is always consumed (post-increment). Further bytes are only
 * examined when it is a lead byte and (i)!=(length). The visible tests are:
 *   - lead in 0xe0..0xef: U8_IS_VALID_LEAD3_AND_T1 on the first trail byte,
 *     then one more U8_IS_TRAIL byte;
 *   - lead below 0xe0: U8_IS_TRAIL on the first trail byte;
 *   - otherwise: U8_IS_VALID_LEAD4_AND_T1, then two more U8_IS_TRAIL bytes.
 * Each extra byte is reached with ++(i)!=(length), so i never moves past
 * length inside these chains.
 * NOTE(review): the listing skips original lines 513-514, 517-519 and
 * 523-526; the bodies of the successful branches (the final index advance)
 * and the closing braces are not visible here.
 */
506 #define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
507 uint8_t __b=(s)[(i)++]; \
508 if(U8_IS_LEAD(__b) && (i)!=(length)) { \
509 uint8_t __t1=(s)[i]; \
510 if((0xe0<=__b && __b<0xf0)) { \
511 if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
512 ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
515 } else if(__b<0xe0) { \
516 if(U8_IS_TRAIL(__t1)) { \
520 if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
521 ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
522 ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
527 } UPRV_BLOCK_MACRO_END
/**
 * Repeated unchecked advance; the only visible statement is a call to
 * U8_FWD_1_UNSAFE(s, i).
 * NOTE(review): the listing skips original lines 542-543 and 545-546, so the
 * loop that presumably applies n here is not visible - confirm against the
 * full source.
 */
541 #define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
544 U8_FWD_1_UNSAFE(s, i); \
547 } UPRV_BLOCK_MACRO_END
/**
 * Repeated checked advance: calls U8_FWD_1(s, i, length) in a loop that runs
 * while __N>0 and either (i)<(length), or length is negative and s[i] is not
 * a zero byte (so a negative length is treated as "stop at the NUL byte").
 * NOTE(review): the listing skips original lines 565 and 568-569; the
 * declaration of __N (presumably initialised from n) and its decrement are
 * not visible here.
 */
564 #define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
566 while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
567 U8_FWD_1(s, i, length); \
570 } UPRV_BLOCK_MACRO_END
/**
 * Moves i backwards while the byte at s[i] satisfies U8_IS_TRAIL, leaving it
 * on the first non-trail byte at or before the original position.
 * There is no lower bound in the loop, so the caller must guarantee that a
 * non-trail byte exists at or before i.
 */
585 #define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
586 while(U8_IS_TRAIL((s)[i])) { --(i); } \
587 } UPRV_BLOCK_MACRO_END
/**
 * Checked counterpart of U8_SET_CP_START_UNSAFE: when the byte at s[i] is a
 * trail byte, i is replaced by the result of utf8_back1SafeBody(s, start, i);
 * otherwise i is left unchanged.
 * NOTE(review): original line 609 (presumably the closing brace of the if) is
 * not shown in this listing.
 */
606 #define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
607 if(U8_IS_TRAIL((s)[(i)])) { \
608 (i)=utf8_back1SafeBody(s, start, (i)); \
610 } UPRV_BLOCK_MACRO_END
/**
 * Inspects up to the last three bytes before index length to detect a
 * multi-byte sequence that is cut off at the end.
 * Visible tests, applied only when (length)>(start):
 *   - the last byte __b1 is classified as single, lead or trail;
 *   - if it is a trail byte and length-2 is still >= start, the byte before
 *     it (__b2) is checked as a 3- or 4-byte lead (0xe0..0xf4) that is valid
 *     together with __b1;
 *   - if __b2 is itself a trail byte and length-3 is still >= start, the byte
 *     before that (__b3) is checked as a 4-byte lead (0xf0..0xf4) that is
 *     valid together with __b2.
 * Note that s is used without surrounding parentheses in the index
 * expressions, so s should be a simple identifier or primary expression.
 * NOTE(review): the listing skips original lines 642, 644, 650-651 and
 * 655-659; the statements executed in each branch (presumably the
 * adjustments of length) are not visible here.
 */
638 #define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
639 if((length)>(start)) { \
640 uint8_t __b1=s[(length)-1]; \
641 if(U8_IS_SINGLE(__b1)) { \
643 } else if(U8_IS_LEAD(__b1)) { \
645 } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
646 uint8_t __b2=s[(length)-2]; \
647 if(0xe0<=__b2 && __b2<=0xf4) { \
648 if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
649 U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
652 } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
653 uint8_t __b3=s[(length)-3]; \
654 if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
660 } UPRV_BLOCK_MACRO_END
/**
 * Decodes the code point that ends just before index i, moving i backwards,
 * without validation.
 * The byte at s[--i] is read into c. If it is a trail byte, block-local
 * variables __b, __count (starting at 1) and __shift (starting at 6) are
 * used to fold earlier bytes into c: a lead byte is masked with
 * U8_MASK_LEAD_BYTE and ored in at __shift, a further trail byte contributes
 * its low 6 bits at __shift.
 * NOTE(review): the listing skips original lines 687-692, 695-696 and
 * 698-702; the loop, the reads into __b and the updates of __count and
 * __shift are not visible here.
 */
683 #define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
684 (c)=(uint8_t)(s)[--(i)]; \
685 if(U8_IS_TRAIL(c)) { \
686 uint8_t __b, __count=1, __shift=6; \
693 U8_MASK_LEAD_BYTE(__b, __count); \
694 (c)|=(UChar32)__b<<__shift; \
697 (c)|=(UChar32)(__b&0x3f)<<__shift; \
703 } UPRV_BLOCK_MACRO_END
/**
 * Checked backward decode: reads the byte at s[--i] into c and, when it is
 * not a single byte, replaces c with the result of
 * utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1).
 * The address of i is passed, so i must be an addressable int32_t lvalue;
 * s is used without surrounding parentheses inside the cast.
 * NOTE(review): original line 729 (presumably the closing brace of the if) is
 * not shown in this listing.
 */
725 #define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
726 (c)=(uint8_t)(s)[--(i)]; \
727 if(!U8_IS_SINGLE(c)) { \
728 (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
730 } UPRV_BLOCK_MACRO_END
/**
 * Same sequence as U8_PREV, except that -3 rather than -1 is passed as the
 * last argument of utf8_prevCharSafeBody.
 * NOTE(review): judging by the macro name, -3 presumably selects U+FFFD as
 * the result for ill-formed input; the helper's body is not visible here.
 * Original line 760 (presumably the closing brace of the if) is not shown.
 */
756 #define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
757 (c)=(uint8_t)(s)[--(i)]; \
758 if(!U8_IS_SINGLE(c)) { \
759 (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \
761 } UPRV_BLOCK_MACRO_END
/**
 * Moves i back by one code point without validation: i is pre-decremented at
 * least once and then again for as long as the byte at the new position is a
 * trail byte. The loop has no lower bound.
 */
774 #define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
775 while(U8_IS_TRAIL((s)[--(i)])) {} \
776 } UPRV_BLOCK_MACRO_END
/**
 * Moves i back by one code point with validation: i is pre-decremented once
 * and, if the byte there is a trail byte, replaced by the result of
 * utf8_back1SafeBody(s, start, i).
 * NOTE(review): original line 793 (presumably the closing brace of the if) is
 * not shown in this listing.
 */
790 #define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
791 if(U8_IS_TRAIL((s)[--(i)])) { \
792 (i)=utf8_back1SafeBody(s, start, (i)); \
794 } UPRV_BLOCK_MACRO_END
/**
 * Repeated unchecked backward step; the only visible statement is a call to
 * U8_BACK_1_UNSAFE(s, i).
 * NOTE(review): the listing skips original lines 810-811 and 813-814, so the
 * loop that presumably applies n here is not visible - confirm against the
 * full source.
 */
809 #define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
812 U8_BACK_1_UNSAFE(s, i); \
815 } UPRV_BLOCK_MACRO_END
/**
 * Repeated checked backward step: calls U8_BACK_1(s, start, i) in a loop that
 * runs while __N>0 and (i)>(start), so i never moves below start.
 * NOTE(review): the listing skips original lines 832 and 835-836; the
 * declaration of __N (presumably initialised from n) and its decrement are
 * not visible here.
 */
831 #define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
833 while(__N>0 && (i)>(start)) { \
834 U8_BACK_1(s, start, i); \
837 } UPRV_BLOCK_MACRO_END
/**
 * Adjusts i to the end of the code point that contains the byte before it,
 * without validation: steps back one code point with U8_BACK_1_UNSAFE and
 * then forward one code point with U8_FWD_1_UNSAFE.
 * The backward step always reads s[i-1], so i must be greater than the start
 * of the buffer.
 */
852 #define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
853 U8_BACK_1_UNSAFE(s, i); \
854 U8_FWD_1_UNSAFE(s, i); \
855 } UPRV_BLOCK_MACRO_END
/**
 * Checked counterpart of U8_SET_CP_LIMIT_UNSAFE.
 * Only when (start)<(i) and either (i)<(length) or length is negative does it
 * step back with U8_BACK_1(s, start, i) and forward again with
 * U8_FWD_1(s, i, length); otherwise i is left unchanged.
 * NOTE(review): original line 878 (presumably the closing brace of the if) is
 * not shown in this listing.
 */
874 #define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
875 if((start)<(i) && ((i)<(length) || (length)<0)) { \
876 U8_BACK_1(s, start, i); \
877 U8_FWD_1(s, i, length); \
879 } UPRV_BLOCK_MACRO_END
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void start
static static sync static getppid static getegid const char static filename char static len const char char static bufsiz static mask static vfork const void static prot static getpgrp const char static swapflags static arg static fd static protocol static who struct sockaddr static addrlen static backlog struct timeval struct timezone static tz const struct iovec static count static mode const void const struct sockaddr static tolen const char static pathname void static offset struct stat static buf void long static basep static whence static length const void static len static semflg const void static shmflg const struct timespec struct timespec static rem const char static group const void length
Basic types and constants for UTF.
/*
 * Prototype only: takes a byte buffer s, an int32_t index i, an int32_t
 * length, a UChar32 c and a UBool pointer pIsError, and returns int32_t.
 * No caller or definition is visible in this section.
 * NOTE(review): presumably the out-of-line helper for appending a code point;
 * confirm the parameter semantics against the implementation. The line has no
 * terminating semicolon in this listing.
 */
U_STABLE int32_t U_EXPORT2 utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError)
/*
 * Prototype only: takes a const byte buffer s, a pointer pi to an int32_t
 * index, an int32_t length, a UChar32 c and a UBool strict, and returns
 * UChar32. No caller or definition is visible in this section.
 * NOTE(review): presumably the out-of-line helper for forward decoding;
 * confirm the parameter semantics against the implementation. The line has no
 * terminating semicolon in this listing.
 */
U_STABLE UChar32 U_EXPORT2 utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict)
/*
 * Prototype of the helper called by U8_PREV and U8_PREV_OR_FFFD after they
 * have read a non-single byte at s[--i]. Those macros pass the buffer cast
 * to const uint8_t *, the start index, the address of the caller's index,
 * the byte just read as c, and -1 (U8_PREV) or -3 (U8_PREV_OR_FFFD) as
 * strict; the return value is stored into the caller's c.
 * NOTE(review): the meaning of the strict values is not visible here. The
 * line has no terminating semicolon in this listing.
 */
U_STABLE UChar32 U_EXPORT2 utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict)
/*
 * Prototype of the helper called by U8_SET_CP_START and U8_BACK_1 when the
 * byte at index i is a trail byte. Those macros pass the buffer, the start
 * index and the current index, and assign the returned int32_t back to the
 * caller's index.
 * NOTE(review): how the returned index is chosen is not visible here. The
 * line has no terminating semicolon in this listing.
 */
U_STABLE int32_t U_EXPORT2 utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i)