utf_old.h

Go to the documentation of this file.
00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 2002-2004, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *   file name:  utf_old.h
00009 *   encoding:   US-ASCII
00010 *   tab size:   8 (not used)
00011 *   indentation:4
00012 *
00013 *   created on: 2002sep21
00014 *   created by: Markus W. Scherer
00015 */
00016 
00140 #ifndef __UTF_OLD_H__
00141 #define __UTF_OLD_H__
00142 
00143 #ifndef U_HIDE_DEPRECATED_API
00144 
00145 /* utf.h must be included first. */
00146 #ifndef __UTF_H__
00147 #   include "unicode/utf.h"
00148 #endif
00149 
00150 /* Formerly utf.h, part 1 --------------------------------------------------- */
00151 
#if defined(U_USE_UTF_DEPRECATES)
/** Legacy alias for int32_t; declared only when U_USE_UTF_DEPRECATES is defined. */
typedef int32_t UTextOffset;
#endif
00162 
/** Presumably the width in bits of the default code unit (UTF-16) - TODO confirm. */
#define UTF_SIZE 16

/**
 * Mode selection: UTF_SAFE is defined (with an empty value), while
 * UTF_STRICT and UTF_UNSAFE are explicitly removed in case an includer
 * defined them earlier.
 */
#define UTF_SAFE
#undef UTF_STRICT
#undef UTF_UNSAFE
00177 
/* Error sentinels ---------------------------------------------------------- */

/** Value stored by the UTF8_..._SAFE macros in this file for a byte that cannot start/end a sequence. */
#define UTF8_ERROR_VALUE_1 0x15

/** Second UTF-8 error sentinel; treated as an error by UTF_IS_ERROR() and UTF_IS_VALID(). */
#define UTF8_ERROR_VALUE_2 0x9f

/** Value stored by the UTF16_/UTF32_..._SAFE macros in this file on error. */
#define UTF_ERROR_VALUE 0xffff

/* Code point classification ------------------------------------------------ */

/**
 * Nonzero for Unicode noncharacters: 0xfdd0..0xfdef, and every value up to
 * 0x10ffff whose low 16 bits are 0xfffe or 0xffff.
 * Evaluates c several times.
 */
#define UTF_IS_UNICODE_NONCHAR(c) \
    (!((c)<0xfdd0) && \
     ((uint32_t)(c)<=0xfdef || 0xfffe==((c)&0xfffe)) && \
     (uint32_t)(c)<=0x10ffff)

/**
 * Nonzero for values in 0..0xd7ff, and for values in 0xe000..0x10ffff that
 * are not noncharacters (surrogates and out-of-range values yield 0).
 * Evaluates c several times.
 */
#define UTF_IS_UNICODE_CHAR(c) \
    ((uint32_t)(c)<0xd800 ? 1 : \
        ((uint32_t)(c)>0xdfff && \
         (uint32_t)(c)<=0x10ffff && \
         !UTF_IS_UNICODE_NONCHAR(c)))

/**
 * Nonzero if c is an error sentinel: low 16 bits equal to 0xfffe/0xffff,
 * or one of the two UTF-8 error values.
 */
#define UTF_IS_ERROR(c) \
    (0xfffe==((c)&0xfffe) || UTF8_ERROR_VALUE_1==(c) || UTF8_ERROR_VALUE_2==(c))

/** Nonzero if c passes UTF_IS_UNICODE_CHAR() and is neither UTF-8 error value. */
#define UTF_IS_VALID(c) \
    (UTF_IS_UNICODE_CHAR(c) && \
     !((c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2))

/** Nonzero if uchar lies in the surrogate range 0xd800..0xdfff. */
#define UTF_IS_SURROGATE(uchar) (0xd800==((uchar)&0xfffff800))
00261 
00262 /* Formerly utf8.h ---------------------------------------------------------- */
00263 
/**
 * Index the utf8_countTrailBytes table (declared outside this file) with a
 * lead byte; by its name the table holds the number of trail bytes that
 * follow each lead byte.
 * The argument is parenthesized before the cast so that a compound
 * expression is reduced to 8 bits as a whole, not just its first operand.
 */
#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)(leadByte)])
00269 
/**
 * Clear the marker bits of a lead byte in place, keeping only its low
 * (6-countTrailBytes) payload bits. leadByte must be a modifiable lvalue.
 */
#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) \
    ((leadByte)&=((1<<(6-(countTrailBytes)))-1))

/** Nonzero if bit 7 of the byte is clear (0x00..0x7f). */
#define UTF8_IS_SINGLE(uchar) (!((uchar)&0x80))

/** Nonzero if the byte is in 0xc0..0xfd. */
#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<=0x3d)

/** Nonzero if the byte has the form 10xxxxxx (0x80..0xbf). */
#define UTF8_IS_TRAIL(uchar) (0x80==((uchar)&0xc0))

/** Nonzero if code point c is above 0x7f, i.e. does not fit in a single UTF-8 byte. */
#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>=0x80)
00285 
/**
 * Number of UTF-8 bytes for code point c.
 * The active variant yields 1..4 for 0..0x10ffff and 3 for anything outside
 * that range (including negative values). The disabled variant below would
 * additionally yield 5 and 6 for values up to 0x3ffffff and 0x7fffffff.
 * Evaluates c several times.
 */
#if 1
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
         (uint32_t)(c)<=0x7ff ? 2 : \
         (uint32_t)((c)-0x10000)>0xfffff ? 3 : 4)
#else
#   define UTF8_CHAR_LENGTH(c) \
        ((uint32_t)(c)<=0x7f ? 1 : \
         (uint32_t)(c)<=0x7ff ? 2 : \
         (uint32_t)(c)<=0xffff ? 3 : \
         (uint32_t)(c)<=0x10ffff ? 4 : \
         (uint32_t)(c)<=0x3ffffff ? 5 : \
         (uint32_t)(c)<=0x7fffffff ? 6 : 3)
#endif
00320 
/** Longest UTF-8 sequence, in bytes, produced for one code point by the macros in this file. */
#define UTF8_MAX_CHAR_LENGTH 4

/** Scale a size by 5/2 using integer arithmetic (rounds down). */
#define UTF8_ARRAY_SIZE(size) (((size)*5)/2)
00326 
/**
 * Read into c the code point that contains byte offset i, without checks.
 * Works on a private copy of i, so the caller's index is not modified.
 */
#define UTF8_GET_CHAR_UNSAFE(s, i, c) { \
    int32_t _utf8_unsafe_pos=(int32_t)(i); \
    /* back up to the start of the sequence, then decode forward */ \
    UTF8_SET_CHAR_START_UNSAFE(s, _utf8_unsafe_pos); \
    UTF8_NEXT_CHAR_UNSAFE(s, _utf8_unsafe_pos, c); \
}

/**
 * Checked counterpart of UTF8_GET_CHAR_UNSAFE: the backward search is
 * limited by start and the forward decoding by length.
 * Works on a private copy of i, so the caller's index is not modified.
 */
#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    int32_t _utf8_safe_pos=(int32_t)(i); \
    UTF8_SET_CHAR_START_SAFE(s, start, _utf8_safe_pos); \
    UTF8_NEXT_CHAR_SAFE(s, _utf8_safe_pos, length, c, strict); \
}
00340 
/**
 * Read the code point starting at s[i] into c and advance i, without any
 * checking of the input. Bytes outside 0xc0..0xf4 are returned unchanged.
 * For lead bytes the trail byte count is looked up, the lead byte's marker
 * bits are masked off, and 6 payload bits are appended per trail byte.
 */
#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[(i)++]; \
    if((uint8_t)((c)-0xc0)<=0x34) { \
        uint8_t _utf8_trail_count=UTF8_COUNT_TRAIL_BYTES(c); \
        UTF8_MASK_LEAD_BYTE(c, _utf8_trail_count); \
        switch(_utf8_trail_count) { \
        case 3: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            /* fall through */ \
        case 2: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            /* fall through */ \
        case 1: \
            (c)=((c)<<6)|((s)[(i)++]&0x3f); \
            /* deliberately no further cases, to keep the switch() cheap */ \
            break; \
        } \
    } \
}
00360 
/**
 * Encode code point c as 1 to 4 UTF-8 bytes at s[i] and advance i past them.
 * Neither the buffer capacity nor the validity of c is checked.
 * Evaluates c and i several times.
 */
#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)<=0x7f) { \
        /* 1 byte: 0xxxxxxx */ \
        (s)[(i)++]=(uint8_t)(c); \
    } else if((uint32_t)(c)<=0x7ff) { \
        /* 2 bytes: 110xxxxx 10xxxxxx */ \
        (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } else if((uint32_t)(c)<=0xffff) { \
        /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */ \
        (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } else { \
        /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ \
        (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
    } \
}
00380 
/** Advance i by one byte plus the trail byte count looked up for s[i]; no checks. */
#define UTF8_FWD_1_UNSAFE(s, i) { \
    (i)+=UTF8_COUNT_TRAIL_BYTES((s)[i])+1; \
}

/** Apply UTF8_FWD_1_UNSAFE n times (nothing happens for n<=0). */
#define UTF8_FWD_N_UNSAFE(s, i, n) { \
    int32_t _utf8_remaining; \
    for(_utf8_remaining=(n); _utf8_remaining>0; --_utf8_remaining) { \
        UTF8_FWD_1_UNSAFE(s, i); \
    } \
}

/** Move i backwards while s[i] is a trail byte; does not check for the start of s. */
#define UTF8_SET_CHAR_START_UNSAFE(s, i) { \
    for(; UTF8_IS_TRAIL((s)[i]); --(i)) {} \
}
00399 
/**
 * Read the code point starting at s[i] into c and advance i.
 * Bytes below 0x80 are returned as they are. A lead byte is handed to
 * utf8_nextCharSafeBody() together with &i, length and strict; any other
 * byte (0x80..0xbf, 0xfe, 0xff) produces UTF8_ERROR_VALUE_1.
 */
#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if((c)>=0x80) { \
        if(!UTF8_IS_LEAD(c)) { \
            (c)=UTF8_ERROR_VALUE_1; \
        } else { \
            (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
        } \
    } \
}

/**
 * Append code point c at s[i]. Values up to 0x7f are stored directly as one
 * byte (without consulting length); everything else is delegated to
 * utf8_appendCharSafeBody(), whose return value becomes the new i.
 */
#define UTF8_APPEND_CHAR_SAFE(s, i, length, c)  { \
    if((uint32_t)(c)>0x7f) { \
        (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
    } else { \
        (s)[(i)++]=(uint8_t)(c); \
    } \
}
00420 
/* The checked UTF-8 forward macros forward unchanged to the U8_ macros. */

/** Same as U8_FWD_1(s, i, length). */
#define UTF8_FWD_1_SAFE(s, i, length) \
    U8_FWD_1(s, i, length)

/** Same as U8_FWD_N(s, i, length, n). */
#define UTF8_FWD_N_SAFE(s, i, length, n) \
    U8_FWD_N(s, i, length, n)

/** Same as U8_SET_CP_START(s, start, i). */
#define UTF8_SET_CHAR_START_SAFE(s, start, i) \
    U8_SET_CP_START(s, start, i)
00429 
/**
 * Move i back to the start of the code point that ends before s[i] and
 * store that code point in c; no checking of the input.
 * Trail bytes are accumulated from least to most significant (6 bits each)
 * until a byte >=0xc0 is found; that lead byte is masked according to the
 * number of trail bytes seen and supplies the top bits.
 */
#define UTF8_PREV_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[--(i)]; \
    if(UTF8_IS_TRAIL(c)) { \
        uint8_t _utf8_prev_byte, _utf8_prev_trails=1, _utf8_prev_shift=6; \
\
        /* keep only the 6 payload bits of the last trail byte */ \
        (c)&=0x3f; \
        for(;;) { \
            _utf8_prev_byte=(s)[--(i)]; \
            if(_utf8_prev_byte<0xc0) { \
                /* another trail byte: add its payload and keep going */ \
                (c)|=(UChar32)(_utf8_prev_byte&0x3f)<<_utf8_prev_shift; \
                ++_utf8_prev_trails; \
                _utf8_prev_shift+=6; \
                continue; \
            } \
            /* lead byte reached: strip its marker bits and finish */ \
            UTF8_MASK_LEAD_BYTE(_utf8_prev_byte, _utf8_prev_trails); \
            (c)|=(UChar32)_utf8_prev_byte<<_utf8_prev_shift; \
            break; \
        } \
    } \
}
00452 
/** Decrement i at least once, and keep decrementing while s[i] is a trail byte; no checks. */
#define UTF8_BACK_1_UNSAFE(s, i) { \
    do { \
        --(i); \
    } while(UTF8_IS_TRAIL((s)[i])); \
}

/** Apply UTF8_BACK_1_UNSAFE n times (nothing happens for n<=0). */
#define UTF8_BACK_N_UNSAFE(s, i, n) { \
    int32_t _utf8_remaining; \
    for(_utf8_remaining=(n); _utf8_remaining>0; --_utf8_remaining) { \
        UTF8_BACK_1_UNSAFE(s, i); \
    } \
}

/** Step back one code point, then forward one, leaving i just behind that code point. */
#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    UTF8_BACK_1_UNSAFE(s, i); \
    UTF8_FWD_1_UNSAFE(s, i); \
}
00472 
/**
 * Decrement i and read s[i] into c.
 * Bytes below 0x80 are returned as they are. A trail byte (0x80..0xbf) is
 * handed to utf8_prevCharSafeBody() together with start, &i and strict;
 * any byte above 0xbf produces UTF8_ERROR_VALUE_1.
 */
#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if((c)>=0x80) { \
        if((c)>0xbf) { \
            (c)=UTF8_ERROR_VALUE_1; \
        } else { \
            (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
        } \
    } \
}
00484 
/* The checked UTF-8 backward macros forward unchanged to the U8_ macros. */

/** Same as U8_BACK_1(s, start, i). */
#define UTF8_BACK_1_SAFE(s, start, i) \
    U8_BACK_1(s, start, i)

/** Same as U8_BACK_N(s, start, i, n). */
#define UTF8_BACK_N_SAFE(s, start, i, n) \
    U8_BACK_N(s, start, i, n)

/** Same as U8_SET_CP_LIMIT(s, start, i, length). */
#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) \
    U8_SET_CP_LIMIT(s, start, i, length)
00493 
00494 /* Formerly utf16.h --------------------------------------------------------- */
00495 
/** Nonzero if uchar is in 0xd800..0xdbff (first/lead surrogate). */
#define UTF_IS_FIRST_SURROGATE(uchar) (0xd800==((uchar)&0xfffffc00))

/** Nonzero if uchar is in 0xdc00..0xdfff (second/trail surrogate). */
#define UTF_IS_SECOND_SURROGATE(uchar) (0xdc00==((uchar)&0xfffffc00))

/**
 * For a value already known to be a surrogate: nonzero if it is a first
 * surrogate. Only bit 10 is tested.
 */
#define UTF_IS_SURROGATE_FIRST(c) (!((c)&0x400))

/**
 * Constant subtracted by UTF16_GET_PAIR_VALUE: it removes the surrogate
 * bases and adds the 0x10000 start of the supplementary range in one step.
 */
#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)

/** Combine a first and a second surrogate into a supplementary code point; no validation. */
#define UTF16_GET_PAIR_VALUE(first, second) \
    ((second)+((first)<<10UL)-UTF_SURROGATE_OFFSET)
00511 
/*
 * The expansions below are wrapped in an outer pair of parentheses so that
 * the leading cast or '!' cannot combine with operators at the call site
 * (for example "sizeof UTF_FIRST_SURROGATE(c)").
 */

/** First (lead) surrogate, as a UChar, for a supplementary code point; no range check. */
#define UTF_FIRST_SURROGATE(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))

/** Second (trail) surrogate, as a UChar, for a supplementary code point; no range check. */
#define UTF_SECOND_SURROGATE(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))

/** Same as UTF_FIRST_SURROGATE(supplementary). */
#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)

/** Same as UTF_SECOND_SURROGATE(supplementary). */
#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)

/** Nonzero if uchar is not a surrogate code unit. */
#define UTF16_IS_SINGLE(uchar) (!UTF_IS_SURROGATE(uchar))

/** Same as UTF_IS_FIRST_SURROGATE(uchar). */
#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)

/** Same as UTF_IS_SECOND_SURROGATE(uchar). */
#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
00532 
/** Nonzero if code point c is above 0xffff, i.e. does not fit in one 16-bit code unit. */
#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>=0x10000)

/** Number of 16-bit code units for code point c: 1 up to 0xffff, otherwise 2. */
#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)>0xffff ? 2 : 1)

/** Largest value UTF16_CHAR_LENGTH can yield. */
#define UTF16_MAX_CHAR_LENGTH 2

/** Identity: yields its argument unchanged. */
#define UTF16_ARRAY_SIZE(size) (size)
00544 
/**
 * Read into c the code point that contains the code unit s[i]; i is not
 * modified and nothing is checked. For a surrogate at s[i] the partner is
 * taken from s[i+1] (first surrogate) or s[i-1] (second surrogate) without
 * verifying that it is a matching surrogate or that it is inside the string.
 */
#define UTF16_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        if(!UTF_IS_SURROGATE_FIRST(c)) { \
            /* s[i] is the second half: the first half is just before it */ \
            (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
        } else { \
            /* s[i] is the first half: the second half follows it */ \
            (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
        } \
    } \
}
00566 
/**
 * Read into c the code point that contains the code unit s[i]; i is not
 * modified. A surrogate is combined with its partner only if the partner
 * lies within [start, length) and is a surrogate of the other kind.
 * With strict nonzero, an unpaired surrogate and any single code unit that
 * fails UTF_IS_UNICODE_CHAR() yield UTF_ERROR_VALUE; with strict zero they
 * are returned as they are.
 */
#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(UTF_IS_SURROGATE(c)) { \
        uint16_t _utf16_partner; \
        if(UTF_IS_SURROGATE_FIRST(c)) { \
            /* look ahead for the second surrogate */ \
            if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(_utf16_partner=(s)[(i)+1])) { \
                /* in strict mode, xxfffe/xxffff results are left for UTF_IS_ERROR()/UTF_IS_UNICODE_CHAR() */ \
                (c)=UTF16_GET_PAIR_VALUE((c), _utf16_partner); \
            } else if(strict) {\
                /* first surrogate without a partner */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } else { \
            /* look behind for the first surrogate */ \
            if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(_utf16_partner=(s)[(i)-1])) { \
                /* in strict mode, xxfffe/xxffff results are left for UTF_IS_ERROR()/UTF_IS_UNICODE_CHAR() */ \
                (c)=UTF16_GET_PAIR_VALUE(_utf16_partner, (c)); \
            } else if(strict) {\
                /* second surrogate without a partner */ \
                (c)=UTF_ERROR_VALUE; \
            } \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
00593 
/**
 * Read the code point starting at s[i] into c and advance i past it.
 * After a first surrogate the next code unit is consumed unconditionally
 * as the second surrogate; nothing is checked.
 */
#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    ++(i); \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        (c)=UTF16_GET_PAIR_VALUE((c), (s)[i]); \
        ++(i); \
    } \
}
00601 
/**
 * Store code point c at s[i] as one code unit (c<=0xffff) or as a surrogate
 * pair (anything larger) and advance i accordingly.
 * Neither the buffer capacity nor the validity of c is checked.
 */
#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) { \
    if((uint32_t)(c)>0xffff) { \
        /* supplementary: first surrogate, then second surrogate */ \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else { \
        (s)[(i)++]=(uint16_t)(c); \
    } \
}
00611 
/** Advance i by two code units if s[i] is a first surrogate, otherwise by one; no checks. */
#define UTF16_FWD_1_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[i])) { \
        (i)+=2; \
    } else { \
        ++(i); \
    } \
}

/** Apply UTF16_FWD_1_UNSAFE n times (nothing happens for n<=0). */
#define UTF16_FWD_N_UNSAFE(s, i, n) { \
    int32_t _utf16_remaining; \
    for(_utf16_remaining=(n); _utf16_remaining>0; --_utf16_remaining) { \
        UTF16_FWD_1_UNSAFE(s, i); \
    } \
}

/** If s[i] is a second surrogate, move i back by one code unit; no checks. */
#define UTF16_SET_CHAR_START_UNSAFE(s, i) { \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        (i)-=1; \
    } \
}
00634 
/**
 * Read the code point starting at s[i] into c and advance i past it.
 * A first surrogate is combined with the following code unit only if that
 * unit is below length and is a second surrogate.
 * With strict nonzero, an unpaired surrogate and any single code unit that
 * fails UTF_IS_UNICODE_CHAR() yield UTF_ERROR_VALUE; with strict zero they
 * are returned as they are.
 */
#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(UTF_IS_FIRST_SURROGATE(c)) { \
        uint16_t _utf16_second; \
        if((i)<(length) && UTF_IS_SECOND_SURROGATE(_utf16_second=(s)[(i)])) { \
            ++(i); \
            /* in strict mode, xxfffe/xxffff results are left for UTF_IS_ERROR()/UTF_IS_UNICODE_CHAR() */ \
            (c)=UTF16_GET_PAIR_VALUE((c), _utf16_second); \
        } else if(strict) {\
            /* first surrogate without a partner */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* lone second surrogate or a noncharacter */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
00653 
/**
 * Append code point c at s[i] and advance i.
 * - c<=0xffff: stored as one code unit (length is not consulted).
 * - c<=0x10ffff with room for two units (i+1<length): stored as a
 *   surrogate pair.
 * - otherwise (no room for the pair, or c>0x10ffff): one UTF_ERROR_VALUE
 *   code unit is stored instead.
 */
#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(length)) { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else /* pair does not fit, or c is out of range */ { \
        (s)[(i)++]=UTF_ERROR_VALUE; \
    } \
}
00669 
/* The checked UTF-16 forward macros forward unchanged to the U16_ macros. */

/** Same as U16_FWD_1(s, i, length). */
#define UTF16_FWD_1_SAFE(s, i, length) \
    U16_FWD_1(s, i, length)

/** Same as U16_FWD_N(s, i, length, n). */
#define UTF16_FWD_N_SAFE(s, i, length, n) \
    U16_FWD_N(s, i, length, n)

/** Same as U16_SET_CP_START(s, start, i). */
#define UTF16_SET_CHAR_START_SAFE(s, start, i) \
    U16_SET_CP_START(s, start, i)
00678 
/**
 * Move i back over the code point that ends before s[i] and store it in c.
 * Before a second surrogate the preceding code unit is consumed
 * unconditionally as the first surrogate; nothing is checked.
 */
#define UTF16_PREV_CHAR_UNSAFE(s, i, c) { \
    --(i); \
    (c)=(s)[i]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        --(i); \
        (c)=UTF16_GET_PAIR_VALUE((s)[i], (c)); \
    } \
}

/** Move i back by one code unit, and by one more if that unit is a second surrogate; no checks. */
#define UTF16_BACK_1_UNSAFE(s, i) { \
    --(i); \
    if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
        --(i); \
    } \
}

/** Apply UTF16_BACK_1_UNSAFE n times (nothing happens for n<=0). */
#define UTF16_BACK_N_UNSAFE(s, i, n) { \
    int32_t _utf16_remaining; \
    for(_utf16_remaining=(n); _utf16_remaining>0; --_utf16_remaining) { \
        UTF16_BACK_1_UNSAFE(s, i); \
    } \
}

/** If the code unit before s[i] is a first surrogate, advance i by one; no checks. */
#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
        (i)+=1; \
    } \
}
00709 
/**
 * Move i back over the code point that ends before s[i] and store it in c.
 * A second surrogate is combined with the preceding code unit only if i is
 * still above start and that unit is a first surrogate.
 * With strict nonzero, an unpaired surrogate and any single code unit that
 * fails UTF_IS_UNICODE_CHAR() yield UTF_ERROR_VALUE; with strict zero they
 * are returned as they are.
 */
#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF_IS_SECOND_SURROGATE(c)) { \
        uint16_t _utf16_first; \
        if((i)>(start) && UTF_IS_FIRST_SURROGATE(_utf16_first=(s)[(i)-1])) { \
            --(i); \
            /* in strict mode, xxfffe/xxffff results are left for UTF_IS_ERROR()/UTF_IS_UNICODE_CHAR() */ \
            (c)=UTF16_GET_PAIR_VALUE(_utf16_first, (c)); \
        } else if(strict) {\
            /* second surrogate without a partner */ \
            (c)=UTF_ERROR_VALUE; \
        } \
    } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
        /* lone first surrogate or a noncharacter */ \
        (c)=UTF_ERROR_VALUE; \
    } \
}
00728 
/* The checked UTF-16 backward macros forward unchanged to the U16_ macros. */

/** Same as U16_BACK_1(s, start, i). */
#define UTF16_BACK_1_SAFE(s, start, i) \
    U16_BACK_1(s, start, i)

/** Same as U16_BACK_N(s, start, i, n). */
#define UTF16_BACK_N_SAFE(s, start, i, n) \
    U16_BACK_N(s, start, i, n)

/** Same as U16_SET_CP_LIMIT(s, start, i, length). */
#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) \
    U16_SET_CP_LIMIT(s, start, i, length)
00737 
00738 /* Formerly utf32.h --------------------------------------------------------- */
00739 
00740 /*
00741 * Old documentation:
00742 *
00743 *   <p>This file defines macros to deal with UTF-32 code units and code points.
00744 *   Signatures and semantics are the same as for the similarly named macros
00745 *   in utf16.h.
00746 *   utf32.h is included by utf.h after unicode/umachine.h
00747 *   and some common definitions.</p>
00748 *   <p><b>Usage:</b>  Follow the ICU coding guidelines for if() statements when using these macros:
00749 *                  use compound statements (curly braces {}) for if-else-while...
00750 *                  bodies, and terminate every macro statement with a semicolon.</p>
00751 */
00752 
00753 /* internal definitions ----------------------------------------------------- */
00754 
/**
 * Internal acceptance test for a UTF-32 code unit.
 * With strict nonzero c must pass UTF_IS_UNICODE_CHAR(); otherwise any
 * value in 0..0x10ffff is accepted.
 */
#define UTF32_IS_SAFE(c, strict) \
    ((strict) ? \
        UTF_IS_UNICODE_CHAR(c) : \
        (uint32_t)(c)<=0x10ffff)
00760 
00761 /*
00762  * For the semantics of all of these macros, see utf16.h.
00763  * The UTF-32 versions are trivial because any code point is
00764  * encoded using exactly one code unit.
00765  */
00766 
00767 /* single-code point definitions -------------------------------------------- */
00768 
00769 /* classes of code unit values */
00770 
/*
 * Code unit classes: constant answers, the argument is ignored.
 */

/** Always 1. */
#define UTF32_IS_SINGLE(uchar) 1

/** Always 0. */
#define UTF32_IS_LEAD(uchar) 0

/** Always 0. */
#define UTF32_IS_TRAIL(uchar) 0

/*
 * Code units per code point: always exactly one.
 */

/** Always 0. */
#define UTF32_NEED_MULTIPLE_UCHAR(c) 0

/** Always 1. */
#define UTF32_CHAR_LENGTH(c) 1

/** Always 1. */
#define UTF32_MAX_CHAR_LENGTH 1

/*
 * Average number of code units compared to UTF-16.
 */

/** Identity: yields its argument unchanged. */
#define UTF32_ARRAY_SIZE(size) (size)
00791 
/** Copy s[i] into c; i is not modified and nothing is checked. */
#define UTF32_GET_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
}

/**
 * Copy s[i] into c, replacing it with UTF_ERROR_VALUE if it fails
 * UTF32_IS_SAFE(c, strict). start and length are accepted but unused.
 */
#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) { \
    (c)=(s)[i]; \
    if(UTF32_IS_SAFE(c, strict)) { \
        /* acceptable value: keep it */ \
    } else { \
        (c)=UTF_ERROR_VALUE; \
    } \
}
00804 
00805 /* definitions with forward iteration --------------------------------------- */
00806 
/** Copy s[i] into c and advance i by one; no checks. */
#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) { \
    (c)=(s)[i]; \
    ++(i); \
}

/** Store c at s[i] and advance i by one; no checks. */
#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) { \
    (s)[i]=(c); \
    ++(i); \
}

/** Advance i by one. */
#define UTF32_FWD_1_UNSAFE(s, i) { \
    (i)+=1; \
}

/** Advance i by n. */
#define UTF32_FWD_N_UNSAFE(s, i, n) { \
    (i)+=(n); \
}

/** No-op: every UTF-32 index is already at the start of a code point. */
#define UTF32_SET_CHAR_START_UNSAFE(s, i) { \
    /* nothing to do */ \
}
00830 
/**
 * Copy s[i] into c and advance i by one; the value is replaced with
 * UTF_ERROR_VALUE if it fails UTF32_IS_SAFE(c, strict).
 * length is accepted but unused.
 */
#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) { \
    (c)=(s)[(i)++]; \
    if(UTF32_IS_SAFE(c, strict)) { \
        /* acceptable value: keep it */ \
    } else { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/**
 * Store c at s[i] and advance i by one; values above 0x10ffff are replaced
 * with 0xfffd. length is accepted but unused.
 */
#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) { \
    if((uint32_t)(c)>0x10ffff) /* out of range, write 0xfffd */ { \
        (s)[(i)++]=0xfffd; \
    } else { \
        (s)[(i)++]=(c); \
    } \
}

/** Advance i by one; length is accepted but unused. */
#define UTF32_FWD_1_SAFE(s, i, length) { \
    (i)+=1; \
}

/** Advance i by n, then clamp it to length if it went past. */
#define UTF32_FWD_N_SAFE(s, i, length, n) { \
    (i)+=(n); \
    if((i)>(length)) { \
        (i)=(length); \
    } \
}

/** No-op: every UTF-32 index is already at the start of a code point. */
#define UTF32_SET_CHAR_START_SAFE(s, start, i) { \
    /* nothing to do */ \
}
00863 
00864 /* definitions with backward iteration -------------------------------------- */
00865 
/** Move i back by one and copy s[i] into c; no checks. */
#define UTF32_PREV_CHAR_UNSAFE(s, i, c) { \
    --(i); \
    (c)=(s)[i]; \
}

/** Move i back by one. */
#define UTF32_BACK_1_UNSAFE(s, i) { \
    (i)-=1; \
}

/** Move i back by n. */
#define UTF32_BACK_N_UNSAFE(s, i, n) { \
    (i)-=(n); \
}

/** No-op: every UTF-32 index is already behind a complete code point. */
#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) { \
    /* nothing to do */ \
}
00884 
/**
 * Move i back by one and copy s[i] into c; the value is replaced with
 * UTF_ERROR_VALUE if it fails UTF32_IS_SAFE(c, strict).
 * start is accepted but unused.
 */
#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) { \
    (c)=(s)[--(i)]; \
    if(UTF32_IS_SAFE(c, strict)) { \
        /* acceptable value: keep it */ \
    } else { \
        (c)=UTF_ERROR_VALUE; \
    } \
}

/** Move i back by one; start is accepted but unused. */
#define UTF32_BACK_1_SAFE(s, start, i) { \
    (i)-=1; \
}

/** Move i back by n, then clamp it to start if it went below. */
#define UTF32_BACK_N_SAFE(s, start, i, n) { \
    if(((i)-=(n))<(start)) { \
        (i)=(start); \
    } \
}

/**
 * No-op: every UTF-32 index is already behind a complete code point.
 * Note the parameter list (s, i, length): unlike UTF8_SET_CHAR_LIMIT_SAFE
 * and UTF16_SET_CHAR_LIMIT_SAFE there is no start parameter.
 */
#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) { \
    /* nothing to do */ \
}
00909 
00910 /* Formerly utf.h, part 2 --------------------------------------------------- */
00911 
/*
 * Generic UTF_ names with an explicit _UNSAFE/_SAFE suffix.
 * Each one forwards its arguments unchanged to the UTF16_ macro of the
 * same name.
 */

/** Same as UTF16_ARRAY_SIZE(size). */
#define UTF_ARRAY_SIZE(size) \
    UTF16_ARRAY_SIZE(size)

/* random access ------------------------------------------------------------ */

/** Same as UTF16_GET_CHAR_UNSAFE. */
#define UTF_GET_CHAR_UNSAFE(s, i, c) \
    UTF16_GET_CHAR_UNSAFE(s, i, c)

/** Same as UTF16_GET_CHAR_SAFE. */
#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) \
    UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)

/* forward iteration -------------------------------------------------------- */

/** Same as UTF16_NEXT_CHAR_UNSAFE. */
#define UTF_NEXT_CHAR_UNSAFE(s, i, c) \
    UTF16_NEXT_CHAR_UNSAFE(s, i, c)

/** Same as UTF16_NEXT_CHAR_SAFE. */
#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) \
    UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)

/** Same as UTF16_APPEND_CHAR_UNSAFE. */
#define UTF_APPEND_CHAR_UNSAFE(s, i, c) \
    UTF16_APPEND_CHAR_UNSAFE(s, i, c)

/** Same as UTF16_APPEND_CHAR_SAFE. */
#define UTF_APPEND_CHAR_SAFE(s, i, length, c) \
    UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/** Same as UTF16_FWD_1_UNSAFE. */
#define UTF_FWD_1_UNSAFE(s, i) \
    UTF16_FWD_1_UNSAFE(s, i)

/** Same as UTF16_FWD_1_SAFE. */
#define UTF_FWD_1_SAFE(s, i, length) \
    UTF16_FWD_1_SAFE(s, i, length)

/** Same as UTF16_FWD_N_UNSAFE. */
#define UTF_FWD_N_UNSAFE(s, i, n) \
    UTF16_FWD_N_UNSAFE(s, i, n)

/** Same as UTF16_FWD_N_SAFE. */
#define UTF_FWD_N_SAFE(s, i, length, n) \
    UTF16_FWD_N_SAFE(s, i, length, n)

/** Same as UTF16_SET_CHAR_START_UNSAFE. */
#define UTF_SET_CHAR_START_UNSAFE(s, i) \
    UTF16_SET_CHAR_START_UNSAFE(s, i)

/** Same as UTF16_SET_CHAR_START_SAFE. */
#define UTF_SET_CHAR_START_SAFE(s, start, i) \
    UTF16_SET_CHAR_START_SAFE(s, start, i)

/* backward iteration ------------------------------------------------------- */

/** Same as UTF16_PREV_CHAR_UNSAFE. */
#define UTF_PREV_CHAR_UNSAFE(s, i, c) \
    UTF16_PREV_CHAR_UNSAFE(s, i, c)

/** Same as UTF16_PREV_CHAR_SAFE. */
#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) \
    UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)

/** Same as UTF16_BACK_1_UNSAFE. */
#define UTF_BACK_1_UNSAFE(s, i) \
    UTF16_BACK_1_UNSAFE(s, i)

/** Same as UTF16_BACK_1_SAFE. */
#define UTF_BACK_1_SAFE(s, start, i) \
    UTF16_BACK_1_SAFE(s, start, i)

/** Same as UTF16_BACK_N_UNSAFE. */
#define UTF_BACK_N_UNSAFE(s, i, n) \
    UTF16_BACK_N_UNSAFE(s, i, n)

/** Same as UTF16_BACK_N_SAFE. */
#define UTF_BACK_N_SAFE(s, start, i, n) \
    UTF16_BACK_N_SAFE(s, start, i, n)

/** Same as UTF16_SET_CHAR_LIMIT_UNSAFE. */
#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) \
    UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)

/** Same as UTF16_SET_CHAR_LIMIT_SAFE. */
#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) \
    UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
00987 
00988 /* Define default macros (UTF-16 "safe") ------------------------------------ */
00989 
/*
 * Unsuffixed default macros. All of them forward to the U16_ macros, except
 * UTF_NEED_MULTIPLE_UCHAR and UTF_APPEND_CHAR, which forward to the UTF16_
 * macros defined earlier in this file.
 */

/* code unit classes and lengths */

/** Same as U16_IS_SINGLE(uchar). */
#define UTF_IS_SINGLE(uchar) \
    U16_IS_SINGLE(uchar)

/** Same as U16_IS_LEAD(uchar). */
#define UTF_IS_LEAD(uchar) \
    U16_IS_LEAD(uchar)

/** Same as U16_IS_TRAIL(uchar). */
#define UTF_IS_TRAIL(uchar) \
    U16_IS_TRAIL(uchar)

/** Same as UTF16_NEED_MULTIPLE_UCHAR(c). */
#define UTF_NEED_MULTIPLE_UCHAR(c) \
    UTF16_NEED_MULTIPLE_UCHAR(c)

/** Same as U16_LENGTH(c). */
#define UTF_CHAR_LENGTH(c) \
    U16_LENGTH(c)

/** Same as U16_MAX_LENGTH. */
#define UTF_MAX_CHAR_LENGTH \
    U16_MAX_LENGTH

/* random access and forward iteration */

/** Same as U16_GET(s, start, i, length, c). */
#define UTF_GET_CHAR(s, start, i, length, c) \
    U16_GET(s, start, i, length, c)

/** Same as U16_NEXT(s, i, length, c). */
#define UTF_NEXT_CHAR(s, i, length, c) \
    U16_NEXT(s, i, length, c)

/** Same as UTF16_APPEND_CHAR_SAFE(s, i, length, c). */
#define UTF_APPEND_CHAR(s, i, length, c) \
    UTF16_APPEND_CHAR_SAFE(s, i, length, c)

/** Same as U16_FWD_1(s, i, length). */
#define UTF_FWD_1(s, i, length) \
    U16_FWD_1(s, i, length)

/** Same as U16_FWD_N(s, i, length, n). */
#define UTF_FWD_N(s, i, length, n) \
    U16_FWD_N(s, i, length, n)

/** Same as U16_SET_CP_START(s, start, i). */
#define UTF_SET_CHAR_START(s, start, i) \
    U16_SET_CP_START(s, start, i)

/* backward iteration */

/** Same as U16_PREV(s, start, i, c). */
#define UTF_PREV_CHAR(s, start, i, c) \
    U16_PREV(s, start, i, c)

/** Same as U16_BACK_1(s, start, i). */
#define UTF_BACK_1(s, start, i) \
    U16_BACK_1(s, start, i)

/** Same as U16_BACK_N(s, start, i, n). */
#define UTF_BACK_N(s, start, i, n) \
    U16_BACK_N(s, start, i, n)

/** Same as U16_SET_CP_LIMIT(s, start, i, length). */
#define UTF_SET_CHAR_LIMIT(s, start, i, length) \
    U16_SET_CP_LIMIT(s, start, i, length)
01161 
01162 #endif /* U_HIDE_DEPRECATED_API */
01163 
01164 #endif
01165 

Generated on Tue Nov 16 10:03:07 2004 for ICU 3.2 by  doxygen 1.3.9.1