Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

OgreUTFString.h

Go to the documentation of this file.
00001 // Modified from OpenGUI under lenient license
00002 // Original copyright details and licensing below:
00003 // OpenGUI (http://opengui.sourceforge.net)
00004 // This source code is released under the BSD License
00005 
00006 // Permission is given to the Ogre project to use the contents of file within its
00007 // source and binary applications, as well as any derivative works, in accordance
00008 // with the terms of any license under which Ogre is or will be distributed.
00009 //
00010 // Ogre may relicense its copy of this file, as well as any OpenGUI released updates
00011 // to this file, under any terms that it deems fit, and is not required to maintain
00012 // the original BSD licensing terms of this file, however OpenGUI retains the right
00013 // to present its copy of this file under the terms of any license under which
00014 // OpenGUI is distributed.
00015 //
00016 // Ogre is not required to release to OpenGUI any future changes that it makes to
00017 // this file, and understands and agrees that any such changes that are released
00018 // back to OpenGUI will become available under the terms of any license under which
00019 // OpenGUI is distributed.
00020 //
00021 // For brevity, this permission text may be removed from this file if desired.
00022 // The original record kept within the SourceForge (http://sourceforge.net/) tracker
00023 // is sufficient.
00024 //
00025 // - Eric Shorkey (zero/zeroskill) <opengui@rightbracket.com> [January 20th, 2007]
00026 
00027 #ifndef __OGRE_UTFSTRING_H__
00028 #define __OGRE_UTFSTRING_H__
00029 
00030 
00031 #include "OgrePrerequisites.h"
00032 
00033 #if OGRE_UNICODE_SUPPORT 
00034 
00035 // these are explained later
00036 #include <iterator>
00037 #include <string>
00038 #include <stdexcept>
00039 
00040 // Workaround for VC7:
00041 //      When building with /MD or /MDd, VC7 has both std::basic_string<unsigned short> and
00042 // basic_string<__wchar_t> instantiated in msvcprt[d].lib/MSVCP71[D].dll, but the header
00043 // files tell the compiler that only one of them exists there (depending on the /Zc:wchar_t
00044 // compile option). Since this file uses both of them, the compiler would instantiate the other
00045 // one in user object code, which leads to duplicate symbols with msvcprt.lib/MSVCP71[D].dll.
00046 //
00047 #if OGRE_COMPILER == OGRE_COMPILER_MSVC && (1300 <= OGRE_COMP_VER && OGRE_COMP_VER <= 1310)
00048 
00049 # if defined(_DLL_CPPLIB)
00050 
00051 namespace std
00052 {
00053     template class _CRTIMP2 basic_string<unsigned short, char_traits<unsigned short>,
00054         allocator<unsigned short> >;
00055 
00056     template class _CRTIMP2 basic_string<__wchar_t, char_traits<__wchar_t>,
00057         allocator<__wchar_t> >;
00058 }
00059 
00060 # endif // defined(_DLL_CPPLIB)
00061 
00062 #endif  // OGRE_COMPILER == OGRE_COMPILER_MSVC && (1300 <= OGRE_COMP_VER && OGRE_COMP_VER <= 1310)
00063 
00064 
00065 namespace Ogre {
00066 
00067     /* READ THIS NOTICE BEFORE USING IN YOUR OWN APPLICATIONS
00068     =NOTICE=
00069     This class is not a complete Unicode solution. It purposefully does not
00070     provide certain functionality, such as proper lexical sorting for
00071     Unicode values. It does provide comparison operators for the sole purpose
00072     of using UTFString as an index with std::map and other operator< sorted
00073     containers, but it should NOT be relied upon for meaningful lexical
00074     operations, such as alphabetical sorts. If you need this type of
00075     functionality, look into using ICU instead (http://icu.sourceforge.net/).
00076 
00077     =REQUIREMENTS=
00078     There are a few requirements for proper operation. They are fairly small,
00079     and shouldn't restrict usage on any reasonable target.
00080     * Compiler must support unsigned 16-bit integer types
00081     * Compiler must support signed 32-bit integer types
00082     * wchar_t must be either UTF-16 or UTF-32 encoding, and specified as such
00083         using the WCHAR_UTF16 macro as outlined below.
00084     * You must include <iterator>, <string>, and <cwchar>. Probably more, but
00085         these are the most obvious.
00086 
00087     =REQUIRED PREPROCESSOR MACROS=
00088     This class requires two preprocessor macros to be defined in order to
00089     work as advertised.
00090     INT32 - must be mapped to a signed 32 bit integer (ex. #define INT32 int)
00091     UINT16 - must be mapped to an unsigned 16 bit integer (ex. #define UINT16 unsigned short)
00092 
00093     Additionally, a third macro should be defined to control the evaluation of wchar_t:
00094     WCHAR_UTF16 - should be defined when wchar_t represents UTF-16 code points,
00095         such as in Windows. Otherwise it is assumed that wchar_t is a 32-bit
00096         integer representing UTF-32 code points.
00097     */
00098 
00099     // THIS IS A VERY BRIEF AUTO DETECTION. YOU MAY NEED TO TWEAK THIS
00100 #ifdef __STDC_ISO_10646__
00101 // for any compiler that provides this, wchar_t is guaranteed to hold any Unicode value with a single code point (32-bit or larger)
00102 // so we can safely skip the rest of the testing
00103 #else // #ifdef __STDC_ISO_10646__
00104 #if defined( __WIN32__ ) || defined( _WIN32 )
00105 #define WCHAR_UTF16 // All currently known Windows platforms utilize UTF-16 encoding in wchar_t
00106 #else // #if defined( __WIN32__ ) || defined( _WIN32 )
00107 #if WCHAR_MAX <= 0xFFFF // this is a last resort fall back test; WCHAR_MAX is defined in <wchar.h>
00108 #define WCHAR_UTF16 // best we can tell, wchar_t is not larger than 16-bit
00109 #endif // #if WCHAR_MAX <= 0xFFFF
00110 #endif // #if defined( __WIN32__ ) || defined( _WIN32 )
00111 #endif // #ifdef __STDC_ISO_10646__
00112 
00113 
00114 // OGRE_IS_NATIVE_WCHAR_T means that wchar_t isn't a typedef of
00115 // uint16 or uint32.
00116 #if OGRE_COMPILER == OGRE_COMPILER_MSVC
00117 
00118 // Don't define the wchar_t related functions when compiling
00119 // without /Zc:wchar_t: they would duplicate the
00120 // UTFString::code_point related functions, because in that
00121 // case both types are a typedef of uint16.
00122 # if defined(_NATIVE_WCHAR_T_DEFINED)
00123 #   define OGRE_IS_NATIVE_WCHAR_T      1
00124 # else
00125 #   define OGRE_IS_NATIVE_WCHAR_T      0
00126 # endif
00127 
00128 #else   // OGRE_COMPILER != OGRE_COMPILER_MSVC
00129 
00130 // Assume wchar_t is a native type on other compilers
00131 #   define OGRE_IS_NATIVE_WCHAR_T     1
00132 
00133 #endif  // OGRE_COMPILER == OGRE_COMPILER_MSVC
00134 
00136 
00161     class UTFString {
00162         // constants used in UTF-8 conversions
00163         static const unsigned char _lead1 = 0xC0;      //110xxxxx
00164         static const unsigned char _lead1_mask = 0x1F; //00011111
00165         static const unsigned char _lead2 = 0xE0;      //1110xxxx
00166         static const unsigned char _lead2_mask = 0x0F; //00001111
00167         static const unsigned char _lead3 = 0xF0;      //11110xxx
00168         static const unsigned char _lead3_mask = 0x07; //00000111
00169         static const unsigned char _lead4 = 0xF8;      //111110xx
00170         static const unsigned char _lead4_mask = 0x03; //00000011
00171         static const unsigned char _lead5 = 0xFC;      //1111110x
00172         static const unsigned char _lead5_mask = 0x01; //00000001
00173         static const unsigned char _cont = 0x80;       //10xxxxxx
00174         static const unsigned char _cont_mask = 0x3F;  //00111111
00175 
00176     public:
00178         typedef size_t size_type;
00180         static const size_type npos = ~0;
00181 
00183         typedef uint32 unicode_char;
00184 
00186         typedef uint16 code_point;
00187 
00189         typedef code_point value_type;
00190 
00191         typedef std::basic_string<code_point> dstring; // data string
00192 
00194         typedef std::basic_string<unicode_char> utf32string;
00195 
//! Exception type that carries a text message; derives from std::runtime_error
class invalid_data: public std::runtime_error
{
public:
    //! Builds the exception, forwarding the message text to std::runtime_error
    explicit invalid_data( const std::string& _Message )
        : std::runtime_error( _Message )
    {
        // nothing else to set up
    }
};
00204 
00205         //#########################################################################
00207     class _base_iterator: public std::iterator<std::random_access_iterator_tag, value_type> { /* i don't know why the beautifier is freaking out on this line */
00208             friend class UTFString;
00209         protected:
00210             _base_iterator() {
00211                 mString = 0;
00212             }
00213 
00214             void _seekFwd( size_type c ) {
00215                 mIter += c;
00216             }
00217             void _seekRev( size_type c ) {
00218                 mIter -= c;
00219             }
00220             void _become( const _base_iterator& i ) {
00221                 mIter = i.mIter;
00222                 mString = i.mString;
00223             }
00224             bool _test_begin() const {
00225                 return mIter == mString->mData.begin();
00226             }
00227             bool _test_end() const {
00228                 return mIter == mString->mData.end();
00229             }
00230             size_type _get_index() const {
00231                 return mIter - mString->mData.begin();
00232             }
00233             void _jump_to( size_type index ) {
00234                 mIter = mString->mData.begin() + index;
00235             }
00236 
00237             unicode_char _getCharacter() const {
00238                 size_type current_index = _get_index();
00239                 return mString->getChar( current_index );
00240             }
00241             int _setCharacter( unicode_char uc ) {
00242                 size_type current_index = _get_index();
00243                 int change = mString->setChar( current_index, uc );
00244                 _jump_to( current_index );
00245                 return change;
00246             }
00247 
00248             void _moveNext() {
00249                 _seekFwd( 1 ); // move 1 code point forward
00250                 if ( _test_end() ) return; // exit if we hit the end
00251                 if ( _utf16_surrogate_follow( mIter[0] ) ) {
00252                     // landing on a follow code point means we might be part of a bigger character
00253                     // so we test for that
00254                     code_point lead_half = 0;
00255                     //NB: we can't possibly be at the beginning here, so no need to test
00256                     lead_half = mIter[-1]; // check the previous code point to see if we're part of a surrogate pair
00257                     if ( _utf16_surrogate_lead( lead_half ) ) {
00258                         _seekFwd( 1 ); // if so, then advance 1 more code point
00259                     }
00260                 }
00261             }
00262             void _movePrev() {
00263                 _seekRev( 1 ); // move 1 code point backwards
00264                 if ( _test_begin() ) return; // exit if we hit the beginning
00265                 if ( _utf16_surrogate_follow( mIter[0] ) ) {
00266                     // landing on a follow code point means we might be part of a bigger character
00267                     // so we test for that
00268                     code_point lead_half = 0;
00269                     lead_half = mIter[-1]; // check the previous character to see if we're part of a surrogate pair
00270                     if ( _utf16_surrogate_lead( lead_half ) ) {
00271                         _seekRev( 1 ); // if so, then rewind 1 more code point
00272                     }
00273                 }
00274             }
00275 
00276             dstring::iterator mIter;
00277             UTFString* mString;
00278         };
00279 
00280         //#########################################################################
00281         // FORWARD ITERATORS
00282         //#########################################################################
00283         class _const_fwd_iterator; // forward declaration
00284 
00286     class _fwd_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */
00287             friend class _const_fwd_iterator;
00288         public:
00289             _fwd_iterator() {}
00290             _fwd_iterator( const _fwd_iterator& i ) {
00291                 _become( i );
00292             }
00293 
00295             _fwd_iterator& operator++() {
00296                 _seekFwd( 1 );
00297                 return *this;
00298             }
00300             _fwd_iterator operator++( int ) {
00301                 _fwd_iterator tmp( *this );
00302                 _seekFwd( 1 );
00303                 return tmp;
00304             }
00305 
00307             _fwd_iterator& operator--() {
00308                 _seekRev( 1 );
00309                 return *this;
00310             }
00312             _fwd_iterator operator--( int ) {
00313                 _fwd_iterator tmp( *this );
00314                 _seekRev( 1 );
00315                 return tmp;
00316             }
00317 
00319             _fwd_iterator operator+( size_type n ) {
00320                 _fwd_iterator tmp( *this );
00321                 tmp._seekFwd( n );
00322                 return tmp;
00323             }
00325             _fwd_iterator operator+( difference_type n ) {
00326                 _fwd_iterator tmp( *this );
00327                 if ( n < 0 )
00328                     tmp._seekRev( -n );
00329                 else
00330                     tmp._seekFwd( n );
00331                 return tmp;
00332             }
00334             _fwd_iterator operator-( size_type n ) {
00335                 _fwd_iterator tmp( *this );
00336                 tmp._seekRev( n );
00337                 return tmp;
00338             }
00340             _fwd_iterator operator-( difference_type n ) {
00341                 _fwd_iterator tmp( *this );
00342                 if ( n < 0 )
00343                     tmp._seekFwd( -n );
00344                 else
00345                     tmp._seekRev( n );
00346                 return tmp;
00347             }
00348 
00350             _fwd_iterator& operator+=( size_type n ) {
00351                 _seekFwd( n );
00352                 return *this;
00353             }
00355             _fwd_iterator& operator+=( difference_type n ) {
00356                 if ( n < 0 )
00357                     _seekRev( -n );
00358                 else
00359                     _seekFwd( n );
00360                 return *this;
00361             }
00363             _fwd_iterator& operator-=( size_type n ) {
00364                 _seekRev( n );
00365                 return *this;
00366             }
00368             _fwd_iterator& operator-=( difference_type n ) {
00369                 if ( n < 0 )
00370                     _seekFwd( -n );
00371                 else
00372                     _seekRev( n );
00373                 return *this;
00374             }
00375 
00377             value_type& operator*() const {
00378                 return *mIter;
00379             }
00380 
00382             value_type& operator[]( size_type n ) const {
00383                 _fwd_iterator tmp( *this );
00384                 tmp += n;
00385                 return *tmp;
00386             }
00388             value_type& operator[]( difference_type n ) const {
00389                 _fwd_iterator tmp( *this );
00390                 tmp += n;
00391                 return *tmp;
00392             }
00393 
00395             _fwd_iterator& moveNext() {
00396                 _moveNext();
00397                 return *this;
00398             }
00400             _fwd_iterator& movePrev() {
00401                 _movePrev();
00402                 return *this;
00403             }
00405             unicode_char getCharacter() const {
00406                 return _getCharacter();
00407             }
00409             int setCharacter( unicode_char uc ) {
00410                 return _setCharacter( uc );
00411             }
00412         };
00413 
00414 
00415 
00416         //#########################################################################
00418     class _const_fwd_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */
00419         public:
00420             _const_fwd_iterator() {}
00421             _const_fwd_iterator( const _const_fwd_iterator& i ) {
00422                 _become( i );
00423             }
00424             _const_fwd_iterator( const _fwd_iterator& i ) {
00425                 _become( i );
00426             }
00427 
00429             _const_fwd_iterator& operator++() {
00430                 _seekFwd( 1 );
00431                 return *this;
00432             }
00434             _const_fwd_iterator operator++( int ) {
00435                 _const_fwd_iterator tmp( *this );
00436                 _seekFwd( 1 );
00437                 return tmp;
00438             }
00439 
00441             _const_fwd_iterator& operator--() {
00442                 _seekRev( 1 );
00443                 return *this;
00444             }
00446             _const_fwd_iterator operator--( int ) {
00447                 _const_fwd_iterator tmp( *this );
00448                 _seekRev( 1 );
00449                 return tmp;
00450             }
00451 
00453             _const_fwd_iterator operator+( size_type n ) {
00454                 _const_fwd_iterator tmp( *this );
00455                 tmp._seekFwd( n );
00456                 return tmp;
00457             }
00459             _const_fwd_iterator operator+( difference_type n ) {
00460                 _const_fwd_iterator tmp( *this );
00461                 if ( n < 0 )
00462                     tmp._seekRev( -n );
00463                 else
00464                     tmp._seekFwd( n );
00465                 return tmp;
00466             }
00468             _const_fwd_iterator operator-( size_type n ) {
00469                 _const_fwd_iterator tmp( *this );
00470                 tmp._seekRev( n );
00471                 return tmp;
00472             }
00474             _const_fwd_iterator operator-( difference_type n ) {
00475                 _const_fwd_iterator tmp( *this );
00476                 if ( n < 0 )
00477                     tmp._seekFwd( -n );
00478                 else
00479                     tmp._seekRev( n );
00480                 return tmp;
00481             }
00482 
00484             _const_fwd_iterator& operator+=( size_type n ) {
00485                 _seekFwd( n );
00486                 return *this;
00487             }
00489             _const_fwd_iterator& operator+=( difference_type n ) {
00490                 if ( n < 0 )
00491                     _seekRev( -n );
00492                 else
00493                     _seekFwd( n );
00494                 return *this;
00495             }
00497             _const_fwd_iterator& operator-=( size_type n ) {
00498                 _seekRev( n );
00499                 return *this;
00500             }
00502             _const_fwd_iterator& operator-=( difference_type n ) {
00503                 if ( n < 0 )
00504                     _seekFwd( -n );
00505                 else
00506                     _seekRev( n );
00507                 return *this;
00508             }
00509 
00511             const value_type& operator*() const {
00512                 return *mIter;
00513             }
00514 
00516             const value_type& operator[]( size_type n ) const {
00517                 _const_fwd_iterator tmp( *this );
00518                 tmp += n;
00519                 return *tmp;
00520             }
00522             const value_type& operator[]( difference_type n ) const {
00523                 _const_fwd_iterator tmp( *this );
00524                 tmp += n;
00525                 return *tmp;
00526             }
00527 
00529             _const_fwd_iterator& moveNext() {
00530                 _moveNext();
00531                 return *this;
00532             }
00534             _const_fwd_iterator& movePrev() {
00535                 _movePrev();
00536                 return *this;
00537             }
00539             unicode_char getCharacter() const {
00540                 return _getCharacter();
00541             }
00542 
00544             friend size_type operator-( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00546             friend bool operator==( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00548             friend bool operator!=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00550             friend bool operator<( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00552             friend bool operator<=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00554             friend bool operator>( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00556             friend bool operator>=( const _const_fwd_iterator& left, const _const_fwd_iterator& right );
00557 
00558         };
00559 
00560         //#########################################################################
00561         // REVERSE ITERATORS
00562         //#########################################################################
00563         class _const_rev_iterator; // forward declaration
00565     class _rev_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */
00566             friend class _const_rev_iterator;
00567         public:
00568             _rev_iterator() {}
00569             _rev_iterator( const _rev_iterator& i ) {
00570                 _become( i );
00571             }
00572 
00574             _rev_iterator& operator++() {
00575                 _seekRev( 1 );
00576                 return *this;
00577             }
00579             _rev_iterator operator++( int ) {
00580                 _rev_iterator tmp( *this );
00581                 _seekRev( 1 );
00582                 return tmp;
00583             }
00584 
00586             _rev_iterator& operator--() {
00587                 _seekFwd( 1 );
00588                 return *this;
00589             }
00591             _rev_iterator operator--( int ) {
00592                 _rev_iterator tmp( *this );
00593                 _seekFwd( 1 );
00594                 return tmp;
00595             }
00596 
00598             _rev_iterator operator+( size_type n ) {
00599                 _rev_iterator tmp( *this );
00600                 tmp._seekRev( n );
00601                 return tmp;
00602             }
00604             _rev_iterator operator+( difference_type n ) {
00605                 _rev_iterator tmp( *this );
00606                 if ( n < 0 )
00607                     tmp._seekFwd( -n );
00608                 else
00609                     tmp._seekRev( n );
00610                 return tmp;
00611             }
00613             _rev_iterator operator-( size_type n ) {
00614                 _rev_iterator tmp( *this );
00615                 tmp._seekFwd( n );
00616                 return tmp;
00617             }
00619             _rev_iterator operator-( difference_type n ) {
00620                 _rev_iterator tmp( *this );
00621                 if ( n < 0 )
00622                     tmp._seekRev( -n );
00623                 else
00624                     tmp._seekFwd( n );
00625                 return tmp;
00626             }
00627 
00629             _rev_iterator& operator+=( size_type n ) {
00630                 _seekRev( n );
00631                 return *this;
00632             }
00634             _rev_iterator& operator+=( difference_type n ) {
00635                 if ( n < 0 )
00636                     _seekFwd( -n );
00637                 else
00638                     _seekRev( n );
00639                 return *this;
00640             }
00642             _rev_iterator& operator-=( size_type n ) {
00643                 _seekFwd( n );
00644                 return *this;
00645             }
00647             _rev_iterator& operator-=( difference_type n ) {
00648                 if ( n < 0 )
00649                     _seekRev( -n );
00650                 else
00651                     _seekFwd( n );
00652                 return *this;
00653             }
00654 
00656             value_type& operator*() const {
00657                 return mIter[-1];
00658             }
00659 
00661             value_type& operator[]( size_type n ) const {
00662                 _rev_iterator tmp( *this );
00663                 tmp -= n;
00664                 return *tmp;
00665             }
00667             value_type& operator[]( difference_type n ) const {
00668                 _rev_iterator tmp( *this );
00669                 tmp -= n;
00670                 return *tmp;
00671             }
00672         };
00673         //#########################################################################
00675     class _const_rev_iterator: public _base_iterator { /* i don't know why the beautifier is freaking out on this line */
00676         public:
00677             _const_rev_iterator() {}
00678             _const_rev_iterator( const _const_rev_iterator& i ) {
00679                 _become( i );
00680             }
00681             _const_rev_iterator( const _rev_iterator& i ) {
00682                 _become( i );
00683             }
00685             _const_rev_iterator& operator++() {
00686                 _seekRev( 1 );
00687                 return *this;
00688             }
00690             _const_rev_iterator operator++( int ) {
00691                 _const_rev_iterator tmp( *this );
00692                 _seekRev( 1 );
00693                 return tmp;
00694             }
00695 
00697             _const_rev_iterator& operator--() {
00698                 _seekFwd( 1 );
00699                 return *this;
00700             }
00702             _const_rev_iterator operator--( int ) {
00703                 _const_rev_iterator tmp( *this );
00704                 _seekFwd( 1 );
00705                 return tmp;
00706             }
00707 
00709             _const_rev_iterator operator+( size_type n ) {
00710                 _const_rev_iterator tmp( *this );
00711                 tmp._seekRev( n );
00712                 return tmp;
00713             }
00715             _const_rev_iterator operator+( difference_type n ) {
00716                 _const_rev_iterator tmp( *this );
00717                 if ( n < 0 )
00718                     tmp._seekFwd( -n );
00719                 else
00720                     tmp._seekRev( n );
00721                 return tmp;
00722             }
00724             _const_rev_iterator operator-( size_type n ) {
00725                 _const_rev_iterator tmp( *this );
00726                 tmp._seekFwd( n );
00727                 return tmp;
00728             }
00730             _const_rev_iterator operator-( difference_type n ) {
00731                 _const_rev_iterator tmp( *this );
00732                 if ( n < 0 )
00733                     tmp._seekRev( -n );
00734                 else
00735                     tmp._seekFwd( n );
00736                 return tmp;
00737             }
00738 
00740             _const_rev_iterator& operator+=( size_type n ) {
00741                 _seekRev( n );
00742                 return *this;
00743             }
00745             _const_rev_iterator& operator+=( difference_type n ) {
00746                 if ( n < 0 )
00747                     _seekFwd( -n );
00748                 else
00749                     _seekRev( n );
00750                 return *this;
00751             }
00753             _const_rev_iterator& operator-=( size_type n ) {
00754                 _seekFwd( n );
00755                 return *this;
00756             }
00758             _const_rev_iterator& operator-=( difference_type n ) {
00759                 if ( n < 0 )
00760                     _seekRev( -n );
00761                 else
00762                     _seekFwd( n );
00763                 return *this;
00764             }
00765 
00767             const value_type& operator*() const {
00768                 return mIter[-1];
00769             }
00770 
00772             const value_type& operator[]( size_type n ) const {
00773                 _const_rev_iterator tmp( *this );
00774                 tmp -= n;
00775                 return *tmp;
00776             }
00778             const value_type& operator[]( difference_type n ) const {
00779                 _const_rev_iterator tmp( *this );
00780                 tmp -= n;
00781                 return *tmp;
00782             }
00783 
00785             friend size_type operator-( const _const_rev_iterator& left, const _const_rev_iterator& right );
00787             friend bool operator==( const _const_rev_iterator& left, const _const_rev_iterator& right );
00789             friend bool operator!=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00791             friend bool operator<( const _const_rev_iterator& left, const _const_rev_iterator& right );
00793             friend bool operator<=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00795             friend bool operator>( const _const_rev_iterator& left, const _const_rev_iterator& right );
00797             friend bool operator>=( const _const_rev_iterator& left, const _const_rev_iterator& right );
00798         };
00799         //#########################################################################
00800 
00801         typedef _fwd_iterator iterator;                     
00802         typedef _rev_iterator reverse_iterator;             
00803         typedef _const_fwd_iterator const_iterator;         
00804         typedef _const_rev_iterator const_reverse_iterator; 
00805 
00806 
00808 
00809 
00810         UTFString() {
00811             _init();
00812         }
00814         UTFString( const UTFString& copy ) {
00815             _init();
00816             mData = copy.mData;
00817         }
00819         UTFString( size_type length, const code_point& ch ) {
00820             _init();
00821             assign( length, ch );
00822         }
00824         UTFString( const code_point* str ) {
00825             _init();
00826             assign( str );
00827         }
00829         UTFString( const code_point* str, size_type length ) {
00830             _init();
00831             assign( str, length );
00832         }
00834         UTFString( const UTFString& str, size_type index, size_type length ) {
00835             _init();
00836             assign( str, index, length );
00837         }
00838 #if OGRE_IS_NATIVE_WCHAR_T
00839 
00840         UTFString( const wchar_t* w_str ) {
00841             _init();
00842             assign( w_str );
00843         }
00845         UTFString( const wchar_t* w_str, size_type length ) {
00846             _init();
00847             assign( w_str, length );
00848         }
00849 #endif
00850 
00851         UTFString( const std::wstring& wstr ) {
00852             _init();
00853             assign( wstr );
00854         }
00856         UTFString( const char* c_str ) {
00857             _init();
00858             assign( c_str );
00859         }
00861         UTFString( const char* c_str, size_type length ) {
00862             _init();
00863             assign( c_str, length );
00864         }
00866         UTFString( const std::string& str ) {
00867             _init();
00868             assign( str );
00869         }
00871         ~UTFString() {
00872             _cleanBuffer();
00873         }
00875 
00877 
00879 
00880 
00881         size_type size() const {
00882             return mData.size();
00883         }
00885         size_type length() const {
00886             return size();
00887         }
00889 
00890         size_type length_Characters() const {
00891             const_iterator i = begin(), ie = end();
00892             size_type c = 0;
00893             while ( i != ie ) {
00894                 i.moveNext();
00895                 ++c;
00896             }
00897             return c;
00898         }
00900         size_type max_size() const {
00901             return mData.max_size();
00902         }
00904         void reserve( size_type size ) {
00905             mData.reserve( size );
00906         }
00908         void resize( size_type num, const code_point& val = 0 ) {
00909             mData.resize( num, val );
00910         }
00912         void swap( UTFString& from ) {
00913             mData.swap( from.mData );
00914         }
00916         bool empty() const {
00917             return mData.empty();
00918         }
00920         const code_point* c_str() const {
00921             return mData.c_str();
00922         }
00924         const code_point* data() const {
00925             return c_str();
00926         }
00928         size_type capacity() const {
00929             return mData.capacity();
00930         }
00932         void clear() {
00933             mData.clear();
00934         }
00936 
00937         UTFString substr( size_type index, size_type num = npos ) const {
00938             // this could avoid the extra copy if we used a private specialty constructor
00939             dstring data = mData.substr( index, num );
00940             UTFString tmp;
00941             tmp.mData.swap( data );
00942             return tmp;
00943         }
00945         void push_back( unicode_char val ) {
00946             code_point cp[2];
00947             size_t c = _utf32_to_utf16( val, cp );
00948             if ( c > 0 ) push_back( cp[0] );
00949             if ( c > 1 ) push_back( cp[1] );
00950         }
#if OGRE_IS_NATIVE_WCHAR_T
        //! Appends the wide character \a val.
        /*! The value is widened to unicode_char and sent through
            push_back( unicode_char ), so a \c wchar_t above 0xFFFF (possible
            where \c wchar_t is 32 bits wide) becomes a surrogate pair instead
            of being truncated to a single code point.
            NOTE(review): relies on the Unicode path passing values that fit in
            16 bits through unchanged -- confirm against _utf32_to_utf16(). */
        void push_back( wchar_t val ) {
            // we do this because the Unicode method still preserves UTF-16 code points
            push_back( static_cast<unicode_char>( val ) );
        }
#endif
00958 
00959 
00961         void push_back( code_point val ) {
00962             mData.push_back( val );
00963         }
00965 
00966         void push_back( char val ) {
00967             mData.push_back( static_cast<code_point>( val ) );
00968         }
00970         bool inString( unicode_char ch ) const {
00971             const_iterator i, ie = end();
00972             for ( i = begin(); i != ie; i.moveNext() ) {
00973                 if ( i.getCharacter() == ch )
00974                     return true;
00975             }
00976             return false;
00977         }
00979 
00981 
00983 
00984 
00985         const std::string& asUTF8() const {
00986             _load_buffer_UTF8();
00987             return *m_buffer.mStrBuffer;
00988         }
00990         const char* asUTF8_c_str() const {
00991             _load_buffer_UTF8();
00992             return m_buffer.mStrBuffer->c_str();
00993         }
00995         const utf32string& asUTF32() const {
00996             _load_buffer_UTF32();
00997             return *m_buffer.mUTF32StrBuffer;
00998         }
01000         const unicode_char* asUTF32_c_str() const {
01001             _load_buffer_UTF32();
01002             return m_buffer.mUTF32StrBuffer->c_str();
01003         }
01005         const std::wstring& asWStr() const {
01006             _load_buffer_WStr();
01007             return *m_buffer.mWStrBuffer;
01008         }
01010         const wchar_t* asWStr_c_str() const {
01011             _load_buffer_WStr();
01012             return m_buffer.mWStrBuffer->c_str();
01013         }
01015 
01017 
01019 
01020 
01021         code_point& at( size_type loc ) {
01022             return mData.at( loc );
01023         }
01025         const code_point& at( size_type loc ) const {
01026             return mData.at( loc );
01027         }
01029 
01033         unicode_char getChar( size_type loc ) const {
01034             const code_point* ptr = c_str();
01035             unicode_char uc;
01036             size_t l = _utf16_char_length( ptr[loc] );
01037             code_point cp[2] = { /* blame the code beautifier */
01038                                    0, 0
01039                                };
01040             cp[0] = ptr[loc];
01041 
01042             if ( l == 2 && ( loc + 1 ) < mData.length() ) {
01043                 cp[1] = ptr[loc+1];
01044             }
01045             _utf16_to_utf32( cp, uc );
01046             return uc;
01047         }
01049 
01057         int setChar( size_type loc, unicode_char ch ) {
01058             code_point cp[2] = { /* blame the code beautifier */
01059                                    0, 0
01060                                };
01061             size_t l = _utf32_to_utf16( ch, cp );
01062             unicode_char existingChar = getChar( loc );
01063             size_t existingSize = _utf16_char_length( existingChar );
01064             size_t newSize = _utf16_char_length( ch );
01065 
01066             if ( newSize > existingSize ) {
01067                 at( loc ) = cp[0];
01068                 insert( loc + 1, 1, cp[1] );
01069                 return 1;
01070             }
01071             if ( newSize < existingSize ) {
01072                 erase( loc, 1 );
01073                 at( loc ) = cp[0];
01074                 return -1;
01075             }
01076 
01077             // newSize == existingSize
01078             at( loc ) = cp[0];
01079             if ( l == 2 ) at( loc + 1 ) = cp[1];
01080             return 0;
01081         }
01083 
01085 
01087 
01088 
01089         iterator begin() {
01090             iterator i;
01091             i.mIter = mData.begin();
01092             i.mString = this;
01093             return i;
01094         }
01096         const_iterator begin() const {
01097             const_iterator i;
01098             i.mIter = const_cast<UTFString*>( this )->mData.begin();
01099             i.mString = const_cast<UTFString*>( this );
01100             return i;
01101         }
01103         iterator end() {
01104             iterator i;
01105             i.mIter = mData.end();
01106             i.mString = this;
01107             return i;
01108         }
01110         const_iterator end() const {
01111             const_iterator i;
01112             i.mIter = const_cast<UTFString*>( this )->mData.end();
01113             i.mString = const_cast<UTFString*>( this );
01114             return i;
01115         }
01117         reverse_iterator rbegin() {
01118             reverse_iterator i;
01119             i.mIter = mData.end();
01120             i.mString = this;
01121             return i;
01122         }
01124         const_reverse_iterator rbegin() const {
01125             const_reverse_iterator i;
01126             i.mIter = const_cast<UTFString*>( this )->mData.end();
01127             i.mString = const_cast<UTFString*>( this );
01128             return i;
01129         }
01131         reverse_iterator rend() {
01132             reverse_iterator i;
01133             i.mIter = mData.begin();
01134             i.mString = this;
01135             return i;
01136         }
01138         const_reverse_iterator rend() const {
01139             const_reverse_iterator i;
01140             i.mIter = const_cast<UTFString*>( this )->mData.begin();
01141             i.mString = const_cast<UTFString*>( this );
01142             return i;
01143         }
01145 
01147 
01149 
01150 
01151         UTFString& assign( iterator start, iterator end ) {
01152             mData.assign( start.mIter, end.mIter );
01153             return *this;
01154         }
01156         UTFString& assign( const UTFString& str ) {
01157             mData.assign( str.mData );
01158             return *this;
01159         }
01161         UTFString& assign( const code_point* str ) {
01162             mData.assign( str );
01163             return *this;
01164         }
01166         UTFString& assign( const code_point* str, size_type num ) {
01167             mData.assign( str, num );
01168             return *this;
01169         }
01171         UTFString& assign( const UTFString& str, size_type index, size_type len ) {
01172             mData.assign( str.mData, index, len );
01173             return *this;
01174         }
01176         UTFString& assign( size_type num, const code_point& ch ) {
01177             mData.assign( num, ch );
01178             return *this;
01179         }
01181         UTFString& assign( const std::wstring& wstr ) {
01182             mData.clear();
01183             mData.reserve( wstr.length() ); // best guess bulk allocate
01184 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
01185             code_point tmp;
01186             std::wstring::const_iterator i, ie = wstr.end();
01187             for ( i = wstr.begin(); i != ie; i++ ) {
01188                 tmp = static_cast<code_point>( *i );
01189                 mData.push_back( tmp );
01190             }
01191 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
01192             code_point cp[3] = {0, 0, 0};
01193             unicode_char tmp;
01194             std::wstring::const_iterator i, ie = wstr.end();
01195             for ( i = wstr.begin(); i != ie; i++ ) {
01196                 tmp = static_cast<unicode_char>( *i );
01197                 size_t l = _utf32_to_utf16( tmp, cp );
01198                 if ( l > 0 ) mData.push_back( cp[0] );
01199                 if ( l > 1 ) mData.push_back( cp[1] );
01200             }
01201 #endif
01202             return *this;
01203         }
#if OGRE_IS_NATIVE_WCHAR_T
        //! Replaces the contents with the zero-terminated wide string \a w_str.
        UTFString& assign( const wchar_t* w_str ) {
            const std::wstring wide( w_str );
            return assign( wide );
        }
        //! Replaces the contents with the first \a num wide characters of \a w_str.
        UTFString& assign( const wchar_t* w_str, size_type num ) {
            const std::wstring wide( w_str, num );
            return assign( wide );
        }
#endif
01218 
01219         UTFString& assign( const std::string& str ) {
01220             size_type len = _verifyUTF8( str );
01221             clear(); // empty our contents, if there are any
01222             reserve( len ); // best guess bulk capacity growth
01223 
01224             // This is a 3 step process, converting each byte in the UTF-8 stream to UTF-32,
01225             // then converting it to UTF-16, then finally appending the data buffer
01226 
01227             unicode_char uc;          // temporary Unicode character buffer
01228             unsigned char utf8buf[7]; // temporary UTF-8 buffer
01229             utf8buf[6] = 0;
01230             size_t utf8len;           // UTF-8 length
01231             code_point utf16buff[3];  // temporary UTF-16 buffer
01232             utf16buff[2] = 0;
01233             size_t utf16len;          // UTF-16 length
01234 
01235             std::string::const_iterator i, ie = str.end();
01236             for ( i = str.begin(); i != ie; i++ ) {
01237                 utf8len = _utf8_char_length( static_cast<unsigned char>( *i ) ); // estimate bytes to load
01238                 for ( size_t j = 0; j < utf8len; j++ ) { // load the needed UTF-8 bytes
01239                     utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) ); // we don't increment 'i' here just in case the estimate is wrong (shouldn't happen, but we're being careful)
01240                 }
01241                 utf8buf[utf8len] = 0; // nul terminate so we throw an exception before running off the end of the buffer
01242                 utf8len = _utf8_to_utf32( utf8buf, uc ); // do the UTF-8 -> UTF-32 conversion
01243                 i += utf8len - 1; // we subtract 1 for the increment of the 'for' loop
01244 
01245                 utf16len = _utf32_to_utf16( uc, utf16buff ); // UTF-32 -> UTF-16 conversion
01246                 append( utf16buff, utf16len ); // append the characters to the string
01247             }
01248             return *this;
01249         }
01251         UTFString& assign( const char* c_str ) {
01252             std::string tmp( c_str );
01253             return assign( tmp );
01254         }
01256         UTFString& assign( const char* c_str, size_type num ) {
01257             std::string tmp;
01258             tmp.assign( c_str, num );
01259             return assign( tmp );
01260         }
01262 
01264 
01266 
01267 
01268         UTFString& append( const UTFString& str ) {
01269             mData.append( str.mData );
01270             return *this;
01271         }
01273         UTFString& append( const code_point* str ) {
01274             mData.append( str );
01275             return *this;
01276         }
01278         UTFString& append( const UTFString& str, size_type index, size_type len ) {
01279             mData.append( str.mData, index, len );
01280             return *this;
01281         }
01283         UTFString& append( const code_point* str, size_type num ) {
01284             mData.append( str, num );
01285             return *this;
01286         }
01288         UTFString& append( size_type num, code_point ch ) {
01289             mData.append( num, ch );
01290             return *this;
01291         }
01293         UTFString& append( iterator start, iterator end ) {
01294             mData.append( start.mIter, end.mIter );
01295             return *this;
01296         }
01297 #if OGRE_IS_NATIVE_WCHAR_T
01298 
01299         UTFString& append( const wchar_t* w_str, size_type num ) {
01300             std::wstring tmp( w_str, num );
01301             return append( tmp );
01302         }
01304         UTFString& append( size_type num, wchar_t ch ) {
01305             return append( num, static_cast<unicode_char>( ch ) );
01306         }
01307 #endif
01308 
01309         UTFString& append( const char* c_str, size_type num ) {
01310             UTFString tmp( c_str, num );
01311             append( tmp );
01312             return *this;
01313         }
01315         UTFString& append( size_type num, char ch ) {
01316             append( num, static_cast<code_point>( ch ) );
01317             return *this;
01318         }
01320         UTFString& append( size_type num, unicode_char ch ) {
01321             code_point cp[2] = {0, 0};
01322             if ( _utf32_to_utf16( ch, cp ) == 2 ) {
01323                 for ( size_type i = 0; i < num; i++ ) {
01324                     append( 1, cp[0] );
01325                     append( 1, cp[1] );
01326                 }
01327             } else {
01328                 for ( size_type i = 0; i < num; i++ ) {
01329                     append( 1, cp[0] );
01330                 }
01331             }
01332             return *this;
01333         }
01335 
01337 
01339 
01340 
01341         iterator insert( iterator i, const code_point& ch ) {
01342             iterator ret;
01343             ret.mIter = mData.insert( i.mIter, ch );
01344             ret.mString = this;
01345             return ret;
01346         }
01348         UTFString& insert( size_type index, const UTFString& str ) {
01349             mData.insert( index, str.mData );
01350             return *this;
01351         }
01353         UTFString& insert( size_type index, const code_point* str ) {
01354             mData.insert( index, str );
01355             return *this;
01356         }
01358         UTFString& insert( size_type index1, const UTFString& str, size_type index2, size_type num ) {
01359             mData.insert( index1, str.mData, index2, num );
01360             return *this;
01361         }
01363         void insert( iterator i, iterator start, iterator end ) {
01364             mData.insert( i.mIter, start.mIter, end.mIter );
01365         }
01367         UTFString& insert( size_type index, const code_point* str, size_type num ) {
01368             mData.insert( index, str, num );
01369             return *this;
01370         }
01371 #if OGRE_IS_NATIVE_WCHAR_T
01372 
01373         UTFString& insert( size_type index, const wchar_t* w_str, size_type num ) {
01374             UTFString tmp( w_str, num );
01375             insert( index, tmp );
01376             return *this;
01377         }
01378 #endif
01379 
01380         UTFString& insert( size_type index, const char* c_str, size_type num ) {
01381             UTFString tmp( c_str, num );
01382             insert( index, tmp );
01383             return *this;
01384         }
01386         UTFString& insert( size_type index, size_type num, code_point ch ) {
01387             mData.insert( index, num, ch );
01388             return *this;
01389         }
01390 #if OGRE_IS_NATIVE_WCHAR_T
01391 
01392         UTFString& insert( size_type index, size_type num, wchar_t ch ) {
01393             insert( index, num, static_cast<unicode_char>( ch ) );
01394             return *this;
01395         }
01396 #endif
01397 
01398         UTFString& insert( size_type index, size_type num, char ch ) {
01399             insert( index, num, static_cast<code_point>( ch ) );
01400             return *this;
01401         }
01403         UTFString& insert( size_type index, size_type num, unicode_char ch ) {
01404             code_point cp[3] = {0, 0, 0};
01405             size_t l = _utf32_to_utf16( ch, cp );
01406             if ( l == 1 ) {
01407                 return insert( index, num, cp[0] );
01408             }
01409             for ( size_type c = 0; c < num; c++ ) {
01410                 // insert in reverse order to preserve ordering after insert
01411                 insert( index, 1, cp[1] );
01412                 insert( index, 1, cp[0] );
01413             }
01414             return *this;
01415         }
01417         void insert( iterator i, size_type num, const code_point& ch ) {
01418             mData.insert( i.mIter, num, ch );
01419         }
01420 #if OGRE_IS_NATIVE_WCHAR_T
01421 
01422         void insert( iterator i, size_type num, const wchar_t& ch ) {
01423             insert( i, num, static_cast<unicode_char>( ch ) );
01424         }
01425 #endif
01426 
01427         void insert( iterator i, size_type num, const char& ch ) {
01428             insert( i, num, static_cast<code_point>( ch ) );
01429         }
01431         void insert( iterator i, size_type num, const unicode_char& ch ) {
01432             code_point cp[3] = {0, 0, 0};
01433             size_t l = _utf32_to_utf16( ch, cp );
01434             if ( l == 1 ) {
01435                 insert( i, num, cp[0] );
01436             } else {
01437                 for ( size_type c = 0; c < num; c++ ) {
01438                     // insert in reverse order to preserve ordering after insert
01439                     insert( i, 1, cp[1] );
01440                     insert( i, 1, cp[0] );
01441                 }
01442             }
01443         }
01445 
01447 
01449 
01450 
01451         iterator erase( iterator loc ) {
01452             iterator ret;
01453             ret.mIter = mData.erase( loc.mIter );
01454             ret.mString = this;
01455             return ret;
01456         }
01458         iterator erase( iterator start, iterator end ) {
01459             iterator ret;
01460             ret.mIter = mData.erase( start.mIter, end.mIter );
01461             ret.mString = this;
01462             return ret;
01463         }
01465         UTFString& erase( size_type index = 0, size_type num = npos ) {
01466             if ( num == npos )
01467                 mData.erase( index );
01468             else
01469                 mData.erase( index, num );
01470             return *this;
01471         }
01473 
01475 
01477 
01478 
01479         UTFString& replace( size_type index1, size_type num1, const UTFString& str ) {
01480             mData.replace( index1, num1, str.mData, 0, npos );
01481             return *this;
01482         }
01484         UTFString& replace( size_type index1, size_type num1, const UTFString& str, size_type num2 ) {
01485             mData.replace( index1, num1, str.mData, 0, num2 );
01486             return *this;
01487         }
01489         UTFString& replace( size_type index1, size_type num1, const UTFString& str, size_type index2, size_type num2 ) {
01490             mData.replace( index1, num1, str.mData, index2, num2 );
01491             return *this;
01492         }
01494         UTFString& replace( iterator start, iterator end, const UTFString& str, size_type num = npos ) {
01495             _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
01496 
01497             size_type index1 = begin() - st;
01498             size_type num1 = end - st;
01499             return replace( index1, num1, str, 0, num );
01500         }
01502         UTFString& replace( size_type index, size_type num1, size_type num2, code_point ch ) {
01503             mData.replace( index, num1, num2, ch );
01504             return *this;
01505         }
01507         UTFString& replace( iterator start, iterator end, size_type num, code_point ch ) {
01508             _const_fwd_iterator st(start); //Work around for gcc, allow it to find correct overload
01509 
01510             size_type index1 = begin() - st;
01511             size_type num1 = end - st;
01512             return replace( index1, num1, num, ch );
01513         }
01515 
01517 
01519 
01520 
01521         int compare( const UTFString& str ) const {
01522             return mData.compare( str.mData );
01523         }
01525         int compare( const code_point* str ) const {
01526             return mData.compare( str );
01527         }
01529         int compare( size_type index, size_type length, const UTFString& str ) const {
01530             return mData.compare( index, length, str.mData );
01531         }
01533         int compare( size_type index, size_type length, const UTFString& str, size_type index2, size_type length2 ) const {
01534             return mData.compare( index, length, str.mData, index2, length2 );
01535         }
01537         int compare( size_type index, size_type length, const code_point* str, size_type length2 ) const {
01538             return mData.compare( index, length, str, length2 );
01539         }
01540 #if OGRE_IS_NATIVE_WCHAR_T
01541 
01542         int compare( size_type index, size_type length, const wchar_t* w_str, size_type length2 ) const {
01543             UTFString tmp( w_str, length2 );
01544             return compare( index, length, tmp );
01545         }
01546 #endif
01547 
01548         int compare( size_type index, size_type length, const char* c_str, size_type length2 ) const {
01549             UTFString tmp( c_str, length2 );
01550             return compare( index, length, tmp );
01551         }
01553 
01555 
01557 
01558 
01559 
01560         size_type find( const UTFString& str, size_type index = 0 ) const {
01561             return mData.find( str.c_str(), index );
01562         }
01564 
01565         size_type find( const code_point* cp_str, size_type index, size_type length ) const {
01566             UTFString tmp( cp_str );
01567             return mData.find( tmp.c_str(), index, length );
01568         }
01570 
01571         size_type find( const char* c_str, size_type index, size_type length ) const {
01572             UTFString tmp( c_str );
01573             return mData.find( tmp.c_str(), index, length );
01574         }
01575 #if OGRE_IS_NATIVE_WCHAR_T
01576 
01577 
01578         size_type find( const wchar_t* w_str, size_type index, size_type length ) const {
01579             UTFString tmp( w_str );
01580             return mData.find( tmp.c_str(), index, length );
01581         }
01582 #endif
01583 
01584 
01585         size_type find( char ch, size_type index = 0 ) const {
01586             return find( static_cast<code_point>( ch ), index );
01587         }
01589 
01590         size_type find( code_point ch, size_type index = 0 ) const {
01591             return mData.find( ch, index );
01592         }
01593 #if OGRE_IS_NATIVE_WCHAR_T
01594 
01595 
01596         size_type find( wchar_t ch, size_type index = 0 ) const {
01597             return find( static_cast<unicode_char>( ch ), index );
01598         }
01599 #endif
01600 
01601 
01602         size_type find( unicode_char ch, size_type index = 0 ) const {
01603             code_point cp[3] = {0, 0, 0};
01604             size_t l = _utf32_to_utf16( ch, cp );
01605             return find( UTFString( cp, l ), index );
01606         }
01607 
01609         size_type rfind( const UTFString& str, size_type index = 0 ) const {
01610             return mData.rfind( str.c_str(), index );
01611         }
01613         size_type rfind( const code_point* cp_str, size_type index, size_type num ) const {
01614             UTFString tmp( cp_str );
01615             return mData.rfind( tmp.c_str(), index, num );
01616         }
01618         size_type rfind( const char* c_str, size_type index, size_type num ) const {
01619             UTFString tmp( c_str );
01620             return mData.rfind( tmp.c_str(), index, num );
01621         }
01622 #if OGRE_IS_NATIVE_WCHAR_T
01623 
01624         size_type rfind( const wchar_t* w_str, size_type index, size_type num ) const {
01625             UTFString tmp( w_str );
01626             return mData.rfind( tmp.c_str(), index, num );
01627         }
01628 #endif
01629 
01630         size_type rfind( char ch, size_type index = 0 ) const {
01631             return rfind( static_cast<code_point>( ch ), index );
01632         }
01634         size_type rfind( code_point ch, size_type index ) const {
01635             return mData.rfind( ch, index );
01636         }
01637 #if OGRE_IS_NATIVE_WCHAR_T
01638 
01639         size_type rfind( wchar_t ch, size_type index = 0 ) const {
01640             return rfind( static_cast<unicode_char>( ch ), index );
01641         }
01642 #endif
01643 
01644         size_type rfind( unicode_char ch, size_type index = 0 ) const {
01645             code_point cp[3] = {0, 0, 0};
01646             size_t l = _utf32_to_utf16( ch, cp );
01647             return rfind( UTFString( cp, l ), index );
01648         }
01650 
01652 
01654 
01655 
01656         size_type find_first_of( const UTFString &str, size_type index = 0, size_type num = npos ) const {
01657             size_type i = 0;
01658             const size_type len = length();
01659             while ( i < num && ( index + i ) < len ) {
01660                 unicode_char ch = getChar( index + i );
01661                 if ( str.inString( ch ) )
01662                     return index + i;
01663                 i += _utf16_char_length( ch ); // increment by the Unicode character length
01664             }
01665             return npos;
01666         }
01668         size_type find_first_of( code_point ch, size_type index = 0 ) const {
01669             UTFString tmp;
01670             tmp.assign( 1, ch );
01671             return find_first_of( tmp, index );
01672         }
01674         size_type find_first_of( char ch, size_type index = 0 ) const {
01675             return find_first_of( static_cast<code_point>( ch ), index );
01676         }
01677 #if OGRE_IS_NATIVE_WCHAR_T
01678 
01679         size_type find_first_of( wchar_t ch, size_type index = 0 ) const {
01680             return find_first_of( static_cast<unicode_char>( ch ), index );
01681         }
01682 #endif
01683 
01684         size_type find_first_of( unicode_char ch, size_type index = 0 ) const {
01685             code_point cp[3] = {0, 0, 0};
01686             size_t l = _utf32_to_utf16( ch, cp );
01687             return find_first_of( UTFString( cp, l ), index );
01688         }
01689 
01691         size_type find_first_not_of( const UTFString& str, size_type index = 0, size_type num = npos ) const {
01692             size_type i = 0;
01693             const size_type len = length();
01694             while ( i < num && ( index + i ) < len ) {
01695                 unicode_char ch = getChar( index + i );
01696                 if ( !str.inString( ch ) )
01697                     return index + i;
01698                 i += _utf16_char_length( ch ); // increment by the Unicode character length
01699             }
01700             return npos;
01701         }
01703         size_type find_first_not_of( code_point ch, size_type index = 0 ) const {
01704             UTFString tmp;
01705             tmp.assign( 1, ch );
01706             return find_first_not_of( tmp, index );
01707         }
01709         size_type find_first_not_of( char ch, size_type index = 0 ) const {
01710             return find_first_not_of( static_cast<code_point>( ch ), index );
01711         }
01712 #if OGRE_IS_NATIVE_WCHAR_T
01713 
01714         size_type find_first_not_of( wchar_t ch, size_type index = 0 ) const {
01715             return find_first_not_of( static_cast<unicode_char>( ch ), index );
01716         }
01717 #endif
01718 
01719         size_type find_first_not_of( unicode_char ch, size_type index = 0 ) const {
01720             code_point cp[3] = {0, 0, 0};
01721             size_t l = _utf32_to_utf16( ch, cp );
01722             return find_first_not_of( UTFString( cp, l ), index );
01723         }
01724 
01726         size_type find_last_of( const UTFString& str, size_type index = npos, size_type num = npos ) const {
01727             size_type i = 0;
01728             const size_type len = length();
01729             if ( index > len ) index = len - 1;
01730 
01731             while ( i < num && ( index - i ) != npos ) {
01732                 size_type j = index - i;
01733                 // careful to step full Unicode characters
01734                 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
01735                     j = index - ++i;
01736                 }
01737                 // and back to the usual dull test
01738                 unicode_char ch = getChar( j );
01739                 if ( str.inString( ch ) )
01740                     return j;
01741                 i++;
01742             }
01743             return npos;
01744         }
01746         size_type find_last_of( code_point ch, size_type index = npos ) const {
01747             UTFString tmp;
01748             tmp.assign( 1, ch );
01749             return find_last_of( tmp, index );
01750         }
01752         size_type find_last_of( char ch, size_type index = npos ) const {
01753             return find_last_of( static_cast<code_point>( ch ), index );
01754         }
01755 #if OGRE_IS_NATIVE_WCHAR_T
01756 
01757         size_type find_last_of( wchar_t ch, size_type index = npos ) const {
01758             return find_last_of( static_cast<unicode_char>( ch ), index );
01759         }
01760 #endif
01761 
01762         size_type find_last_of( unicode_char ch, size_type index = npos ) const {
01763             code_point cp[3] = {0, 0, 0};
01764             size_t l = _utf32_to_utf16( ch, cp );
01765             return find_last_of( UTFString( cp, l ), index );
01766         }
01767 
01769         size_type find_last_not_of( const UTFString& str, size_type index = npos, size_type num = npos ) const {
01770             size_type i = 0;
01771             const size_type len = length();
01772             if ( index > len ) index = len - 1;
01773 
01774             while ( i < num && ( index - i ) != npos ) {
01775                 size_type j = index - i;
01776                 // careful to step full Unicode characters
01777                 if ( j != 0 && _utf16_surrogate_follow( at( j ) ) && _utf16_surrogate_lead( at( j - 1 ) ) ) {
01778                     j = index - ++i;
01779                 }
01780                 // and back to the usual dull test
01781                 unicode_char ch = getChar( j );
01782                 if ( !str.inString( ch ) )
01783                     return j;
01784                 i++;
01785             }
01786             return npos;
01787         }
01789         size_type find_last_not_of( code_point ch, size_type index = npos ) const {
01790             UTFString tmp;
01791             tmp.assign( 1, ch );
01792             return find_last_not_of( tmp, index );
01793         }
01795         size_type find_last_not_of( char ch, size_type index = npos ) const {
01796             return find_last_not_of( static_cast<code_point>( ch ), index );
01797         }
01798 #if OGRE_IS_NATIVE_WCHAR_T
01799 
01800         size_type find_last_not_of( wchar_t ch, size_type index = npos ) const {
01801             return find_last_not_of( static_cast<unicode_char>( ch ), index );
01802         }
01803 #endif
01804 
01805         size_type find_last_not_of( unicode_char ch, size_type index = npos ) const {
01806             code_point cp[3] = {0, 0, 0};
01807             size_t l = _utf32_to_utf16( ch, cp );
01808             return find_last_not_of( UTFString( cp, l ), index );
01809         }
01811 
01813 
01815 
01816 
01817         bool operator<( const UTFString& right ) const {
01818             return compare( right ) < 0;
01819         }
01821         bool operator<=( const UTFString& right ) const {
01822             return compare( right ) <= 0;
01823         }
01825         bool operator>( const UTFString& right ) const {
01826             return compare( right ) > 0;
01827         }
01829         bool operator>=( const UTFString& right ) const {
01830             return compare( right ) >= 0;
01831         }
01833         bool operator==( const UTFString& right ) const {
01834             return compare( right ) == 0;
01835         }
01837         bool operator!=( const UTFString& right ) const {
01838             return !operator==( right );
01839         }
01841         UTFString& operator=( const UTFString& s ) {
01842             return assign( s );
01843         }
01845         UTFString& operator=( code_point ch ) {
01846             clear();
01847             return append( 1, ch );
01848         }
01850         UTFString& operator=( char ch ) {
01851             clear();
01852             return append( 1, ch );
01853         }
01854 #if OGRE_IS_NATIVE_WCHAR_T
01855 
01856         UTFString& operator=( wchar_t ch ) {
01857             clear();
01858             return append( 1, ch );
01859         }
01860 #endif
01861 
01862         UTFString& operator=( unicode_char ch ) {
01863             clear();
01864             return append( 1, ch );
01865         }
01867         code_point& operator[]( size_type index ) {
01868             return at( index );
01869         }
01871         const code_point& operator[]( size_type index ) const {
01872             return at( index );
01873         }
01875 
01877 
01879 
01880 
01881         operator std::string() const {
01882             return std::string( asUTF8() );
01883         }
01885         operator std::wstring() const {
01886             return std::wstring( asWStr() );
01887         }
01889 
01891 
01893 
01894 
01895         static bool _utf16_independent_char( code_point cp ) {
01896             if ( 0xD800 <= cp && cp <= 0xDFFF ) // tests if the cp is within the surrogate pair range
01897                 return false; // it matches a surrogate pair signature
01898             return true; // everything else is a standalone code point
01899         }
01901         static bool _utf16_surrogate_lead( code_point cp ) {
01902             if ( 0xD800 <= cp && cp <= 0xDBFF ) // tests if the cp is within the 2nd word of a surrogate pair
01903                 return true; // it is a 1st word
01904             return false; // it isn't
01905         }
01907         static bool _utf16_surrogate_follow( code_point cp ) {
01908             if ( 0xDC00 <= cp && cp <= 0xDFFF ) // tests if the cp is within the 2nd word of a surrogate pair
01909                 return true; // it is a 2nd word
01910             return false; // everything else isn't
01911         }
01913         static size_t _utf16_char_length( code_point cp ) {
01914             if ( 0xD800 <= cp && cp <= 0xDBFF ) // test if cp is the beginning of a surrogate pair
01915                 return 2; // if it is, then we are 2 words long
01916             return 1; // otherwise we are only 1 word long
01917         }
01919         static size_t _utf16_char_length( unicode_char uc ) {
01920             if ( uc > 0xFFFF ) // test if uc is greater than the single word maximum
01921                 return 2; // if so, we need a surrogate pair
01922             return 1; // otherwise we can stuff it into a single word
01923         }
01925 
01929         static size_t _utf16_to_utf32( const code_point in_cp[2], unicode_char& out_uc ) {
01930             const code_point& cp1 = in_cp[0];
01931             const code_point& cp2 = in_cp[1];
01932             bool wordPair = false;
01933 
01934             // does it look like a surrogate pair?
01935             if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
01936                 // looks like one, but does the other half match the algorithm as well?
01937                 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
01938                     wordPair = true; // yep!
01939             }
01940 
01941             if ( !wordPair ) { // if we aren't a 100% authentic surrogate pair, then just copy the value
01942                 out_uc = cp1;
01943                 return 1;
01944             }
01945 
01946             unsigned short cU = cp1, cL = cp2; // copy upper and lower words of surrogate pair to writable buffers
01947             cU -= 0xD800; // remove the encoding markers
01948             cL -= 0xDC00;
01949 
01950             out_uc = ( cU & 0x03FF ) << 10; // grab the 10 upper bits and set them in their proper location
01951             out_uc |= ( cL & 0x03FF ); // combine in the lower 10 bits
01952             out_uc += 0x10000; // add back in the value offset
01953 
01954             return 2; // this whole operation takes to words, so that's what we'll return
01955         }
01957 
01962         static size_t _utf32_to_utf16( const unicode_char& in_uc, code_point out_cp[2] ) {
01963             if ( in_uc <= 0xFFFF ) { // we blindly preserve sentinel values because our decoder understands them
01964                 out_cp[0] = in_uc;
01965                 return 1;
01966             }
01967             unicode_char uc = in_uc; // copy to writable buffer
01968             unsigned short tmp; // single code point buffer
01969             uc -= 0x10000; // subtract value offset
01970 
01971             //process upper word
01972             tmp = ( uc >> 10 ) & 0x03FF; // grab the upper 10 bits
01973             tmp += 0xD800; // add encoding offset
01974             out_cp[0] = tmp; // write
01975 
01976             // process lower word
01977             tmp = uc & 0x03FF; // grab the lower 10 bits
01978             tmp += 0xDC00; // add encoding offset
01979             out_cp[1] = tmp; // write
01980 
01981             return 2; // return used word count (2 for surrogate pairs)
01982         }
01984 
01986 
01988 
01989 
01990         static bool _utf8_start_char( unsigned char cp ) {
01991             return ( cp & ~_cont_mask ) != _cont;
01992         }
01994         static size_t _utf8_char_length( unsigned char cp ) {
01995             if ( !( cp & 0x80 ) ) return 1;
01996             if (( cp & ~_lead1_mask ) == _lead1 ) return 2;
01997             if (( cp & ~_lead2_mask ) == _lead2 ) return 3;
01998             if (( cp & ~_lead3_mask ) == _lead3 ) return 4;
01999             if (( cp & ~_lead4_mask ) == _lead4 ) return 5;
02000             if (( cp & ~_lead5_mask ) == _lead5 ) return 6;
02001             throw invalid_data( "invalid UTF-8 sequence header value" );
02002         }
02004         static size_t _utf8_char_length( unicode_char uc ) {
02005             /*
02006             7 bit:  U-00000000 - U-0000007F: 0xxxxxxx
02007             11 bit: U-00000080 - U-000007FF: 110xxxxx 10xxxxxx
02008             16 bit: U-00000800 - U-0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx
02009             21 bit: U-00010000 - U-001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
02010             26 bit: U-00200000 - U-03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
02011             31 bit: U-04000000 - U-7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
02012             */
02013             if ( !( uc & ~0x0000007F ) ) return 1;
02014             if ( !( uc & ~0x000007FF ) ) return 2;
02015             if ( !( uc & ~0x0000FFFF ) ) return 3;
02016             if ( !( uc & ~0x001FFFFF ) ) return 4;
02017             if ( !( uc & ~0x03FFFFFF ) ) return 5;
02018             if ( !( uc & ~0x7FFFFFFF ) ) return 6;
02019             throw invalid_data( "invalid UTF-32 value" );
02020         }
02021 
02023         static size_t _utf8_to_utf32( const unsigned char in_cp[6], unicode_char& out_uc ) {
02024             size_t len = _utf8_char_length( in_cp[0] );
02025             if ( len == 1 ) { // if we are only 1 byte long, then just grab it and exit
02026                 out_uc = in_cp[0];
02027                 return 1;
02028             }
02029 
02030             unicode_char c = 0; // temporary buffer
02031             size_t i = 0;
02032             switch ( len ) { // load header byte
02033             case 6:
02034                 c = in_cp[i] & _lead5_mask;
02035                 break;
02036             case 5:
02037                 c = in_cp[i] & _lead4_mask;
02038                 break;
02039             case 4:
02040                 c = in_cp[i] & _lead3_mask;
02041                 break;
02042             case 3:
02043                 c = in_cp[i] & _lead2_mask;
02044                 break;
02045             case 2:
02046                 c = in_cp[i] & _lead1_mask;
02047                 break;
02048             }
02049 
02050             for ( ++i; i < len; i++ ) { // load each continuation byte
02051                 if (( in_cp[i] & ~_cont_mask ) != _cont )
02052                     throw invalid_data( "bad UTF-8 continuation byte" );
02053                 c <<= 6;
02054                 c |= ( in_cp[i] & _cont_mask );
02055             }
02056 
02057             out_uc = c; // write the final value and return the used byte length
02058             return len;
02059         }
02061         static size_t _utf32_to_utf8( const unicode_char& in_uc, unsigned char out_cp[6] ) {
02062             size_t len = _utf8_char_length( in_uc ); // predict byte length of sequence
02063             unicode_char c = in_uc; // copy to temp buffer
02064 
02065             //stuff all of the lower bits
02066             for ( size_t i = len - 1; i > 0; i-- ) {
02067                 out_cp[i] = (( c ) & _cont_mask ) | _cont;
02068                 c >>= 6;
02069             }
02070 
02071             //now write the header byte
02072             switch ( len ) {
02073             case 6:
02074                 out_cp[0] = (( c ) & _lead5_mask ) | _lead5;
02075                 break;
02076             case 5:
02077                 out_cp[0] = (( c ) & _lead4_mask ) | _lead4;
02078                 break;
02079             case 4:
02080                 out_cp[0] = (( c ) & _lead3_mask ) | _lead3;
02081                 break;
02082             case 3:
02083                 out_cp[0] = (( c ) & _lead2_mask ) | _lead2;
02084                 break;
02085             case 2:
02086                 out_cp[0] = (( c ) & _lead1_mask ) | _lead1;
02087                 break;
02088             case 1:
02089             default:
02090                 out_cp[0] = ( c ) & 0x7F;
02091                 break;
02092             }
02093 
02094             // return the byte length of the sequence
02095             return len;
02096         }
02097 
02099         static size_type _verifyUTF8( const unsigned char* c_str ) {
02100             std::string tmp( reinterpret_cast<const char*>( c_str ) );
02101             return _verifyUTF8( tmp );
02102         }
02104         static size_type _verifyUTF8( const std::string& str ) {
02105             std::string::const_iterator i, ie = str.end();
02106             i = str.begin();
02107             size_type length = 0;
02108 
02109             while ( i != ie ) {
02110                 // characters pass until we find an extended sequence
02111                 if (( *i ) & 0x80 ) {
02112                     unsigned char c = ( *i );
02113                     size_t contBytes = 0;
02114 
02115                     // get continuation byte count and test for overlong sequences
02116                     if (( c & ~_lead1_mask ) == _lead1 ) { // 1 additional byte
02117                         if ( c == _lead1 ) throw invalid_data( "overlong UTF-8 sequence" );
02118                         contBytes = 1;
02119 
02120                     } else if (( c & ~_lead2_mask ) == _lead2 ) { // 2 additional bytes
02121                         contBytes = 2;
02122                         if ( c == _lead2 ) { // possible overlong UTF-8 sequence
02123                             c = ( *( i + 1 ) ); // look ahead to next byte in sequence
02124                             if (( c & _lead2 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02125                         }
02126 
02127                     } else if (( c & ~_lead3_mask ) == _lead3 ) { // 3 additional bytes
02128                         contBytes = 3;
02129                         if ( c == _lead3 ) { // possible overlong UTF-8 sequence
02130                             c = ( *( i + 1 ) ); // look ahead to next byte in sequence
02131                             if (( c & _lead3 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02132                         }
02133 
02134                     } else if (( c & ~_lead4_mask ) == _lead4 ) { // 4 additional bytes
02135                         contBytes = 4;
02136                         if ( c == _lead4 ) { // possible overlong UTF-8 sequence
02137                             c = ( *( i + 1 ) ); // look ahead to next byte in sequence
02138                             if (( c & _lead4 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02139                         }
02140 
02141                     } else if (( c & ~_lead5_mask ) == _lead5 ) { // 5 additional bytes
02142                         contBytes = 5;
02143                         if ( c == _lead5 ) { // possible overlong UTF-8 sequence
02144                             c = ( *( i + 1 ) ); // look ahead to next byte in sequence
02145                             if (( c & _lead5 ) == _cont ) throw invalid_data( "overlong UTF-8 sequence" );
02146                         }
02147                     }
02148 
02149                     // check remaining continuation bytes for
02150                     while ( contBytes-- ) {
02151                         c = ( *( ++i ) ); // get next byte in sequence
02152                         if (( c & ~_cont_mask ) != _cont )
02153                             throw invalid_data( "bad UTF-8 continuation byte" );
02154                     }
02155                 }
02156                 length++;
02157                 i++;
02158             }
02159             return length;
02160         }
02162 
02163     private:
02164         //template<class ITER_TYPE> friend class _iterator;
02165         dstring mData; // the string's own storage; NOTE(review): presumably the UTF-16 code point sequence — confirm against the dstring typedef
02166 
02168         enum BufferType { // tag recording which member of the m_buffer union is currently in use
02169             bt_none, // no typed buffer; the state _init() establishes
02170             bt_string, // m_buffer.mStrBuffer points to a std::string (set by _getBufferStr())
02171             bt_wstring, // m_buffer.mWStrBuffer points to a std::wstring (set by _getBufferWStr())
02172             bt_utf32string // m_buffer.mUTF32StrBuffer points to a utf32string (set by _getBufferUTF32Str())
02173         };
02174 
02176         void _init() {
02177             m_buffer.mVoidBuffer = 0;
02178             m_bufferType = bt_none;
02179             m_bufferSize = 0;
02180         }
02181 
02183         // Scratch buffer
02185         void _cleanBuffer() const {
02186             if ( m_buffer.mVoidBuffer != 0 ) {
02187                 switch ( m_bufferType ) {
02188                 case bt_string:
02189                     delete m_buffer.mStrBuffer;
02190                     break;
02191                 case bt_wstring:
02192                     delete m_buffer.mWStrBuffer;
02193                     break;
02194                 case bt_utf32string:
02195                     delete m_buffer.mUTF32StrBuffer;
02196                     break;
02197                 case bt_none: // under the worse of circumstances, this is all we can do, and hope it works out
02198                 default:
02199                     //delete m_buffer.mVoidBuffer;
02200                     // delete void* is undefined, don't do that
02201                     assert("This should never happen - mVoidBuffer should never contain something if we "
02202                         "don't know the type");
02203                     break;
02204                 }
02205                 m_buffer.mVoidBuffer = 0;
02206                 m_bufferSize = 0;
02207             }
02208         }
02209 
02211         void _getBufferStr() const {
02212             if ( m_bufferType != bt_string ) {
02213                 _cleanBuffer();
02214                 m_buffer.mStrBuffer = new std::string();
02215                 m_bufferType = bt_string;
02216             }
02217             m_buffer.mStrBuffer->clear();
02218         }
02220         void _getBufferWStr() const {
02221             if ( m_bufferType != bt_wstring ) {
02222                 _cleanBuffer();
02223                 m_buffer.mWStrBuffer = new std::wstring();
02224                 m_bufferType = bt_wstring;
02225             }
02226             m_buffer.mWStrBuffer->clear();
02227         }
02229         void _getBufferUTF32Str() const {
02230             if ( m_bufferType != bt_utf32string ) {
02231                 _cleanBuffer();
02232                 m_buffer.mUTF32StrBuffer = new utf32string();
02233                 m_bufferType = bt_utf32string;
02234             }
02235             m_buffer.mUTF32StrBuffer->clear();
02236         }
02237 
02238         void _load_buffer_UTF8() const {
02239             _getBufferStr();
02240             std::string& buffer = ( *m_buffer.mStrBuffer );
02241             buffer.reserve( length() );
02242 
02243             unsigned char utf8buf[6];
02244             char* charbuf = ( char* )utf8buf;
02245             unicode_char c;
02246             size_t len;
02247 
02248             const_iterator i, ie = end();
02249             for ( i = begin(); i != ie; i.moveNext() ) {
02250                 c = i.getCharacter();
02251                 len = _utf32_to_utf8( c, utf8buf );
02252                 size_t j = 0;
02253                 while ( j < len )
02254                     buffer.push_back( charbuf[j++] );
02255             }
02256         }
02257         void _load_buffer_WStr() const {
02258             _getBufferWStr();
02259             std::wstring& buffer = ( *m_buffer.mWStrBuffer );
02260             buffer.reserve( length() ); // may over reserve, but should be close enough
02261 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
02262             const_iterator i, ie = end();
02263             for ( i = begin(); i != ie; ++i ) {
02264                 buffer.push_back(( wchar_t )( *i ) );
02265             }
02266 #else // wchar_t fits UTF-32
02267             unicode_char c;
02268             const_iterator i, ie = end();
02269             for ( i = begin(); i != ie; i.moveNext() ) {
02270                 c = i.getCharacter();
02271                 buffer.push_back(( wchar_t )c );
02272             }
02273 #endif
02274         }
02275         void _load_buffer_UTF32() const {
02276             _getBufferUTF32Str();
02277             utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
02278             buffer.reserve( length() ); // may over reserve, but should be close enough
02279 
02280             unicode_char c;
02281 
02282             const_iterator i, ie = end();
02283             for ( i = begin(); i != ie; i.moveNext() ) {
02284                 c = i.getCharacter();
02285                 buffer.push_back( c );
02286             }
02287         }
02288 
02289         mutable BufferType m_bufferType; // identifies the data type held in m_buffer; mutable so const methods can rebuild the scratch buffer
02290         mutable size_t m_bufferSize; // size of the CString buffer; NOTE(review): the code visible here only ever resets it to 0 (_init, _cleanBuffer)
02291 
02292         // multi-purpose buffer used everywhere we need a throw-away buffer
02293         union { // every member is a pointer sharing the same storage; m_bufferType says which one is valid
02294             mutable void* mVoidBuffer; // type-agnostic view, used for null checks and for resetting the pointer
02295             mutable std::string* mStrBuffer; // valid when m_bufferType == bt_string
02296             mutable std::wstring* mWStrBuffer; // valid when m_bufferType == bt_wstring
02297             mutable utf32string* mUTF32StrBuffer; // valid when m_bufferType == bt_utf32string
02298         }
02299         m_buffer;
02300     };
02300     };
02301 
02303     inline UTFString operator+( const UTFString& s1, const UTFString& s2 ) {
02304         return UTFString( s1 ).append( s2 );
02305     }
02307     inline UTFString operator+( const UTFString& s1, UTFString::code_point c ) {
02308         return UTFString( s1 ).append( 1, c );
02309     }
02311     inline UTFString operator+( const UTFString& s1, UTFString::unicode_char c ) {
02312         return UTFString( s1 ).append( 1, c );
02313     }
02315     inline UTFString operator+( const UTFString& s1, char c ) {
02316         return UTFString( s1 ).append( 1, c );
02317     }
02318 #if OGRE_IS_NATIVE_WCHAR_T
02319 
02320     inline UTFString operator+( const UTFString& s1, wchar_t c ) {
02321         return UTFString( s1 ).append( 1, c );
02322     }
02323 #endif
02324 
02325     inline UTFString operator+( UTFString::code_point c, const UTFString& s2 ) {
02326         return UTFString().append( 1, c ).append( s2 );
02327     }
02329     inline UTFString operator+( UTFString::unicode_char c, const UTFString& s2 ) {
02330         return UTFString().append( 1, c ).append( s2 );
02331     }
02333     inline UTFString operator+( char c, const UTFString& s2 ) {
02334         return UTFString().append( 1, c ).append( s2 );
02335     }
02336 #if OGRE_IS_NATIVE_WCHAR_T
02337 
02338     inline UTFString operator+( wchar_t c, const UTFString& s2 ) {
02339         return UTFString().append( 1, c ).append( s2 );
02340     }
02341 #endif
02342 
02343     // (const) forward iterator common operators
02344     inline UTFString::size_type operator-( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
02345         return ( left.mIter - right.mIter );
02346     }
02347     inline bool operator==( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
02348         return left.mIter == right.mIter;
02349     }
02350     inline bool operator!=( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
02351         return left.mIter != right.mIter;
02352     }
02353     inline bool operator<( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
02354         return left.mIter < right.mIter;
02355     }
02356     inline bool operator<=( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
02357         return left.mIter <= right.mIter;
02358     }
02359     inline bool operator>( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
02360         return left.mIter > right.mIter;
02361     }
02362     inline bool operator>=( const UTFString::_const_fwd_iterator& left, const UTFString::_const_fwd_iterator& right ) {
02363         return left.mIter >= right.mIter;
02364     }
02365 
02366     // (const) reverse iterator common operators
02367     // NB: many of these operations are evaluated in reverse because this is a reverse iterator wrapping a forward iterator
02368     inline UTFString::size_type operator-( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
02369         return ( right.mIter - left.mIter );
02370     }
02371     inline bool operator==( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
02372         return left.mIter == right.mIter;
02373     }
02374     inline bool operator!=( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
02375         return left.mIter != right.mIter;
02376     }
02377     inline bool operator<( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
02378         return right.mIter < left.mIter;
02379     }
02380     inline bool operator<=( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
02381         return right.mIter <= left.mIter;
02382     }
02383     inline bool operator>( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
02384         return right.mIter > left.mIter;
02385     }
02386     inline bool operator>=( const UTFString::_const_rev_iterator& left, const UTFString::_const_rev_iterator& right ) {
02387         return right.mIter >= left.mIter;
02388     }
02389 
02391     inline std::ostream& operator << ( std::ostream& os, const UTFString& s ) {
02392         return os << s.asUTF8();
02393     }
02394 
02396     inline std::wostream& operator << ( std::wostream& os, const UTFString& s ) {
02397         return os << s.asWStr();
02398     }
02399 
02400 
02401 
02402 } // namespace Ogre{
02403 
02404 #endif // OGRE_UNICODE_SUPPORT
02405 
02406 #endif 

Copyright © 2000-2005 by The OGRE Team
Creative Commons License
This work is licensed under a Creative Commons Attribution-ShareAlike 2.5 License.
Last modified Sun Mar 25 13:03:17 2007