37 return mIter == mString->mData.begin();
42 return mIter == mString->mData.end();
47 return mIter - mString->mData.begin();
52 mIter = mString->mData.begin() + index;
58 return mString->getChar( current_index );
64 int change = mString->setChar( current_index, uc );
65 _jump_to( current_index );
72 if ( _test_end() )
return;
78 lead_half = mIter[-1];
88 if ( _test_begin() )
return;
93 lead_half = mIter[-1];
203 return _getCharacter();
208 return _setCharacter( uc );
319 return _getCharacter();
543 #if MYGUI_IS_NATIVE_WCHAR_T 609 return mData.max_size();
614 mData.reserve(
size );
619 mData.resize( num, val );
624 mData.swap( from.mData );
629 return mData.empty();
634 return mData.c_str();
644 return mData.capacity();
657 tmp.mData.swap(
data );
669 #if MYGUI_IS_NATIVE_WCHAR_T 673 mData.push_back( static_cast<code_point>( val ) );
679 mData.push_back( val );
684 mData.push_back( static_cast<code_point>( val ) );
700 return *m_buffer.mStrBuffer;
706 return m_buffer.mStrBuffer->c_str();
711 _load_buffer_UTF32();
712 return *m_buffer.mUTF32StrBuffer;
717 _load_buffer_UTF32();
718 return m_buffer.mUTF32StrBuffer->c_str();
724 return *m_buffer.mWStrBuffer;
730 return m_buffer.mWStrBuffer->c_str();
735 return mData.at( loc );
740 return mData.at( loc );
753 if ( l == 2 && ( loc + 1 ) < mData.length() ) {
770 if ( newSize > existingSize ) {
772 insert( loc + 1, 1, cp[1] );
775 if ( newSize < existingSize ) {
783 if ( l == 2 )
at( loc + 1 ) = cp[1];
790 i.
mIter = mData.begin();
798 i.
mIter = const_cast<UString*>(
this )->mData.begin();
799 i.
mString = const_cast<UString*>(
this );
806 i.
mIter = mData.end();
814 i.
mIter = const_cast<UString*>(
this )->mData.end();
815 i.
mString = const_cast<UString*>(
this );
822 i.
mIter = mData.end();
830 i.
mIter = const_cast<UString*>(
this )->mData.end();
831 i.
mString = const_cast<UString*>(
this );
838 i.
mIter = mData.begin();
846 i.
mIter = const_cast<UString*>(
this )->mData.begin();
847 i.
mString = const_cast<UString*>(
this );
859 mData.assign( str.mData );
871 mData.assign( str, num );
877 mData.assign( str.mData, index,
len );
883 mData.assign( num, ch );
890 mData.reserve( wstr.length() );
891 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy 893 std::wstring::const_iterator i, ie = wstr.end();
894 for ( i = wstr.begin(); i != ie; i++ ) {
895 tmp = static_cast<code_point>( *i );
896 mData.push_back( tmp );
898 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower) 901 std::wstring::const_iterator i, ie = wstr.end();
902 for ( i = wstr.begin(); i != ie; i++ ) {
903 tmp = static_cast<unicode_char>( *i );
905 if ( l > 0 ) mData.push_back( cp[0] );
906 if ( l > 1 ) mData.push_back( cp[1] );
912 #if MYGUI_IS_NATIVE_WCHAR_T 938 unsigned char utf8buf[7];
945 std::string::const_iterator i, ie = str.end();
946 for ( i = str.begin(); i != ie; i++ ) {
948 for (
size_t j = 0; j < utf8len; j++ ) {
949 utf8buf[j] = ( static_cast<unsigned char>( *( i + j ) ) );
951 utf8buf[utf8len] = 0;
956 append( utf16buff, utf16len );
963 std::string tmp(
c_str );
976 mData.append( str.mData );
988 mData.append( str.mData, index,
len );
994 mData.append( str, num );
1000 mData.append( num, ch );
1010 #if MYGUI_IS_NATIVE_WCHAR_T 1013 std::wstring tmp( w_str, num );
1019 return append( num, static_cast<unicode_char>( ch ) );
1031 append( num, static_cast<code_point>( ch ) );
1061 mData.insert( index, str.mData );
1067 mData.insert( index1, str.mData, index2, num );
1078 mData.insert( index, str, num );
1082 #if MYGUI_IS_NATIVE_WCHAR_T 1100 mData.insert( index, num, ch );
1104 #if MYGUI_IS_NATIVE_WCHAR_T 1107 insert( index, num, static_cast<unicode_char>( ch ) );
1114 insert( index, num, static_cast<code_point>( ch ) );
1123 return insert( index, num, cp[0] );
1127 insert( index, 1, cp[1] );
1128 insert( index, 1, cp[0] );
1135 mData.insert( i.
mIter, num, ch );
1137 #if MYGUI_IS_NATIVE_WCHAR_T 1140 insert( i, num, static_cast<unicode_char>( ch ) );
1146 insert( i, num, static_cast<code_point>( ch ) );
1183 mData.erase( index );
1185 mData.erase( index, num );
1191 mData.replace( index1, num1, str.mData, 0,
npos );
1197 mData.replace( index1, num1, str.mData, 0, num2 );
1203 mData.replace( index1, num1, str.mData, index2, num2 );
1213 return replace( index1, num1, str, 0, num );
1218 mData.replace( index, num1, num2, ch );
1228 return replace( index1, num1, num, ch );
1233 return mData.compare( str.mData );
1238 return mData.compare( str );
1243 return mData.compare( index,
length, str.mData );
1248 return mData.compare( index,
length, str.mData, index2, length2 );
1253 return mData.compare( index,
length, str, length2 );
1256 #if MYGUI_IS_NATIVE_WCHAR_T 1259 UString tmp( w_str, length2 );
1272 return mData.find( str.
c_str(), index );
1287 #if MYGUI_IS_NATIVE_WCHAR_T 1291 return mData.find( tmp.c_str(), index,
length );
1297 return find( static_cast<code_point>( ch ), index );
1302 return mData.find( ch, index );
1305 #if MYGUI_IS_NATIVE_WCHAR_T 1308 return find( static_cast<unicode_char>( ch ), index );
1321 return mData.rfind( str.
c_str(), index );
1327 return mData.rfind( tmp.
c_str(), index, num );
1333 return mData.rfind( tmp.
c_str(), index, num );
1336 #if MYGUI_IS_NATIVE_WCHAR_T 1340 return mData.rfind( tmp.c_str(), index, num );
1346 return rfind( static_cast<code_point>( ch ), index );
1351 return mData.rfind( ch, index );
1354 #if MYGUI_IS_NATIVE_WCHAR_T 1357 return rfind( static_cast<unicode_char>( ch ), index );
1372 while ( i < num && ( index + i ) <
len ) {
1390 return find_first_of( static_cast<code_point>( ch ), index );
1393 #if MYGUI_IS_NATIVE_WCHAR_T 1396 return find_first_of( static_cast<unicode_char>( ch ), index );
1411 while ( i < num && ( index + i ) <
len ) {
1432 #if MYGUI_IS_NATIVE_WCHAR_T 1450 if ( index >
len ) index =
len - 1;
1452 while ( i < num && ( index - i ) !=
npos ) {
1474 #if MYGUI_IS_NATIVE_WCHAR_T 1477 return find_last_of( static_cast<unicode_char>( ch ), index );
1492 if ( index >
len ) index =
len - 1;
1494 while ( i < num && ( index - i ) !=
npos ) {
1521 #if MYGUI_IS_NATIVE_WCHAR_T 1562 #if MYGUI_IS_NATIVE_WCHAR_T 1606 UString::operator std::string()
const 1608 return std::string(
asUTF8() );
1612 UString::operator std::wstring()
const 1614 return std::wstring(
asWStr() );
1620 if ( 0xD800 <= cp && cp <= 0xDFFF )
1627 if ( 0xD800 <= cp && cp <= 0xDBFF )
1634 if ( 0xDC00 <= cp && cp <= 0xDFFF )
1641 if ( 0xD800 <= cp && cp <= 0xDBFF )
1657 bool wordPair =
false;
1660 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
1662 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
1671 unsigned short cU = cp1, cL = cp2;
1675 out_uc = ( cU & 0x03FF ) << 10;
1676 out_uc |= ( cL & 0x03FF );
1684 if ( in_uc <= 0xFFFF ) {
1685 out_cp[0] = static_cast<code_point>(in_uc);
1693 tmp = static_cast<unsigned short>(( uc >> 10 ) & 0x03FF);
1698 tmp = static_cast<unsigned short>(uc & 0x03FF);
1707 return ( cp & ~_cont_mask ) != _cont;
1712 if ( !( cp & 0x80 ) )
return 1;
1713 if (( cp & ~_lead1_mask ) == _lead1 )
return 2;
1714 if (( cp & ~_lead2_mask ) == _lead2 )
return 3;
1715 if (( cp & ~_lead3_mask ) == _lead3 )
return 4;
1716 if (( cp & ~_lead4_mask ) == _lead4 )
return 5;
1717 if (( cp & ~_lead5_mask ) == _lead5 )
return 6;
1733 if ( !( uc & ~0x0000007F ) )
return 1;
1734 if ( !( uc & ~0x000007FF ) )
return 2;
1735 if ( !( uc & ~0x0000FFFF ) )
return 3;
1736 if ( !( uc & ~0x001FFFFF ) )
return 4;
1737 if ( !( uc & ~0x03FFFFFF ) )
return 5;
1738 if ( !( uc & ~0x7FFFFFFF ) )
return 6;
1756 c = in_cp[i] & _lead5_mask;
1759 c = in_cp[i] & _lead4_mask;
1762 c = in_cp[i] & _lead3_mask;
1765 c = in_cp[i] & _lead2_mask;
1768 c = in_cp[i] & _lead1_mask;
1773 for ( ++i; i <
len; i++ )
1775 if (( in_cp[i] & ~_cont_mask ) != _cont )
1782 c |= ( in_cp[i] & _cont_mask );
1795 for (
size_t i =
len - 1; i > 0; i-- ) {
1796 out_cp[i] = static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
1803 out_cp[0] = static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
1806 out_cp[0] = static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
1809 out_cp[0] = static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
1812 out_cp[0] = static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
1815 out_cp[0] = static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
1819 out_cp[0] = static_cast<unsigned char>(( c ) & 0x7F);
1829 std::string tmp( reinterpret_cast<const char*>(
c_str ) );
1835 std::string::const_iterator i, ie = str.end();
1841 if (( *i ) & 0x80 ) {
1842 unsigned char c = ( *i );
1843 size_t contBytes = 0;
1846 if (( c & ~_lead1_mask ) == _lead1 ) {
1854 }
else if (( c & ~_lead2_mask ) == _lead2 ) {
1856 if ( c == _lead2 ) {
1858 if (( c & _lead2 ) == _cont )
1865 }
else if (( c & ~_lead3_mask ) == _lead3 ) {
1867 if ( c == _lead3 ) {
1869 if (( c & _lead3 ) == _cont )
1876 }
else if (( c & ~_lead4_mask ) == _lead4 ) {
1878 if ( c == _lead4 ) {
1880 if (( c & _lead4 ) == _cont )
1887 }
else if (( c & ~_lead5_mask ) == _lead5 ) {
1889 if ( c == _lead5 ) {
1891 if (( c & _lead5 ) == _cont )
1900 while ( contBytes-- ) {
1902 if (( c & ~_cont_mask ) != _cont )
1915 void UString::_init()
1917 m_buffer.mVoidBuffer = 0;
1918 m_bufferType = bt_none;
1922 void UString::_cleanBuffer()
const 1924 if ( m_buffer.mVoidBuffer != 0 ) {
1925 switch ( m_bufferType ) {
1927 delete m_buffer.mStrBuffer;
1930 delete m_buffer.mWStrBuffer;
1932 case bt_utf32string:
1933 delete m_buffer.mUTF32StrBuffer;
1938 assert(
"This should never happen - mVoidBuffer should never contain something if we " 1939 "don't know the type");
1942 m_buffer.mVoidBuffer = 0;
1944 m_bufferType = bt_none;
1948 void UString::_getBufferStr()
const 1950 if ( m_bufferType != bt_string ) {
1952 m_buffer.mStrBuffer =
new std::string();
1953 m_bufferType = bt_string;
1955 m_buffer.mStrBuffer->clear();
1958 void UString::_getBufferWStr()
const 1960 if ( m_bufferType != bt_wstring ) {
1962 m_buffer.mWStrBuffer =
new std::wstring();
1963 m_bufferType = bt_wstring;
1965 m_buffer.mWStrBuffer->clear();
1968 void UString::_getBufferUTF32Str()
const 1970 if ( m_bufferType != bt_utf32string ) {
1973 m_bufferType = bt_utf32string;
1975 m_buffer.mUTF32StrBuffer->clear();
1978 void UString::_load_buffer_UTF8()
const 1981 std::string& buffer = ( *m_buffer.mStrBuffer );
1982 buffer.reserve(
length() );
1984 unsigned char utf8buf[6];
1985 char* charbuf = (
char* )utf8buf;
1991 c = i.getCharacter();
1995 buffer.push_back( charbuf[j++] );
1999 void UString::_load_buffer_WStr()
const 2002 std::wstring& buffer = ( *m_buffer.mWStrBuffer );
2003 buffer.reserve(
length() );
2004 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16 2006 for ( i =
begin(); i != ie; ++i ) {
2007 buffer.push_back((
wchar_t )( *i ) );
2009 #else // wchar_t fits UTF-32 2013 c = i.getCharacter();
2014 buffer.push_back((
wchar_t )c );
2019 void UString::_load_buffer_UTF32()
const 2021 _getBufferUTF32Str();
2022 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
2023 buffer.reserve(
length() );
2029 c = i.getCharacter();
2030 buffer.push_back( c );
code_point & operator[](size_type index)
code point dereference operator
std::basic_string< code_point > dstring
base iterator class for UString
size_type find_last_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that matches any character in str,...
size_type rfind(const UString &str, size_type index=0) const
returns the location of the first occurrence of str in the current string, doing a reverse search fro...
_const_rev_iterator operator-(difference_type n)
subtraction operator
bool inString(unicode_char ch) const
returns true if the given Unicode character ch is in this string
bool empty() const
returns true if the string has no elements, false otherwise
_const_fwd_iterator & operator--()
pre-decrement
const code_point * c_str() const
returns a pointer to the first character in the current string
reverse_iterator rend()
returns a reverse iterator just past the beginning of the string
void resize(size_type num, const code_point &val=0)
changes the size of the string to num, filling in any new area with val
UString & append(const UString &str)
appends str on to the end of the current string
_rev_iterator & operator--()
pre-decrement
void _seekRev(size_type c)
bool operator!=(const UString &right) const
inequality operator
int _setCharacter(unicode_char uc)
static size_t _utf32_to_utf8(const unicode_char &in_uc, unsigned char out_cp[6])
writes the given UTF-32 in_uc to the buffer location out_cp using UTF-8 encoding, returns the number ...
iterator erase(iterator loc)
removes the code point pointed to by loc, returning an iterator to the next character
UString()
default constructor, creates an empty string
static size_t _utf8_char_length(unsigned char cp)
estimates the number of UTF-8 code points in the sequence starting with cp
value_type & operator[](difference_type n) const
dereference at offset operator
_const_fwd_iterator operator+(difference_type n)
addition operator
_const_fwd_iterator & operator+=(difference_type n)
addition assignment operator
_const_rev_iterator & operator+=(difference_type n)
addition assignment operator
_rev_iterator operator-(difference_type n)
subtraction operator
size_type length_Characters() const
Returns the number of Unicode characters in the string.
const code_point * data() const
returns a pointer to the first character in the current string
_const_fwd_iterator operator-(difference_type n)
subtraction operator
_fwd_iterator & operator+=(difference_type n)
addition assignment operator
const utf32string & asUTF32() const
returns the current string in UTF-32 form within a utf32string
size_t size_type
size type used to indicate string size and character positions within the string
_const_rev_iterator & operator++()
pre-increment
const value_type & operator*() const
dereference operator
int setChar(size_type loc, unicode_char ch)
sets the value of the character at loc to the Unicode value ch (UTF-32)
_const_fwd_iterator const_iterator
const iterator
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
static const size_type npos
the usual constant representing: not found, no limit, etc
const value_type & operator[](difference_type n) const
dereference at offset operator
const forward iterator for UString
_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
_const_rev_iterator operator+(difference_type n)
addition operator
static size_t _utf32_to_utf16(const unicode_char &in_uc, code_point out_cp[2])
writes the given UTF-32 in_uc to the buffer location out_cp using UTF-16 encoding,...
void push_back(unicode_char val)
appends val to the end of the string
std::basic_string< unicode_char > utf32string
string type used for returning UTF-32 formatted data
bool operator<(const UString &right) const
less than operator
void clear()
deletes all of the elements in the string
const reverse iterator for UString
iterator begin()
returns an iterator to the first element of the string
const char * asUTF8_c_str() const
returns the current string in UTF-8 form as a nul-terminated char array
_fwd_iterator iterator
iterator
_const_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
reverse_iterator rbegin()
returns a reverse iterator to the last element of the string
value_type & operator*() const
dereference operator
_const_fwd_iterator & operator++()
pre-increment
uint16 code_point
a single UTF-16 code point
_fwd_iterator operator+(difference_type n)
addition operator
void _seekFwd(size_type c)
_const_rev_iterator & operator-=(difference_type n)
subtraction assignment operator
bool operator>(const UString &right) const
greater than operator
static bool _utf8_start_char(unsigned char cp)
returns true if cp is the beginning of a UTF-8 sequence
static bool _utf16_surrogate_lead(code_point cp)
returns true if cp matches the signature of a surrogate pair lead character
const value_type & operator[](difference_type n) const
dereference at offset operator
void swap(UString &from)
exchanges the elements of the current string with those of from
static size_t _utf16_to_utf32(const code_point in_cp[2], unicode_char &out_uc)
converts the given UTF-16 character buffer in_cp to a single UTF-32 Unicode character out_uc,...
_fwd_iterator operator-(difference_type n)
subtraction operator
const value_type & operator*() const
dereference operator
_rev_iterator operator+(difference_type n)
addition operator
size_type find_last_not_of(const UString &str, size_type index=npos, size_type num=npos) const
returns the index of the last character within the current string that does not match any character i...
size_type find_first_of(const UString &str, size_type index=0, size_type num=npos) const
Returns the index of the first character within the current string that matches any character in str,...
unicode_char _getCharacter() const
code_point value_type
value type typedef for use in iterators
const std::string & asUTF8() const
returns the current string in UTF-8 form within a std::string
size_type find(const UString &str, size_type index=0) const
returns the index of the first occurrence of str within the current string, starting at index; return...
size_type _get_index() const
void reserve(size_type size)
sets the capacity of the string to at least size code points
_fwd_iterator & movePrev()
rewinds to the previous Unicode character, honoring surrogate pairs in the UTF-16 stream
const wchar_t * asWStr_c_str() const
returns the current string in the native form of a nul-terminated wchar_t array
int compare(const UString &str) const
compare str to the current string
forward iterator for UString
uint32 unicode_char
a single 32-bit Unicode character
static bool _utf16_surrogate_follow(code_point cp)
returns true if cp matches the signature of a surrogate pair following character
static size_t _utf16_char_length(code_point cp)
estimates the number of UTF-16 code points in the sequence starting with cp
bool operator>=(const UString &right) const
greater than or equal operator
code_point & at(size_type loc)
returns a reference to the element in the string at index loc
static bool _utf16_independent_char(code_point cp)
returns true if cp does not match the signature for the lead or follow code point of a surrogate pair...
_const_rev_iterator & operator--()
pre-decrement
void _become(const _base_iterator &i)
value_type & operator[](difference_type n) const
dereference at offset operator
_fwd_iterator & operator--()
pre-decrement
size_type size() const
Returns the number of code points in the current string.
size_type find_first_not_of(const UString &str, size_type index=0, size_type num=npos) const
returns the index of the first character within the current string that does not match any character ...
_rev_iterator & operator++()
pre-increment
_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
forward iterator for UString
static size_type _verifyUTF8(const unsigned char *c_str)
verifies a UTF-8 stream, returning the total number of Unicode characters found
UString & assign(iterator start, iterator end)
gives the current string the values from start to end
int setCharacter(unicode_char uc)
Sets the Unicode value of the character at the current position (adding a surrogate pair if needed); ...
_const_fwd_iterator & moveNext()
advances to the next Unicode character, honoring surrogate pairs in the UTF-16 stream
A UTF-16 string with implicit conversion to/from std::string and std::wstring.
_rev_iterator & operator+=(difference_type n)
addition assignment operator
static size_t _utf8_to_utf32(const unsigned char in_cp[6], unicode_char &out_uc)
converts the given UTF-8 character buffer to a single UTF-32 Unicode character, returns the number of...
iterator insert(iterator i, const code_point &ch)
inserts ch before the code point denoted by i
size_type length() const
Returns the number of code points in the current string.
UString & operator=(const UString &s)
assignment operator, implicitly casts all compatible types
iterator end()
returns an iterator just past the end of the string
UString substr(size_type index, size_type num=npos) const
returns a substring of the current string, starting at index, and num characters long.
const std::wstring & asWStr() const
returns the current string in the native form of std::wstring
UString & replace(size_type index1, size_type num1, const UString &str)
replaces up to num1 code points of the current string (starting at index1) with str
const unicode_char * asUTF32_c_str() const
returns the current string in UTF-32 form as a nul-terminated unicode_char array
unicode_char getChar(size_type loc) const
returns the data point loc evaluated as a UTF-32 value
value_type & operator*() const
dereference operator
unicode_char getCharacter() const
Returns the Unicode value of the character at the current position (decodes surrogate pairs if needed...
size_type capacity() const
returns the number of elements that the string can hold before it will need to allocate more space
float len(float x, float y)
size_type max_size() const
returns the maximum number of UTF-16 code points that the string can hold
bool operator<=(const UString &right) const
less than or equal operator
_const_fwd_iterator & operator-=(difference_type n)
subtraction assignment operator
_fwd_iterator & operator++()
pre-increment
bool operator==(const UString &right) const
equality operator
void _jump_to(size_type index)