CVSROOT: /sources/gnash
Module name: gnash
Changes by: Benjamin Wolsey <bwy> 08/02/04 15:16:55
Modified files: . : ChangeLog server : edit_text_character.cpp libbase : utf8.cpp utf8.h Log message: * libbase/utf8.{h,cpp}: make decodeNextUnicodeCharacter take a std::string::const_iterator by ref, encodeUnicodeCharacter (presently unused) return a std::string. * server/edit_text_character.cpp: use decodeNextUnicodeCharacter. decodeNextUnicodeCharacter works if it's passed a unicode character, but this never seems to be the case (it's only used in edit_text_character.cpp). It may be possible to use this for string.cpp (whose methods are largely bogus for non-ASCII strings). CVSWeb URLs: http://cvs.savannah.gnu.org/viewcvs/gnash/ChangeLog?cvsroot=gnash&r1=1.5554&r2=1.5555 http://cvs.savannah.gnu.org/viewcvs/gnash/server/edit_text_character.cpp?cvsroot=gnash&r1=1.144&r2=1.145 http://cvs.savannah.gnu.org/viewcvs/gnash/libbase/utf8.cpp?cvsroot=gnash&r1=1.6&r2=1.7 http://cvs.savannah.gnu.org/viewcvs/gnash/libbase/utf8.h?cvsroot=gnash&r1=1.4&r2=1.5 Patches: Index: ChangeLog =================================================================== RCS file: /sources/gnash/gnash/ChangeLog,v retrieving revision 1.5554 retrieving revision 1.5555 diff -u -b -r1.5554 -r1.5555 --- ChangeLog 4 Feb 2008 11:32:25 -0000 1.5554 +++ ChangeLog 4 Feb 2008 15:16:52 -0000 1.5555 @@ -1,3 +1,10 @@ +2008-02-04 Benjamin Wolsey <[EMAIL PROTECTED]> + + * libbase/utf8.{h,cpp}: make decodeNextUnicodeCharacter take a + std::string::const_iterator by ref, encodeUnicodeCharacter + (presently unused) return a std::string. + * server/edit_text_character.cpp: use decodeNextUnicodeCharacter. 
+ 2008-02-04 Sandro Santilli <[EMAIL PROTECTED]> * server/as_object.cpp (getOwnProperty): remove useless round Index: server/edit_text_character.cpp =================================================================== RCS file: /sources/gnash/gnash/server/edit_text_character.cpp,v retrieving revision 1.144 retrieving revision 1.145 diff -u -b -r1.144 -r1.145 --- server/edit_text_character.cpp 2 Feb 2008 08:51:52 -0000 1.144 +++ server/edit_text_character.cpp 4 Feb 2008 15:16:54 -0000 1.145 @@ -17,7 +17,7 @@ // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA // -/* $Id: edit_text_character.cpp,v 1.144 2008/02/02 08:51:52 strk Exp $ */ +/* $Id: edit_text_character.cpp,v 1.145 2008/02/04 15:16:54 bwy Exp $ */ #ifdef HAVE_CONFIG_H #include "gnashconfig.h" @@ -1104,7 +1104,6 @@ // nothing more to do if text is empty if ( _text.empty() ) return; - AutoSizeValue autoSize = getAutoSize(); if ( autoSize != autoSizeNone ) { @@ -1172,8 +1171,13 @@ m_ycursor = y; assert(! _text.empty() ); - const char* text = &_text[0]; - while (boost::uint32_t code = utf8::decode_next_unicode_character(&text)) + + std::string::const_iterator it = _text.begin(); + + // decodeNextUnicodeCharacter(std::string::const_iterator &it) works, + // but unfortunately nothing is encoded in utf8. + + while (boost::uint32_t code = utf8::decodeNextUnicodeCharacter(it)) { if ( _embedFonts ) { @@ -1258,7 +1262,7 @@ // HTML tag, just skip it... 
bool closingTagFound = false; - while ( (code = utf8::decode_next_unicode_character(&text)) ) + while ( (code = utf8::decodeNextUnicodeCharacter(it)) ) { if (code == '>') { @@ -1365,7 +1369,7 @@ //log_debug(" autoSize=NONE!"); // truncate long line, but keep expanding text box bool newlinefound = false; - while ( (code = utf8::decode_next_unicode_character(&text)) ) + while ( (code = utf8::decodeNextUnicodeCharacter(it)) ) { if ( _embedFonts ) { Index: libbase/utf8.cpp =================================================================== RCS file: /sources/gnash/gnash/libbase/utf8.cpp,v retrieving revision 1.6 retrieving revision 1.7 diff -u -b -r1.6 -r1.7 --- libbase/utf8.cpp 4 Dec 2007 11:45:24 -0000 1.6 +++ libbase/utf8.cpp 4 Feb 2008 15:16:54 -0000 1.7 @@ -10,11 +10,9 @@ #include "utf8.h" - -boost::uint32_t utf8::decode_next_unicode_character(const char** utf8_buffer) +boost::uint32_t utf8::decodeNextUnicodeCharacter(std::string::const_iterator& it) { boost::uint32_t uc; - char c; // Security considerations: // @@ -35,23 +33,23 @@ #define INVALID 0x0FFFD #define FIRST_BYTE(mask, shift) \ - uc = (c & (mask)) << (shift); + /* Post-increment iterator */ \ + uc = (*it++ & (mask)) << (shift); #define NEXT_BYTE(shift) \ - c = **utf8_buffer; \ - if (c == 0) return 0; /* end of buffer, do not advance */ \ - if ((c & 0xC0) != 0x80) return INVALID; /* standard check */ \ - (*utf8_buffer)++; \ - uc |= (c & 0x3F) << shift; + \ + if (*it == 0) return 0; /* end of buffer, do not advance */ \ + if ((*it & 0xC0) != 0x80) return INVALID; /* standard check */ \ + /* Post-increment iterator: */ \ + uc |= (*it++ & 0x3F) << shift; - c = **utf8_buffer; - if (c == 0) return 0; // End of buffer. Do not advance. + if (*it == 0) return 0; // End of buffer. Do not advance. - (*utf8_buffer)++; - if ((c & 0x80) == 0) return (boost::uint32_t) c; // Conventional 7-bit ASCII. 
+ // Conventional 7-bit ASCII; return and increment iterator: + if ((*it & 0x80) == 0) return (boost::uint32_t) *it++; - // Multi-byte sequences. - if ((c & 0xE0) == 0xC0) + // Multi-byte sequences + if ((*it & 0xE0) == 0xC0) { // Two-byte sequence. FIRST_BYTE(0x1F, 6); @@ -59,7 +57,7 @@ if (uc < 0x80) return INVALID; // overlong return uc; } - else if ((c & 0xF0) == 0xE0) + else if ((*it & 0xF0) == 0xE0) { // Three-byte sequence. FIRST_BYTE(0x0F, 12); @@ -70,7 +68,7 @@ if (uc == 0x0FFFE || uc == 0x0FFFF) return INVALID; // not valid ISO 10646 return uc; } - else if ((c & 0xF8) == 0xF0) + else if ((*it & 0xF8) == 0xF0) { // Four-byte sequence. FIRST_BYTE(0x07, 18); @@ -80,7 +78,7 @@ if (uc < 0x010000) return INVALID; // overlong return uc; } - else if ((c & 0xFC) == 0xF8) + else if ((*it & 0xFC) == 0xF8) { // Five-byte sequence. FIRST_BYTE(0x03, 24); @@ -91,7 +89,7 @@ if (uc < 0x0200000) return INVALID; // overlong return uc; } - else if ((c & 0xFE) == 0xFC) + else if ((*it & 0xFE) == 0xFC) { // Six-byte sequence. FIRST_BYTE(0x01, 30); @@ -110,58 +108,65 @@ } } +// TODO: buffer as std::string; index (iterator); -void utf8::encode_unicode_character(char* buffer, int* index, boost::uint32_t ucs_character) +std::string +utf8::encodeUnicodeCharacter(boost::uint32_t ucs_character) { + + std::string text = ""; + if (ucs_character <= 0x7F) { // Plain single-byte ASCII. - buffer[(*index)++] = (char) ucs_character; + text += (char) ucs_character; } else if (ucs_character <= 0x7FF) { // Two bytes. - buffer[(*index)++] = 0xC0 | (ucs_character >> 6); - buffer[(*index)++] = 0x80 | ((ucs_character >> 0) & 0x3F); + text += 0xC0 | (ucs_character >> 6); + text += 0x80 | ((ucs_character >> 0) & 0x3F); } else if (ucs_character <= 0xFFFF) { // Three bytes. 
- buffer[(*index)++] = 0xE0 | (ucs_character >> 12); - buffer[(*index)++] = 0x80 | ((ucs_character >> 6) & 0x3F); - buffer[(*index)++] = 0x80 | ((ucs_character >> 0) & 0x3F); + text += 0xE0 | (ucs_character >> 12); + text += 0x80 | ((ucs_character >> 6) & 0x3F); + text += 0x80 | ((ucs_character >> 0) & 0x3F); } else if (ucs_character <= 0x1FFFFF) { // Four bytes. - buffer[(*index)++] = 0xF0 | (ucs_character >> 18); - buffer[(*index)++] = 0x80 | ((ucs_character >> 12) & 0x3F); - buffer[(*index)++] = 0x80 | ((ucs_character >> 6) & 0x3F); - buffer[(*index)++] = 0x80 | ((ucs_character >> 0) & 0x3F); + text += 0xF0 | (ucs_character >> 18); + text += 0x80 | ((ucs_character >> 12) & 0x3F); + text += 0x80 | ((ucs_character >> 6) & 0x3F); + text += 0x80 | ((ucs_character >> 0) & 0x3F); } else if (ucs_character <= 0x3FFFFFF) { // Five bytes. - buffer[(*index)++] = 0xF8 | (ucs_character >> 24); - buffer[(*index)++] = 0x80 | ((ucs_character >> 18) & 0x3F); - buffer[(*index)++] = 0x80 | ((ucs_character >> 12) & 0x3F); - buffer[(*index)++] = 0x80 | ((ucs_character >> 6) & 0x3F); - buffer[(*index)++] = 0x80 | ((ucs_character >> 0) & 0x3F); + text += 0xF8 | (ucs_character >> 24); + text += 0x80 | ((ucs_character >> 18) & 0x3F); + text += 0x80 | ((ucs_character >> 12) & 0x3F); + text += 0x80 | ((ucs_character >> 6) & 0x3F); + text += 0x80 | ((ucs_character >> 0) & 0x3F); } else if (ucs_character <= 0x7FFFFFFF) { // Six bytes. 
- buffer[(*index)++] = 0xFC | (ucs_character >> 30); - buffer[(*index)++] = 0x80 | ((ucs_character >> 24) & 0x3F); - buffer[(*index)++] = 0x80 | ((ucs_character >> 18) & 0x3F); - buffer[(*index)++] = 0x80 | ((ucs_character >> 12) & 0x3F); - buffer[(*index)++] = 0x80 | ((ucs_character >> 6) & 0x3F); - buffer[(*index)++] = 0x80 | ((ucs_character >> 0) & 0x3F); + text += 0xFC | (ucs_character >> 30); + text += 0x80 | ((ucs_character >> 24) & 0x3F); + text += 0x80 | ((ucs_character >> 18) & 0x3F); + text += 0x80 | ((ucs_character >> 12) & 0x3F); + text += 0x80 | ((ucs_character >> 6) & 0x3F); + text += 0x80 | ((ucs_character >> 0) & 0x3F); } else { // Invalid char; don't encode anything. } + + return text; } Index: libbase/utf8.h =================================================================== RCS file: /sources/gnash/gnash/libbase/utf8.h,v retrieving revision 1.4 retrieving revision 1.5 diff -u -b -r1.4 -r1.5 --- libbase/utf8.h 4 Dec 2007 11:45:24 -0000 1.4 +++ libbase/utf8.h 4 Feb 2008 15:16:54 -0000 1.5 @@ -10,6 +10,7 @@ #define UTF8_H #include "tu_config.h" // needed ? +#include <string> #include <boost/cstdint.hpp> // for boost::?int??_t @@ -17,11 +18,11 @@ namespace utf8 { // Return the next Unicode character in the UTF-8 encoded - // buffer. Invalid UTF-8 sequences produce a U+FFFD character - // as output. Advances *utf8_buffer past the character + // string. Invalid UTF-8 sequences produce a U+FFFD character + // as output. Advances string iterator past the character // returned, unless the returned character is '\0', in which - // case the buffer does not advance. - DSOEXPORT boost::uint32_t decode_next_unicode_character(const char** utf8_buffer); + // case the iterator does not advance. + DSOEXPORT boost::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator& it); // Encodes the given UCS character into the given UTF-8 // buffer. 
Writes the data starting at buffer[offset], and
@@ -29,7 +30,7 @@
 //
 // May write up to 6 bytes, so make sure there's room in the
 // buffer!
- DSOEXPORT void encode_unicode_character(char* buffer, int* offset, boost::uint32_t ucs_character);
+ DSOEXPORT std::string encodeUnicodeCharacter(boost::uint32_t ucs_character);
 }
_______________________________________________
Gnash-commit mailing list
Gnash-commit@gnu.org
http://lists.gnu.org/mailman/listinfo/gnash-commit