[Gnash-commit] gnash ChangeLog server/edit_text_character.cpp ...

Benjamin Wolsey Mon, 04 Feb 2008 07:24:32 -0800

CVSROOT:        /sources/gnash
Module name:    gnash
Changes by:     Benjamin Wolsey <bwy>   08/02/04 15:16:55


Modified files:
        .              : ChangeLog 
        server         : edit_text_character.cpp 
        libbase        : utf8.cpp utf8.h 

Log message:
                * libbase/utf8.{h,cpp}: make decodeNextUnicodeCharacter take a 
                  std::string::const_iterator by ref, encodeUnicodeCharacter
                  (presently unused) return a std::string.
                * server/edit_text_character.cpp: use decodeNextUnicodeCharacter.
        
        decodeNextUnicodeCharacter works if it is passed UTF-8 encoded text, 
but the text it receives never seems to be UTF-8 encoded (the function is only 
used in edit_text_character.cpp). It may be possible to use this function for 
string.cpp (whose methods are largely bogus for non-ASCII strings).

CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/gnash/ChangeLog?cvsroot=gnash&r1=1.5554&r2=1.5555
http://cvs.savannah.gnu.org/viewcvs/gnash/server/edit_text_character.cpp?cvsroot=gnash&r1=1.144&r2=1.145
http://cvs.savannah.gnu.org/viewcvs/gnash/libbase/utf8.cpp?cvsroot=gnash&r1=1.6&r2=1.7
http://cvs.savannah.gnu.org/viewcvs/gnash/libbase/utf8.h?cvsroot=gnash&r1=1.4&r2=1.5

Patches:
Index: ChangeLog
===================================================================
RCS file: /sources/gnash/gnash/ChangeLog,v
retrieving revision 1.5554
retrieving revision 1.5555
diff -u -b -r1.5554 -r1.5555
--- ChangeLog   4 Feb 2008 11:32:25 -0000       1.5554
+++ ChangeLog   4 Feb 2008 15:16:52 -0000       1.5555
@@ -1,3 +1,10 @@
+2008-02-04 Benjamin Wolsey <[EMAIL PROTECTED]>
+
+       * libbase/utf8.{h,cpp}: make decodeNextUnicodeCharacter take a 
+         std::string::const_iterator by ref, encodeUnicodeCharacter
+         (presently unused) return a std::string.
+       * server/edit_text_character.cpp: use decodeNextUnicodeCharacter.
+
 2008-02-04 Sandro Santilli <[EMAIL PROTECTED]>
 
        * server/as_object.cpp (getOwnProperty): remove useless round

Index: server/edit_text_character.cpp
===================================================================
RCS file: /sources/gnash/gnash/server/edit_text_character.cpp,v
retrieving revision 1.144
retrieving revision 1.145
diff -u -b -r1.144 -r1.145
--- server/edit_text_character.cpp      2 Feb 2008 08:51:52 -0000       1.144
+++ server/edit_text_character.cpp      4 Feb 2008 15:16:54 -0000       1.145
@@ -17,7 +17,7 @@
 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 //
 
-/* $Id: edit_text_character.cpp,v 1.144 2008/02/02 08:51:52 strk Exp $ */
+/* $Id: edit_text_character.cpp,v 1.145 2008/02/04 15:16:54 bwy Exp $ */
 
 #ifdef HAVE_CONFIG_H
 #include "gnashconfig.h"
@@ -1104,7 +1104,6 @@
        // nothing more to do if text is empty
        if ( _text.empty() ) return;
 
-
        AutoSizeValue autoSize = getAutoSize();
        if ( autoSize != autoSizeNone )
        {
@@ -1172,8 +1171,13 @@
        m_ycursor = y;
 
        assert(! _text.empty() );
-       const char*     text = &_text[0]; 
-       while (boost::uint32_t code = 
utf8::decode_next_unicode_character(&text))
+
+       std::string::const_iterator it = _text.begin();
+       
+       // decodeNextUnicodeCharacter(std::string::const_iterator &it) works,
+       // but unfortunately nothing is encoded in utf8.
+       
+       while (boost::uint32_t code = utf8::decodeNextUnicodeCharacter(it))
        {
                if ( _embedFonts )
                {
@@ -1258,7 +1262,7 @@
 
                        // HTML tag, just skip it...
                        bool closingTagFound = false;
-                       while ( (code = 
utf8::decode_next_unicode_character(&text)) )
+                       while ( (code = utf8::decodeNextUnicodeCharacter(it)) )
                        {
                                if (code == '>')
                                {
@@ -1365,7 +1369,7 @@
                                        //log_debug(" autoSize=NONE!");
                                        // truncate long line, but keep 
expanding text box
                                        bool newlinefound = false;
-                                       while ( (code = 
utf8::decode_next_unicode_character(&text)) )
+                                       while ( (code = 
utf8::decodeNextUnicodeCharacter(it)) )
                                        {
                                                if ( _embedFonts )
                                                {

Index: libbase/utf8.cpp
===================================================================
RCS file: /sources/gnash/gnash/libbase/utf8.cpp,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -b -r1.6 -r1.7
--- libbase/utf8.cpp    4 Dec 2007 11:45:24 -0000       1.6
+++ libbase/utf8.cpp    4 Feb 2008 15:16:54 -0000       1.7
@@ -10,11 +10,9 @@
 
 #include "utf8.h"
 
-
-boost::uint32_t        utf8::decode_next_unicode_character(const char** 
utf8_buffer)
+boost::uint32_t        
utf8::decodeNextUnicodeCharacter(std::string::const_iterator& it)
 {
        boost::uint32_t uc;
-       char    c;
 
        // Security considerations:
        //
@@ -35,23 +33,23 @@
 #define INVALID 0x0FFFD
 
 #define FIRST_BYTE(mask, shift)                \
-       uc = (c & (mask)) << (shift);
+       /* Post-increment iterator */ \
+       uc = (*it++ & (mask)) << (shift);
 
 #define NEXT_BYTE(shift)                                               \
-       c = **utf8_buffer;                                              \
-       if (c == 0) return 0; /* end of buffer, do not advance */       \
-       if ((c & 0xC0) != 0x80) return INVALID; /* standard check */    \
-       (*utf8_buffer)++;                                               \
-       uc |= (c & 0x3F) << shift;
+                                       \
+       if (*it == 0) return 0; /* end of buffer, do not advance */     \
+       if ((*it & 0xC0) != 0x80) return INVALID; /* standard check */  \
+       /* Post-increment iterator: */          \
+       uc |= (*it++ & 0x3F) << shift;
 
-       c = **utf8_buffer;
-       if (c == 0) return 0;   // End of buffer.  Do not advance.
+       if (*it == 0) return 0; // End of buffer.  Do not advance.
 
-       (*utf8_buffer)++;
-       if ((c & 0x80) == 0) return (boost::uint32_t) c;        // Conventional 
7-bit ASCII.
+       // Conventional 7-bit ASCII; return and increment iterator:
+       if ((*it & 0x80) == 0) return (boost::uint32_t) *it++;
 
-       // Multi-byte sequences.
-       if ((c & 0xE0) == 0xC0)
+       // Multi-byte sequences
+       if ((*it & 0xE0) == 0xC0)
        {
                // Two-byte sequence.
                FIRST_BYTE(0x1F, 6);
@@ -59,7 +57,7 @@
                if (uc < 0x80) return INVALID;  // overlong
                return uc;
        }
-       else if ((c & 0xF0) == 0xE0)
+       else if ((*it & 0xF0) == 0xE0)
        {
                // Three-byte sequence.
                FIRST_BYTE(0x0F, 12);
@@ -70,7 +68,7 @@
                if (uc == 0x0FFFE || uc == 0x0FFFF) return INVALID;     // not 
valid ISO 10646
                return uc;
        }
-       else if ((c & 0xF8) == 0xF0)
+       else if ((*it & 0xF8) == 0xF0)
        {
                // Four-byte sequence.
                FIRST_BYTE(0x07, 18);
@@ -80,7 +78,7 @@
                if (uc < 0x010000) return INVALID;      // overlong
                return uc;
        }
-       else if ((c & 0xFC) == 0xF8)
+       else if ((*it & 0xFC) == 0xF8)
        {
                // Five-byte sequence.
                FIRST_BYTE(0x03, 24);
@@ -91,7 +89,7 @@
                if (uc < 0x0200000) return INVALID;     // overlong
                return uc;
        }
-       else if ((c & 0xFE) == 0xFC)
+       else if ((*it & 0xFE) == 0xFC)
        {
                // Six-byte sequence.
                FIRST_BYTE(0x01, 30);
@@ -110,58 +108,65 @@
        }
 }
 
+// TODO: buffer as std::string; index (iterator); 
 
-void   utf8::encode_unicode_character(char* buffer, int* index, 
boost::uint32_t ucs_character)
+std::string
+utf8::encodeUnicodeCharacter(boost::uint32_t ucs_character)
 {
+
+       std::string text = "";
+
        if (ucs_character <= 0x7F)
        {
                // Plain single-byte ASCII.
-               buffer[(*index)++] = (char) ucs_character;
+               text += (char) ucs_character;
        }
        else if (ucs_character <= 0x7FF)
        {
                // Two bytes.
-               buffer[(*index)++] = 0xC0 | (ucs_character >> 6);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 0) & 0x3F);
+               text += 0xC0 | (ucs_character >> 6);
+               text += 0x80 | ((ucs_character >> 0) & 0x3F);
        }
        else if (ucs_character <= 0xFFFF)
        {
                // Three bytes.
-               buffer[(*index)++] = 0xE0 | (ucs_character >> 12);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 6) & 0x3F);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 0) & 0x3F);
+               text += 0xE0 | (ucs_character >> 12);
+               text += 0x80 | ((ucs_character >> 6) & 0x3F);
+               text += 0x80 | ((ucs_character >> 0) & 0x3F);
        }
        else if (ucs_character <= 0x1FFFFF)
        {
                // Four bytes.
-               buffer[(*index)++] = 0xF0 | (ucs_character >> 18);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 12) & 0x3F);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 6) & 0x3F);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 0) & 0x3F);
+               text += 0xF0 | (ucs_character >> 18);
+               text += 0x80 | ((ucs_character >> 12) & 0x3F);
+               text += 0x80 | ((ucs_character >> 6) & 0x3F);
+               text += 0x80 | ((ucs_character >> 0) & 0x3F);
        }
        else if (ucs_character <= 0x3FFFFFF)
        {
                // Five bytes.
-               buffer[(*index)++] = 0xF8 | (ucs_character >> 24);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 18) & 0x3F);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 12) & 0x3F);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 6) & 0x3F);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 0) & 0x3F);
+               text += 0xF8 | (ucs_character >> 24);
+               text += 0x80 | ((ucs_character >> 18) & 0x3F);
+               text += 0x80 | ((ucs_character >> 12) & 0x3F);
+               text += 0x80 | ((ucs_character >> 6) & 0x3F);
+               text += 0x80 | ((ucs_character >> 0) & 0x3F);
        }
        else if (ucs_character <= 0x7FFFFFFF)
        {
                // Six bytes.
-               buffer[(*index)++] = 0xFC | (ucs_character >> 30);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 24) & 0x3F);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 18) & 0x3F);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 12) & 0x3F);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 6) & 0x3F);
-               buffer[(*index)++] = 0x80 | ((ucs_character >> 0) & 0x3F);
+               text += 0xFC | (ucs_character >> 30);
+               text += 0x80 | ((ucs_character >> 24) & 0x3F);
+               text += 0x80 | ((ucs_character >> 18) & 0x3F);
+               text += 0x80 | ((ucs_character >> 12) & 0x3F);
+               text += 0x80 | ((ucs_character >> 6) & 0x3F);
+               text += 0x80 | ((ucs_character >> 0) & 0x3F);
        }
        else
        {
                // Invalid char; don't encode anything.
        }
+       
+       return text;
 }
 
 

Index: libbase/utf8.h
===================================================================
RCS file: /sources/gnash/gnash/libbase/utf8.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -b -r1.4 -r1.5
--- libbase/utf8.h      4 Dec 2007 11:45:24 -0000       1.4
+++ libbase/utf8.h      4 Feb 2008 15:16:54 -0000       1.5
@@ -10,6 +10,7 @@
 #define UTF8_H
 
 #include "tu_config.h" // needed ?
+#include <string>
 
 #include <boost/cstdint.hpp> // for boost::?int??_t
 
@@ -17,11 +18,11 @@
 namespace utf8
 {
        // Return the next Unicode character in the UTF-8 encoded
-       // buffer.  Invalid UTF-8 sequences produce a U+FFFD character
-       // as output.  Advances *utf8_buffer past the character
+       // string.  Invalid UTF-8 sequences produce a U+FFFD character
+       // as output.  Advances string iterator past the character
        // returned, unless the returned character is '\0', in which
-       // case the buffer does not advance.
-       DSOEXPORT boost::uint32_t       decode_next_unicode_character(const 
char** utf8_buffer);
+       // case the iterator does not advance.
+       DSOEXPORT boost::uint32_t 
decodeNextUnicodeCharacter(std::string::const_iterator& it);
 
        // Encodes the given UCS character into the given UTF-8
        // buffer.  Writes the data starting at buffer[offset], and
@@ -29,7 +30,7 @@
        //
        // May write up to 6 bytes, so make sure there's room in the
        // buffer!
-       DSOEXPORT void  encode_unicode_character(char* buffer, int* offset, 
boost::uint32_t ucs_character);
+       DSOEXPORT std::string encodeUnicodeCharacter(boost::uint32_t 
ucs_character);
 }
 
 


_______________________________________________
Gnash-commit mailing list
Gnash-commit@gnu.org
http://lists.gnu.org/mailman/listinfo/gnash-commit

[Gnash-commit] gnash ChangeLog server/edit_text_character.cpp ...

Reply via email to