CVSROOT: /sources/gnash Module name: gnash Changes by: Benjamin Wolsey <bwy> 08/02/05 12:01:52
Modified files: . : ChangeLog server : button_character_instance.cpp edit_text_character.cpp edit_text_character.h event_id.h gnash.h movie_root.cpp libbase : utf8.cpp utf8.h Log message: 2008-02-05 Benjamin Wolsey <[EMAIL PROTECTED]> * server/movie_root.cpp: send unique gnash::key::code not SWFCode to event_id; edit_text_character relies on this code to work out which character was passed and SWFCode is bogus. * server/event_id.h: store gnash::key::code not SWFCode; setKeyCode converts SWFCode to a corresponding gnash::key::code. * server/gnash.h: add enum for different key code types. * server/button_character_instance.cpp: lookup SWFCode from gnash::key::code. * libbase/utf8.{cpp,h}: add {decode,encode}CanonicalString methods for converting between UTF-8 encoded std::string and std::wstring. Fix some bugs in character decoding. * server/edit_text_character.{cpp,h}: use std::wstring internally, as once we receive multi-byte characters the cursor position and std::string position are different. Wide characters mean string manipulation can stay as before. Interfaces still use std::string with conversions when necessary. Make wstring methods private. set_text_value(const char*) -> setTextValue(const wstring&). Handles characters like ö or ¶. Sorry for the large commit. It was just a small bug with a lot of implications. It passes the testsuite and I think I've adapted button key events correctly, but I hope someone who knows more about them can check. 
CVSWeb URLs: http://cvs.savannah.gnu.org/viewcvs/gnash/ChangeLog?cvsroot=gnash&r1=1.5557&r2=1.5558 http://cvs.savannah.gnu.org/viewcvs/gnash/server/button_character_instance.cpp?cvsroot=gnash&r1=1.79&r2=1.80 http://cvs.savannah.gnu.org/viewcvs/gnash/server/edit_text_character.cpp?cvsroot=gnash&r1=1.145&r2=1.146 http://cvs.savannah.gnu.org/viewcvs/gnash/server/edit_text_character.h?cvsroot=gnash&r1=1.65&r2=1.66 http://cvs.savannah.gnu.org/viewcvs/gnash/server/event_id.h?cvsroot=gnash&r1=1.15&r2=1.16 http://cvs.savannah.gnu.org/viewcvs/gnash/server/gnash.h?cvsroot=gnash&r1=1.113&r2=1.114 http://cvs.savannah.gnu.org/viewcvs/gnash/server/movie_root.cpp?cvsroot=gnash&r1=1.157&r2=1.158 http://cvs.savannah.gnu.org/viewcvs/gnash/libbase/utf8.cpp?cvsroot=gnash&r1=1.7&r2=1.8 http://cvs.savannah.gnu.org/viewcvs/gnash/libbase/utf8.h?cvsroot=gnash&r1=1.5&r2=1.6 Patches: Index: ChangeLog =================================================================== RCS file: /sources/gnash/gnash/ChangeLog,v retrieving revision 1.5557 retrieving revision 1.5558 diff -u -b -r1.5557 -r1.5558 --- ChangeLog 5 Feb 2008 10:50:14 -0000 1.5557 +++ ChangeLog 5 Feb 2008 12:01:50 -0000 1.5558 @@ -1,3 +1,24 @@ +2008-02-05 Benjamin Wolsey <[EMAIL PROTECTED]> + + * server/movie_root.cpp: send unique gnash::key::code not SWFCode + to event_id; edit_text_character relies on this code to work out + which character was passed and SWFcode is bogus. + * server/event_id.h: store gnash::key::code not SWFCode; setKeyCode + converts SWFCode to a corresponding gnash::key::code. + * server/gnash.h: add enum for different key code types. + * server/button_character_instance.cpp: lookup SWFCode from + gnash::key::code. + * libbase/utf8.{cpp,h}: add {decode,encode}CanonicalString methods + for converting between UTF-8 encoded std::string and std::wstring. + Fix some bugs in character decoding. 
+ * server/edit_text_character.{cpp.h}: use std::wstring internally, + as once we receive multi-byte characters the cursor position and + std::string position are different. Wide characters mean string + manipulation can stay as before. Interfaces still use std::string + with conversions when necessary. Make wstring methods private. + set_text_value(const char*) -> setTextValue(const wstring&). + Handles characters like ö or ¶. + 2008-02-05 Bastiaan Jacques <[EMAIL PROTECTED]> * libmedia/gst/VideoDecoderGst.cpp: Use a different ffdec Index: server/button_character_instance.cpp =================================================================== RCS file: /sources/gnash/gnash/server/button_character_instance.cpp,v retrieving revision 1.79 retrieving revision 1.80 diff -u -b -r1.79 -r1.80 --- server/button_character_instance.cpp 29 Jan 2008 12:31:09 -0000 1.79 +++ server/button_character_instance.cpp 5 Feb 2008 12:01:51 -0000 1.80 @@ -331,7 +331,7 @@ button_character_instance::on_event(const event_id& id) { - if( (id.m_id==event_id::KEY_PRESS) && (id.m_key_code == key::INVALID) ) + if( (id.m_id==event_id::KEY_PRESS) && (id.keyCode == key::INVALID) ) { // onKeypress only responds to valid key code return false; @@ -346,8 +346,10 @@ button_action& ba = *(m_def->m_button_actions[i]); int keycode = (ba.m_conditions & 0xFE00) >> 9; - event_id key_event(event_id::KEY_PRESS, (key::code) keycode); - if (key_event == id) + + // Test match between button action conditions and the SWF code + // that maps to id.keyCode (the gnash unique key code). + if (id.m_id == event_id::KEY_PRESS && gnash::key::codeMap[id.keyCode][key::SWF] == keycode) { // Matching action. 
VM::get().getRoot().pushAction(ba.m_actions, boost::intrusive_ptr<character>(this)); Index: server/edit_text_character.cpp =================================================================== RCS file: /sources/gnash/gnash/server/edit_text_character.cpp,v retrieving revision 1.145 retrieving revision 1.146 diff -u -b -r1.145 -r1.146 --- server/edit_text_character.cpp 4 Feb 2008 15:16:54 -0000 1.145 +++ server/edit_text_character.cpp 5 Feb 2008 12:01:51 -0000 1.146 @@ -17,8 +17,6 @@ // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA // -/* $Id: edit_text_character.cpp,v 1.145 2008/02/04 15:16:54 bwy Exp $ */ - #ifdef HAVE_CONFIG_H #include "gnashconfig.h" #endif @@ -373,6 +371,7 @@ edit_text_character_def* def, int id) : character(parent, id), + _text(L""), m_def(def), _font(0), m_has_focus(false), @@ -408,7 +407,8 @@ // set default text *before* calling registerTextVariable // (if the textvariable already exist and has a value // the text will be replaced with it) - set_text_value(m_def->get_default_text().c_str()); + + setTextValue(utf8::decodeCanonicalString(m_def->get_default_text())); m_dummy_style.push_back(fill_style()); @@ -590,21 +590,22 @@ case event_id::KEY_PRESS: { - std::string s(_text); - std::string c; - c = (char) id.m_key_code; + std::wstring s = _text; + + // id.keyCode is the unique gnash::key::code for a character + uint32_t c = (uint32_t) id.keyCode; - // may be _text is changed in ActionScript + // maybe _text is changed in ActionScript m_cursor = imin(m_cursor, _text.size()); - switch (c[0]) + switch (c) { case key::BACKSPACE: if (m_cursor > 0) { s.erase(m_cursor - 1, 1); m_cursor--; - set_text_value(s.c_str()); + setTextValue(s); } break; @@ -612,7 +613,7 @@ if (s.size() > m_cursor) { s.erase(m_cursor, 1); - set_text_value(s.c_str()); + setTextValue(s); } break; @@ -644,12 +645,14 @@ break; default: + wchar_t t = (wchar_t) gnash::key::codeMap[c][key::ASCII]; + if (t != 0) { - s.insert(m_cursor, c); + s.insert(m_cursor, 
1, t); m_cursor++; - set_text_value(s.c_str()); - break; } + setTextValue(s); + break; } onChanged(); } @@ -692,11 +695,19 @@ } void -edit_text_character::updateText(const std::string& new_text) +edit_text_character::updateText(const std::string& str) +{ + std::wstring wstr = utf8::decodeCanonicalString(str); + updateText(wstr); +} + + +void +edit_text_character::updateText(const std::wstring& wstr) { unsigned int maxLen = m_def->get_max_length(); - std::string newText = new_text; // copy needed for eventual resize + std::wstring newText = wstr; // copy needed for eventual resize if (maxLen && newText.length() > maxLen ) { newText.resize(maxLen); @@ -718,12 +729,10 @@ } void -edit_text_character::set_text_value(const char* new_text_cstr) +edit_text_character::setTextValue(const std::wstring& wstr) { - std::string newText; - if ( new_text_cstr ) newText = new_text_cstr; - updateText(newText); + updateText(wstr); if ( ! _variable_name.empty() && _text_variable_registered ) { @@ -732,12 +741,12 @@ as_object* tgt = ref.first; if ( tgt ) { - tgt->set_member(ref.second, newText); // we shouldn't truncate, right ? + tgt->set_member(ref.second, utf8::encodeCanonicalString(wstr)); // we shouldn't truncate, right ? } else { // nothing to do (too early ?) - log_debug("set_text_value: variable name %s points to an unexisting target, I guess we would not be registered in this was true, or the sprite we've registered our variable name has been unloaded", _variable_name.c_str()); + log_debug("setTextValue: variable name %s points to an unexisting target, I guess we would not be registered in this was true, or the sprite we've registered our variable name has been unloaded", _variable_name.c_str()); } } } @@ -747,13 +756,13 @@ { // we need the const_cast here because registerTextVariable // *might* change our text value, calling the non-const - // set_text_value(). + // setTextValue(). 
// This happens if the TextVariable has not been already registered // and during registration comes out to name an existing variable // with a pre-existing value. const_cast<edit_text_character*>(this)->registerTextVariable(); - return _text; + return utf8::encodeCanonicalString(_text); } void @@ -774,14 +783,14 @@ //if (name == "text") { int version = get_parent()->get_movie_definition()->get_version(); - set_text_value(val.to_string_versioned(version).c_str()); + setTextValue(utf8::decodeCanonicalString(val.to_string_versioned(version))); return; } case NSV::PROP_HTML_TEXT: //if (name == "htmlText") { int version = get_parent()->get_movie_definition()->get_version(); - set_text_value(val.to_string_versioned(version).c_str()); + setTextValue(utf8::decodeCanonicalString(val.to_string_versioned(version))); format_text(); return; } @@ -1172,12 +1181,12 @@ assert(! _text.empty() ); - std::string::const_iterator it = _text.begin(); + std::wstring::const_iterator it = _text.begin(); // decodeNextUnicodeCharacter(std::string::const_iterator &it) works, // but unfortunately nothing is encoded in utf8. - while (boost::uint32_t code = utf8::decodeNextUnicodeCharacter(it)) + while (boost::uint32_t code = *it++) { if ( _embedFonts ) { @@ -1262,7 +1271,7 @@ // HTML tag, just skip it... bool closingTagFound = false; - while ( (code = utf8::decodeNextUnicodeCharacter(it)) ) + while ( (code = *it++) ) { if (code == '>') { @@ -1369,7 +1378,7 @@ //log_debug(" autoSize=NONE!"); // truncate long line, but keep expanding text box bool newlinefound = false; - while ( (code = utf8::decodeNextUnicodeCharacter(it)) ) + while ( (code = *it++ ) ) { if ( _embedFonts ) { @@ -1585,14 +1594,14 @@ #endif // TODO: pass environment to to_string ? 
// as_environment& env = get_environment(); - set_text_value(val.to_string().c_str()); + setTextValue(utf8::decodeCanonicalString(val.to_string())); } else { #ifdef DEBUG_DYNTEXT_VARIABLES log_msg(_("target sprite (%p) does NOT have a member named %s (no problem, we'll add it)"), (void*)sprite, _vm.getStringTable().value(key).c_str()); #endif - target->set_member(key, as_value(_text)); + target->set_member(key, as_value(utf8::encodeCanonicalString(_text))); } sprite_instance* sprite = target->to_movie(); @@ -1618,7 +1627,7 @@ { _variable_name = newname; _text_variable_registered = false; - //set_text_value(m_def->get_default_text().c_str()); + //setTextValue(m_def->get_default_text()); #ifdef DEBUG_DYNTEXT_VARIABLES log_debug("Calling updateText after change of variable name"); #endif Index: server/edit_text_character.h =================================================================== RCS file: /sources/gnash/gnash/server/edit_text_character.h,v retrieving revision 1.65 retrieving revision 1.66 diff -u -b -r1.65 -r1.66 --- server/edit_text_character.h 21 Jan 2008 20:55:50 -0000 1.65 +++ server/edit_text_character.h 5 Feb 2008 12:01:51 -0000 1.66 @@ -90,17 +90,10 @@ /// void set_variable_name(const std::string& newname); - /// Set our text to the given string. - // - /// This function will also update any registered variable - /// - void set_text_value(const char* new_text); - /// Set our text to the given string by effect of an update of a registered variable name // /// This cal only updates the text and is only meant to be called by ourselves /// or by sprite_instance when a registered TextVariable is updated. - /// void updateText(const std::string& s); /// Return value of our text. @@ -285,6 +278,19 @@ private: + /// Set our text to the given string. 
+ // + /// This function will also update any registered variable + /// + void setTextValue(const std::wstring& wstr); + + /// Set our text to the given string by effect of an update of a registered variable name + // + /// This cal only updates the text and is only meant to be called by ourselves + /// or by sprite_instance when a registered TextVariable is updated. + /// + void updateText(const std::wstring& s); + /// Set focus void setFocus(); @@ -300,8 +306,10 @@ /// Call this function when willing to invoke the onKillFocus event handler void onKillFocus(); - /// The actual text - std::string _text; + /// The actual text. Because we have to deal with non-ascii characters (129-255) + /// this is a wide string; the cursor position and the position within the string + /// are then the same, which makes manipulating the string much easier. + std::wstring _text; /// immutable definition of this object, as read /// from the SWF stream. Assured to be not-NULL Index: server/event_id.h =================================================================== RCS file: /sources/gnash/gnash/server/event_id.h,v retrieving revision 1.15 retrieving revision 1.16 diff -u -b -r1.15 -r1.16 --- server/event_id.h 21 Jan 2008 20:55:50 -0000 1.15 +++ server/event_id.h 5 Feb 2008 12:01:51 -0000 1.16 @@ -29,11 +29,8 @@ #include "gnash.h" // for gnash::key namespace -#include <cwchar> - namespace gnash { - /// For keyDown and stuff like that. // /// Implementation is currently in action.cpp @@ -95,30 +92,48 @@ }; id_code m_id; - unsigned char m_key_code; - event_id() : m_id(INVALID), m_key_code(key::INVALID) {} + // keyCode must be the unique gnash key identifier + // gnash::key::code. + // edit_text_character has to be able to work out the + // ASCII value from keyCode, while other users need + // the SWF code or the Flash key code. 
+ key::code keyCode; + + event_id() : m_id(INVALID), keyCode(key::INVALID) {} - event_id(id_code id, unsigned char c = key::INVALID) + event_id(id_code id, key::code c = key::INVALID) : m_id(id), - m_key_code(c) + keyCode(c) { // you must supply a key code for KEY_PRESS event // - // we do have a testcase with m_id == KEY_PRESS, and m_key_code==0(KEY_INVALID) + // we do have a testcase with m_id == KEY_PRESS, and keyCode==0(KEY_INVALID) // see key_event_test.swf(produced by Ming) // - //assert((m_key_code == key::INVALID && (m_id != KEY_PRESS)) - // || (m_key_code != key::INVALID && (m_id == KEY_PRESS))); + //assert((keyCode == key::INVALID && (m_id != KEY_PRESS)) + // || (keyCode != key::INVALID && (m_id == KEY_PRESS))); } - void setKeyCode(unsigned char key) + /// + /// @param SWFKey The SWF code matched to the event. This + /// must be converted to a unique gnash::key::code. + void setKeyCode(boost::uint8_t SWFkey) { - m_key_code = key; + // Lookup the SWFcode in the gnash::key::code table. + // Some are not unique (keypad numbers are the + // same as normal numbers), so we take the first match. + // As long as we can work out the SWFCode from the + // gnash::key::code it's all right. 
+ int i = 0; + while (key::codeMap[i][key::SWF] != SWFkey && i < key::KEYCOUNT) i++; + + if (i == key::KEYCOUNT) keyCode = key::INVALID; + else keyCode = (key::code)i; } - bool operator==(const event_id& id) const { return m_id == id.m_id && m_key_code == id.m_key_code; } + bool operator==(const event_id& id) const { return m_id == id.m_id && keyCode == id.keyCode; } bool operator< (const event_id& id) const { @@ -126,7 +141,7 @@ if ( m_id > id.m_id ) return false; // m_id are equal, check key code - if ( m_key_code < id.m_key_code ) return true; + if ( keyCode < id.keyCode ) return true; return false; } Index: server/gnash.h =================================================================== RCS file: /sources/gnash/gnash/server/gnash.h,v retrieving revision 1.113 retrieving revision 1.114 diff -u -b -r1.113 -r1.114 --- server/gnash.h 21 Jan 2008 20:55:51 -0000 1.113 +++ server/gnash.h 5 Feb 2008 12:01:51 -0000 1.114 @@ -637,7 +637,15 @@ KEYCOUNT }; -const unsigned char codeMap[KEYCOUNT][3] = { +enum type +{ + SWF, + KEY, + ASCII, + TYPES +}; + +const unsigned char codeMap[KEYCOUNT][TYPES] = { //{swfKeyCode, keycode, asciiKeyCode} {0, 0, 0}, // INVALID = 0 {0, 0, 0}, // UNKNOWN1 Index: server/movie_root.cpp =================================================================== RCS file: /sources/gnash/gnash/server/movie_root.cpp,v retrieving revision 1.157 retrieving revision 1.158 diff -u -b -r1.157 -r1.158 --- server/movie_root.cpp 31 Jan 2008 13:41:32 -0000 1.157 +++ server/movie_root.cpp 5 Feb 2008 12:01:51 -0000 1.158 @@ -1133,7 +1133,8 @@ { // KEY_UP and KEY_DOWN events are unrelated to any key! ch->on_event(event_id(event_id::KEY_DOWN, key::INVALID)); - ch->on_event(event_id(event_id::KEY_PRESS, key::codeMap[k][0])); + // Pass the unique Gnash key code! 
+ ch->on_event(event_id(event_id::KEY_PRESS, k)); } else { Index: libbase/utf8.cpp =================================================================== RCS file: /sources/gnash/gnash/libbase/utf8.cpp,v retrieving revision 1.7 retrieving revision 1.8 diff -u -b -r1.7 -r1.8 --- libbase/utf8.cpp 4 Feb 2008 15:16:54 -0000 1.7 +++ libbase/utf8.cpp 5 Feb 2008 12:01:51 -0000 1.8 @@ -1,15 +1,58 @@ -// utf8.cpp -- Thatcher Ulrich 2004 -*- coding: utf-8;-*- - -// This source code has been donated to the Public Domain. Do -// whatever you want with it. THE AUTHOR DOES NOT WARRANT THIS CODE. - -// Utility code for dealing with UTF-8 encoded text. +// utf8.cpp: utilities for converting to and from UTF-8 +// +// Copyright (C) 2008 Free Software Foundation, Inc. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. 
+// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +// +// Based on the public domain work of Thatcher Ulrich <[EMAIL PROTECTED]> 2004 // // Much useful info at "UTF-8 and Unicode FAQ" http://www.cl.cam.ac.uk/~mgk25/unicode.html #include "utf8.h" +std::wstring +utf8::decodeCanonicalString(const std::string& str) +{ + std::wstring wstr = L""; + + std::string::const_iterator it = str.begin(); + while (boost::uint32_t code = utf8::decodeNextUnicodeCharacter(it)) + { + wstr.push_back((wchar_t) code); + } + + return wstr; + +} + +std::string +utf8::encodeCanonicalString(const std::wstring& wstr) +{ + std::string str = ""; + + std::wstring::const_iterator it = wstr.begin(); + while ( it != wstr.end()) + { + str.append(utf8::encodeUnicodeCharacter(*it++)); + } + + return str; + +} + boost::uint32_t utf8::decodeNextUnicodeCharacter(std::string::const_iterator& it) { boost::uint32_t uc; @@ -104,6 +147,7 @@ else { // Invalid. + it++; return INVALID; } } @@ -119,47 +163,47 @@ if (ucs_character <= 0x7F) { // Plain single-byte ASCII. - text += (char) ucs_character; + text.push_back(ucs_character); } else if (ucs_character <= 0x7FF) { // Two bytes. - text += 0xC0 | (ucs_character >> 6); - text += 0x80 | ((ucs_character >> 0) & 0x3F); + text.push_back(0xC0 | (ucs_character >> 6)); + text.push_back(0x80 | ((ucs_character >> 0) & 0x3F)); } else if (ucs_character <= 0xFFFF) { // Three bytes. - text += 0xE0 | (ucs_character >> 12); - text += 0x80 | ((ucs_character >> 6) & 0x3F); - text += 0x80 | ((ucs_character >> 0) & 0x3F); + text.push_back(0xE0 | (ucs_character >> 12)); + text.push_back(0x80 | ((ucs_character >> 6) & 0x3F)); + text.push_back(0x80 | ((ucs_character >> 0) & 0x3F)); } else if (ucs_character <= 0x1FFFFF) { // Four bytes. 
- text += 0xF0 | (ucs_character >> 18); - text += 0x80 | ((ucs_character >> 12) & 0x3F); - text += 0x80 | ((ucs_character >> 6) & 0x3F); - text += 0x80 | ((ucs_character >> 0) & 0x3F); + text.push_back(0xF0 | (ucs_character >> 18)); + text.push_back(0x80 | ((ucs_character >> 12) & 0x3F)); + text.push_back(0x80 | ((ucs_character >> 6) & 0x3F)); + text.push_back(0x80 | ((ucs_character >> 0) & 0x3F)); } else if (ucs_character <= 0x3FFFFFF) { // Five bytes. - text += 0xF8 | (ucs_character >> 24); - text += 0x80 | ((ucs_character >> 18) & 0x3F); - text += 0x80 | ((ucs_character >> 12) & 0x3F); - text += 0x80 | ((ucs_character >> 6) & 0x3F); - text += 0x80 | ((ucs_character >> 0) & 0x3F); + text.push_back(0xF8 | (ucs_character >> 24)); + text.push_back(0x80 | ((ucs_character >> 18) & 0x3F)); + text.push_back(0x80 | ((ucs_character >> 12) & 0x3F)); + text.push_back(0x80 | ((ucs_character >> 6) & 0x3F)); + text.push_back(0x80 | ((ucs_character >> 0) & 0x3F)); } else if (ucs_character <= 0x7FFFFFFF) { // Six bytes. - text += 0xFC | (ucs_character >> 30); - text += 0x80 | ((ucs_character >> 24) & 0x3F); - text += 0x80 | ((ucs_character >> 18) & 0x3F); - text += 0x80 | ((ucs_character >> 12) & 0x3F); - text += 0x80 | ((ucs_character >> 6) & 0x3F); - text += 0x80 | ((ucs_character >> 0) & 0x3F); + text.push_back(0xFC | (ucs_character >> 30)); + text.push_back(0x80 | ((ucs_character >> 24) & 0x3F)); + text.push_back(0x80 | ((ucs_character >> 18) & 0x3F)); + text.push_back(0x80 | ((ucs_character >> 12) & 0x3F)); + text.push_back(0x80 | ((ucs_character >> 6) & 0x3F)); + text.push_back(0x80 | ((ucs_character >> 0) & 0x3F)); } else { @@ -169,199 +213,6 @@ return text; } - -#ifdef UTF8_UNIT_TEST - -// Compile this test case with something like: -// -// c++ utf8.cpp -g -I.. -DUTF8_UNIT_TEST -o utf8_test -// -// or -// -// cl utf8.cpp -Zi -Od -DUTF8_UNIT_TEST -I.. 
-// -// If possible, try running the test program with the first arg -// pointing at the file: -// -// http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt -// -// and examine the results by eye to make sure they are acceptable to -// you. - - -#include "utility.h" -#include <cstdio> - - -bool check_equal(const char* utf8_in, const boost::uint32_t* ucs_in) -{ - for (;;) - { - boost::uint32_t next_ucs = *ucs_in++; - boost::uint32_t next_ucs_from_utf8 = utf8::decode_next_unicode_character(&utf8_in); - if (next_ucs != next_ucs_from_utf8) - { - return false; - } - if (next_ucs == 0) - { - assert(next_ucs_from_utf8 == 0); - break; - } - } - - return true; -} - - -void log_ascii(const char* line) -{ - for (;;) - { - unsigned char c = (unsigned char) *line++; - if (c == 0) - { - // End of line. - return; - } - else if (c != '\n' - && (c < 32 || c > 127)) - { - // Non-printable as plain ASCII. - printf("<0x%02X>", (int) c); - } - else - { - printf("%c", c); - } - } -} - - -void log_ucs(const boost::uint32_t* line) -{ - for (;;) - { - boost::uint32_t uc = *line++; - if (uc == 0) - { - // End of line. - return; - } - else if (uc != '\n' - && (uc < 32 || uc > 127)) - { - // Non-printable as plain ASCII. - printf("<U-%04X>", uc); - } - else - { - printf("%c", (char) uc); - } - } -} - - -int main(int argc, const char* argv[]) -{ - // Simple canned test. - { - const char* test8 = "Ignacio Castaño"; - const boost::uint32_t test32[] = - { - 0x49, 0x67, 0x6E, 0x61, 0x63, - 0x69, 0x6F, 0x20, 0x43, 0x61, - 0x73, 0x74, 0x61, 0xF1, 0x6F, - 0x00 - }; - - assert(check_equal(test8, test32)); - } - - // If user passed an arg, try reading the file as UTF-8 encoded text. - if (argc > 1) - { - const char* filename = argv[1]; - FILE* fp = fopen(filename, "rb"); - if (fp == NULL) - { - printf("Can't open file '%s'\n", filename); - return 1; - } - - // Read lines from the file, encode/decode them, and highlight discrepancies. 
- const int LINE_SIZE = 200; // max line size - char line_buffer_utf8[LINE_SIZE]; - char reencoded_utf8[6 * LINE_SIZE]; - boost::uint32_t line_buffer_ucs[LINE_SIZE]; - - int byte_counter = 0; - for (;;) - { - int c = fgetc(fp); - if (c == EOF) - { - // Done. - break; - } - line_buffer_utf8[byte_counter++] = c; - if (c == '\n' || byte_counter >= LINE_SIZE - 2) - { - // End of line. Process the line. - line_buffer_utf8[byte_counter++] = '\0'; // terminate. - - // Decode into UCS. - const char* p = line_buffer_utf8; - boost::uint32_t* q = line_buffer_ucs; - for (;;) - { - boost::uint32_t uc = utf8::decode_next_unicode_character(&p); - *q++ = uc; - - assert(q < line_buffer_ucs + LINE_SIZE); - assert(p < line_buffer_utf8 + LINE_SIZE); - - if (uc == 0) break; - } - - // Encode back into UTF-8. - q = line_buffer_ucs; - int index = 0; - for (;;) - { - boost::uint32_t uc = *q++; - assert(index < LINE_SIZE * 6 - 6); - int last_index = index; - utf8::encode_unicode_character(reencoded_utf8, &index, uc); - assert(index <= last_index + 6); - if (uc == 0) break; - } - -// This can be useful for debugging. -#if 0 - // Show the UCS and the re-encoded UTF-8. - log_ucs(line_buffer_ucs); - log_ascii(reencoded_utf8); -#endif // 0 - - assert(check_equal(line_buffer_utf8, line_buffer_ucs)); - assert(check_equal(reencoded_utf8, line_buffer_ucs)); - - // Start next line. - byte_counter = 0; - } - } - - fclose(fp); - } - - return 0; -} - - -#endif // UTF8_UNIT_TEST - - // Local Variables: // mode: C++ // c-basic-offset: 8 Index: libbase/utf8.h =================================================================== RCS file: /sources/gnash/gnash/libbase/utf8.h,v retrieving revision 1.5 retrieving revision 1.6 diff -u -b -r1.5 -r1.6 --- libbase/utf8.h 4 Feb 2008 15:16:54 -0000 1.5 +++ libbase/utf8.h 5 Feb 2008 12:01:52 -0000 1.6 @@ -1,28 +1,46 @@ -// utf8.h -- Thatcher Ulrich <[EMAIL PROTECTED]> 2004 - -// This source code has been donated to the Public Domain. Do -// whatever you want with it. 
THE AUTHOR DOES NOT WARRANT THIS CODE. - -// Utility code for dealing with UTF-8 encoded text. - +// utf8.h: utilities for converting to and from UTF-8 +// +// Copyright (C) 2008 Free Software Foundation, Inc. +// +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation; either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program; if not, write to the Free Software +// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +// +// Based on the public domain work of Thatcher Ulrich <[EMAIL PROTECTED]> 2004 #ifndef UTF8_H #define UTF8_H -#include "tu_config.h" // needed ? +#include "tu_config.h" // For DSOEXPORT #include <string> - #include <boost/cstdint.hpp> // for boost::?int??_t namespace utf8 { + // Converts a UTF-8 encoded std::string with multibyte characters into + // a std::wstring. + DSOEXPORT std::wstring decodeCanonicalString(const std::string& str); + + // Converts a std::wstring into a UTF-8 encoded std::string. + DSOEXPORT std::string encodeCanonicalString(const std::wstring& wstr); + // Return the next Unicode character in the UTF-8 encoded // string. Invalid UTF-8 sequences produce a U+FFFD character // as output. Advances string iterator past the character // returned, unless the returned character is '\0', in which // case the iterator does not advance. 
- DSOEXPORT boost::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator& it); + boost::uint32_t decodeNextUnicodeCharacter(std::string::const_iterator& it); // Encodes the given UCS character into the given UTF-8 // buffer. Writes the data starting at buffer[offset], and @@ -30,7 +48,7 @@ // // May write up to 6 bytes, so make sure there's room in the // buffer! - DSOEXPORT std::string encodeUnicodeCharacter(boost::uint32_t ucs_character); + std::string encodeUnicodeCharacter(boost::uint32_t ucs_character); } _______________________________________________ Gnash-commit mailing list Gnash-commit@gnu.org http://lists.gnu.org/mailman/listinfo/gnash-commit