CVSROOT: /sources/gnash Module name: gnash Changes by: Chad Musick <cmusick> 07/08/20 03:25:10
Modified files: server/vm : ASHandlers.cpp ASHandlers.h Log message: Adding the multibyte string operators. This behavior currently has no testcases. There may be a more efficient way to know rather than discover encoding, but I have not found it. These operators have been deprecated since SWF 5. CVSWeb URLs: http://cvs.savannah.gnu.org/viewcvs/gnash/server/vm/ASHandlers.cpp?cvsroot=gnash&r1=1.122&r2=1.123 http://cvs.savannah.gnu.org/viewcvs/gnash/server/vm/ASHandlers.h?cvsroot=gnash&r1=1.8&r2=1.9 Patches: Index: ASHandlers.cpp =================================================================== RCS file: /sources/gnash/gnash/server/vm/ASHandlers.cpp,v retrieving revision 1.122 retrieving revision 1.123 diff -u -b -r1.122 -r1.123 --- ASHandlers.cpp 19 Aug 2007 22:50:16 -0000 1.122 +++ ASHandlers.cpp 20 Aug 2007 03:25:09 -0000 1.123 @@ -17,7 +17,7 @@ // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA // -/* $Id: ASHandlers.cpp,v 1.122 2007/08/19 22:50:16 strk Exp $ */ +/* $Id: ASHandlers.cpp,v 1.123 2007/08/20 03:25:09 cmusick Exp $ */ #ifdef HAVE_CONFIG_H #include "config.h" @@ -55,6 +55,7 @@ #include <set> #include <vector> #include <utility> // for std::pair +#include <locale.h> using namespace std; @@ -1376,12 +1377,115 @@ env.top(0).set_int(tu_random::next_random() % max); } +as_encoding_guess_t +SWFHandlers::GuessEncoding(std::string &str, int &length, int *offsets) +{ + const char *cstr = str.c_str(); + const char *i = cstr; + int width = 0; // The remaining width, not the total. + bool is_sought = true; + int j; + int index = 0; + + length = 0; + // First, assume it's UTF8 and try to be wrong. + for (index = 0; is_sought && *i != '\0'; ++i, ++index) + { + j = static_cast<int> (*i); + + if (width) + { + --width; + if ((j & 0xB0) != 0x80) + is_sought = false; + continue; + } + ++length; + *(offsets + length - 1) = index; + + if ((j & 0xC0) == 0x80) + continue; // A 1 byte character. 
+ else if ((j & 0xE0) == 0xC0) + width = 1; + else if ((j & 0xF0) == 0xE0) + width = 2; + else if ((j & 0xF8) == 0xF0) + width = 3; + else if (j & 0x80) + is_sought = false; + } + *(offsets + length - 1) = index; + if (!width && is_sought) // No width left, so it's almost certainly UTF8. + return ENCGUESS_UNICODE; + + is_sought = true; + width = 0; + length = 0; + bool was_odd = true; + bool was_even = true; + // Now, assume it's SHIFT_JIS and try to be wrong. + for (index = 0, i = cstr; is_sought && (*i != '\0'); ++i, ++index) + { + j = static_cast<int> (*i); + + if (width) + { + --width; + if ((j < 0x40) || ((j < 0x9F) && was_even) || + ((j > 0x9E) && was_odd) || (j == 0x7F)) + { + is_sought = false; + } + continue; + } + + ++length; + *(offsets + length - 1) = index; + + if ((j == 0x80) || (j == 0xA0) || (j >= 0xF0)) + { + is_sought = false; + break; + } + + if (((j >= 0x81) && (j <= 0x9F)) || ((j >= 0xE0) && (j <= 0xEF))) + { + width = 1; + was_odd = j & 0x01; + was_even = !was_odd; + } + + } + *(offsets + length - 1) = index; + if (!width && is_sought) // No width left, so it's probably SHIFT_JIS. + return ENCGUESS_JIS; + + // It's something else. 
+ length = mbstowcs(NULL, cstr, 0); + if (length == -1) + length = strlen(cstr); + return ENCGUESS_OTHER; +} + void -SWFHandlers::ActionMbLength(ActionExec& /*thread*/) +SWFHandlers::ActionMbLength(ActionExec& thread) { // GNASH_REPORT_FUNCTION; -// as_environment& env = thread.env; - log_unimpl (__PRETTY_FUNCTION__); + as_environment& env = thread.env; + + thread.ensureStack(1); + string str = env.top(0).to_string(&env); + + if (str.empty()) + { + env.top(0).set_int(0); + } + else + { + int length; + (void) GuessEncoding(str, length, NULL); + env.top(0).set_int(length); + } } void @@ -1423,11 +1527,76 @@ } void -SWFHandlers::ActionMbSubString(ActionExec& /*thread*/) +SWFHandlers::ActionMbSubString(ActionExec& thread) { // GNASH_REPORT_FUNCTION; -// as_environment& env = thread.env; - log_unimpl (__PRETTY_FUNCTION__); + as_environment& env = thread.env; + + thread.ensureStack(3); + + int size = env.top(0).to_int(env); + int start = env.top(1).to_int(env); + as_value& string_val = env.top(2); + + env.drop(2); + + if (string_val.is_undefined() || string_val.is_null()) + { + log_error(_("Undefined or null string passed to ActionMBSubString, " + "returning undefined")); + env.top(0).set_undefined(); + return; + } + + if (size < 1) + { + if (size < 0) + { + IF_VERBOSE_ASCODING_ERRORS( + log_aserror(_("Length is less than 1 in ActionMbSubString, " + "returning empty string.")); + ); + } + env.top(0).set_string(""); + return; + } + + string str = string_val.to_string(&env); + int length = 0; + int offsets[str.length() + 1]; + + as_encoding_guess_t encoding = GuessEncoding(str, length, offsets); + + if (start < 1) + { + IF_VERBOSE_ASCODING_ERRORS( + log_aserror(_("Base is less then 1 in ActionMbSubString, " + "setting to 1.")); + ); + start = 1; + } + + // Adjust the start for our own use. 
+ --start; + + if (size + start - 1 > length) + { + IF_VERBOSE_ASCODING_ERRORS( + log_aserror(_("base+size goes beyond input string in ActionMbSubString, " + "adjusting size")); + ); + size = length - start; + } + + if (encoding == ENCGUESS_OTHER) + { + env.top(0).set_string(str.substr(start, size)); + } + else + { + env.top(0).set_string(str.substr(offsets[start], offsets[size] - offsets[start] + 1)); + } + return; } void @@ -1439,11 +1608,26 @@ } void -SWFHandlers::ActionMbChr(ActionExec& /*thread*/) +SWFHandlers::ActionMbChr(ActionExec& thread) { // GNASH_REPORT_FUNCTION; -// as_environment& env = thread.env; - log_unimpl (__PRETTY_FUNCTION__); +// The correctness of this depends on the locale being correct, +// which it should be far more often than not, by choosing UTF8. + as_environment& env = thread.env; + + thread.ensureStack(1); + + wchar_t i = static_cast<wchar_t> (env.top(0).to_int(env)); + char str[MB_CUR_MAX + 1]; + memset(str, '\0', MB_CUR_MAX + 1); + if (wctomb(str, i) == -1) + { + env.top(0).set_undefined(); + } + else + { + env.top(0).set_string(str); + } } // also known as WaitForFrame2 Index: ASHandlers.h =================================================================== RCS file: /sources/gnash/gnash/server/vm/ASHandlers.h,v retrieving revision 1.8 retrieving revision 1.9 diff -u -b -r1.8 -r1.9 --- ASHandlers.h 1 Jul 2007 10:54:36 -0000 1.8 +++ ASHandlers.h 20 Aug 2007 03:25:09 -0000 1.9 @@ -49,6 +49,12 @@ ARG_FUNCTION2 } as_arg_t; +typedef enum { + ENCGUESS_UNICODE = 0, + ENCGUESS_JIS = 1, + ENCGUESS_OTHER = 2 +} as_encoding_guess_t; + // @@strk@@ should we move this to .cpp file ? it's only // use is within SWFHandlers, anyway... typedef void (*action_callback_t)(ActionExec& thread); @@ -154,6 +160,13 @@ static void CommonSetTarget(as_environment& env, const std::string& target_name); + /// Common code for guessing at the encoding of random text, between + // Shift-Jis, UTF8, and other. 
Puts the character count in length, + // and the offsets to the characters in offsets, if offsets is not NULL. + // If not NULL, offsets should be at least s.length(). + // offsets are not accurate if the return value is ENCGUESS_OTHER + static as_encoding_guess_t GuessEncoding(std::string& s, int& length, int *offsets); + static void ActionEnd(ActionExec& thread); static void ActionNextFrame(ActionExec& thread); static void ActionPrevFrame(ActionExec& thread); _______________________________________________ Gnash-commit mailing list Gnash-commit@gnu.org http://lists.gnu.org/mailman/listinfo/gnash-commit