andrei Wed Jun 21 20:17:21 2006 UTC Modified files: /ZendEngine2 zend_constants.c zend_unicode.c zend_unicode.h /php-src/ext/unicode unicode.c /php-src/main/streams filter.c streams.c Log: Implement support for a user-defined conversion error handler. It works like a normal error handler, in that it can return false to make the default handler take over. The handler signature is: user_handler($direction, $encoding, $char_byte, $offset, $message). Also removed support for using exceptions in the default error handler.
http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_constants.c?r1=1.86&r2=1.87&diff_format=u Index: ZendEngine2/zend_constants.c diff -u ZendEngine2/zend_constants.c:1.86 ZendEngine2/zend_constants.c:1.87 --- ZendEngine2/zend_constants.c:1.86 Sun Mar 26 01:48:33 2006 +++ ZendEngine2/zend_constants.c Wed Jun 21 20:17:21 2006 @@ -17,7 +17,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: zend_constants.c,v 1.86 2006/03/26 01:48:33 andrei Exp $ */ +/* $Id: zend_constants.c,v 1.87 2006/06/21 20:17:21 andrei Exp $ */ #include "zend.h" #include "zend_constants.h" @@ -124,7 +124,6 @@ REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_JAVA", ZEND_CONV_ERROR_ESCAPE_JAVA, CONST_PERSISTENT | CONST_CS); REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_DEC", ZEND_CONV_ERROR_ESCAPE_XML_DEC, CONST_PERSISTENT | CONST_CS); REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_HEX", ZEND_CONV_ERROR_ESCAPE_XML_HEX, CONST_PERSISTENT | CONST_CS); - REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_EXCEPTION", ZEND_CONV_ERROR_EXCEPTION, CONST_PERSISTENT | CONST_CS); REGISTER_MAIN_LONG_CONSTANT("FROM_UNICODE", ZEND_FROM_UNICODE, CONST_PERSISTENT | CONST_CS); REGISTER_MAIN_LONG_CONSTANT("TO_UNICODE", ZEND_TO_UNICODE, CONST_PERSISTENT | CONST_CS); http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_unicode.c?r1=1.22&r2=1.23&diff_format=u Index: ZendEngine2/zend_unicode.c diff -u ZendEngine2/zend_unicode.c:1.22 ZendEngine2/zend_unicode.c:1.23 --- ZendEngine2/zend_unicode.c:1.22 Thu May 4 21:22:17 2006 +++ ZendEngine2/zend_unicode.c Wed Jun 21 20:17:21 2006 @@ -390,8 +390,8 @@ } /* }}} */ -/* {{{ zend_raise_conversion_error_ex */ -ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset, int use_exception TSRMLS_DC) +/* {{{ zend_default_conversion_error_handler */ +static void zend_default_conversion_error_handler(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset 
TSRMLS_DC) { const char *conv_name; UErrorCode status = U_ZERO_ERROR; @@ -399,15 +399,6 @@ if (!message) return; - if (!conv) { - if (use_exception) { - zend_throw_exception_ex(unicodeConversionException, 0 TSRMLS_CC, "%s", message); - } else { - zend_error(E_WARNING, "%s", message); - } - return; - } - conv_name = ucnv_getName(conv, &status); /* * UTODO @@ -426,11 +417,7 @@ ucnv_getInvalidUChars(conv, err_char, &err_char_len, &status); codepoint = (err_char_len < 2) ? err_char[0] : U16_GET_SUPPLEMENTARY(err_char[0], err_char[1]); - if (use_exception) { - zend_throw_exception_ex(unicodeConversionException, 0 TSRMLS_CC, message_fmt, message, conv_name?conv_name:"<unknown>", codepoint, error_char_offset-1); - } else { - zend_error(E_WARNING, message_fmt, message, conv_name?conv_name:"", codepoint, error_char_offset-1); - } + zend_error(E_WARNING, message_fmt, message, conv_name?conv_name:"", codepoint, error_char_offset-1); } else { char err_char[8]; /* UTF-8 uses up to 8 bytes */ char buf[32]; /* 4x number of error bytes */ @@ -446,11 +433,106 @@ p += 5; } - if (use_exception) { - zend_throw_exception_ex(unicodeConversionException, 0 TSRMLS_CC, message_fmt, message, conv_name?conv_name:"<unknown>", buf, error_char_offset-err_char_len); - } else { - zend_error(E_WARNING, message_fmt, message, conv_name?conv_name:"", buf, error_char_offset-err_char_len); + zend_error(E_WARNING, message_fmt, message, conv_name?conv_name:"", buf, error_char_offset-err_char_len); + } +} +/* }}} */ + +/* {{{ zend_call_conversion_error_handler */ +static void zend_call_conversion_error_handler(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC) +{ + zval *z_message, *z_dir, *z_encoding, *z_char, *z_offset; + zval ***params; + zval *retval; + zval *orig_user_error_handler; + const char *conv_name; + UErrorCode status = U_ZERO_ERROR; + + ALLOC_INIT_ZVAL(z_message); + ALLOC_INIT_ZVAL(z_dir); + ALLOC_INIT_ZVAL(z_encoding); + ALLOC_INIT_ZVAL(z_char); + 
ALLOC_INIT_ZVAL(z_offset); + + if (message) { + ZVAL_STRING(z_message, message, 1); + } else { + ZVAL_NULL(z_message); + } + + ZVAL_LONG(z_dir, dir); + + conv_name = ucnv_getName(conv, &status); + /* + * UTODO + * use some other standard than MIME? or fallback onto IANA? or use + * internal converter name? ponder + * maybe pass Converter object, when it's implemented? + */ + conv_name = ucnv_getStandardName(conv_name, "MIME", &status); + ZVAL_STRING(z_encoding, (char *) conv_name, 1); + + if (dir == ZEND_FROM_UNICODE) { + UChar err_char[U16_MAX_LENGTH]; + int8_t err_char_len = sizeof(err_char); + + ucnv_getInvalidUChars(conv, err_char, &err_char_len, &status); + ZVAL_UNICODEL(z_char, err_char, err_char_len, 1); + ZVAL_LONG(z_offset, error_char_offset-1); + } else { + char err_char[8]; /* UTF-8 uses up to 8 bytes */ + int8_t err_char_len = sizeof(err_char); + + ucnv_getInvalidChars(conv, err_char, &err_char_len, &status); + ZVAL_STRINGL(z_char, err_char, err_char_len, 1); + ZVAL_LONG(z_offset, error_char_offset-err_char_len); + } + + params = (zval ***) emalloc(sizeof(zval **) * 6); + params[0] = &z_dir; + params[1] = &z_encoding; + params[2] = &z_char; + params[3] = &z_offset; + params[4] = &z_message; + + orig_user_error_handler = UG(conv_error_handler); + UG(conv_error_handler) = NULL; + + if (call_user_function_ex(EG(function_table), NULL, orig_user_error_handler, &retval, 5, params, 1, NULL TSRMLS_CC)==SUCCESS) { + if (retval) { + /* user error handler returned 'false', use built-in error handler */ + if (Z_TYPE_P(retval) == IS_BOOL && Z_LVAL_P(retval) == 0) { + zend_default_conversion_error_handler(message, conv, dir, error_char_offset TSRMLS_CC); + } + zval_ptr_dtor(&retval); } + } else if (!EG(exception)) { + /* The user error handler failed, use built-in error handler */ + zend_default_conversion_error_handler(message, conv, dir, error_char_offset TSRMLS_CC); + } + + if (!UG(conv_error_handler)) { + UG(conv_error_handler) = orig_user_error_handler; + } else 
{ + zval_ptr_dtor(&orig_user_error_handler); + } + + efree(params); + zval_ptr_dtor(&z_dir); + zval_ptr_dtor(&z_encoding); + zval_ptr_dtor(&z_char); + zval_ptr_dtor(&z_offset); + zval_ptr_dtor(&z_message); +} +/* }}} */ + +/* {{{ zend_raise_conversion_error_ex */ +ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC) +{ + if (UG(conv_error_handler)) { + zend_call_conversion_error_handler(message, conv, dir, error_char_offset TSRMLS_CC); + } else { + zend_default_conversion_error_handler(message, conv, dir, error_char_offset TSRMLS_CC); } } /* }}} */ @@ -471,7 +553,7 @@ if (U_FAILURE(status)) { int32_t offset = u_countChar32(u, num_conv); - zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, ZEND_FROM_UNICODE, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, ZEND_FROM_UNICODE, offset TSRMLS_CC); if (s) { efree(s); } @@ -500,7 +582,7 @@ num_conv = zend_convert_to_unicode(conv, &u, &u_len, s, s_len, &status); if (U_FAILURE(status)) { - zend_raise_conversion_error_ex("Could not convert binary string to Unicode string", conv, ZEND_TO_UNICODE, num_conv, (UG(to_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("Could not convert binary string to Unicode string", conv, ZEND_TO_UNICODE, num_conv TSRMLS_CC); if (u) { efree(u); } http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_unicode.h?r1=1.16&r2=1.17&diff_format=u Index: ZendEngine2/zend_unicode.h diff -u ZendEngine2/zend_unicode.h:1.16 ZendEngine2/zend_unicode.h:1.17 --- ZendEngine2/zend_unicode.h:1.16 Thu May 4 21:22:17 2006 +++ ZendEngine2/zend_unicode.h Wed Jun 21 20:17:21 2006 @@ -87,9 +87,7 @@ ZEND_API int zend_is_valid_identifier(UChar *ident, int ident_len); ZEND_API int zend_normalize_identifier(UChar **dest, int *dest_len, UChar *ident, int ident_len, 
zend_bool fold_case); -#define zend_raise_conversion_error(message, exception) \ - zend_raise_conversion_error_ex(message, NULL, 0, 0, exception TSRMLS_CC) -ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset, int use_exception TSRMLS_DC); +ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC); /* * Function to get a codepoint at position n. Iterates over codepoints starting from the http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode.c?r1=1.37&r2=1.38&diff_format=u Index: php-src/ext/unicode/unicode.c diff -u php-src/ext/unicode/unicode.c:1.37 php-src/ext/unicode/unicode.c:1.38 --- php-src/ext/unicode/unicode.c:1.37 Tue Jun 20 23:00:02 2006 +++ php-src/ext/unicode/unicode.c Wed Jun 21 20:17:21 2006 @@ -15,7 +15,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: unicode.c,v 1.37 2006/06/20 23:00:02 andrei Exp $ */ +/* $Id: unicode.c,v 1.38 2006/06/21 20:17:21 andrei Exp $ */ #include "php_unicode.h" #include "zend_unicode.h" @@ -62,7 +62,7 @@ status = U_ZERO_ERROR; num_conv = zend_convert_to_unicode(conv, &dest, &dest_len, str, str_len, &status); if (U_FAILURE(status)) { - zend_raise_conversion_error_ex("could not decode binary string", conv, ZEND_TO_UNICODE, num_conv, (flags & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("could not decode binary string", conv, ZEND_TO_UNICODE, num_conv TSRMLS_CC); efree(dest); ucnv_close(conv); RETURN_FALSE; @@ -114,7 +114,7 @@ num_conv = zend_convert_from_unicode(conv, &dest, &dest_len, uni, uni_len, &status); if (U_FAILURE(status)) { int32_t offset = u_countChar32(uni, num_conv); - zend_raise_conversion_error_ex("could not encode Unicode string", conv, ZEND_FROM_UNICODE, offset, (flags & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("could not encode Unicode string", conv, 
ZEND_FROM_UNICODE, offset TSRMLS_CC); efree(dest); ucnv_close(conv); RETURN_FALSE; http://cvs.php.net/viewvc.cgi/php-src/main/streams/filter.c?r1=1.32&r2=1.33&diff_format=u Index: php-src/main/streams/filter.c diff -u php-src/main/streams/filter.c:1.32 php-src/main/streams/filter.c:1.33 --- php-src/main/streams/filter.c:1.32 Fri May 19 10:23:43 2006 +++ php-src/main/streams/filter.c Wed Jun 21 20:17:21 2006 @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: filter.c,v 1.32 2006/05/19 10:23:43 tony2001 Exp $ */ +/* $Id: filter.c,v 1.33 2006/06/21 20:17:21 andrei Exp $ */ #include "php.h" #include "php_globals.h" @@ -750,7 +750,7 @@ if (U_FAILURE(status)) { int32_t offset = u_countChar32(bucket->buf.u, num_conv); - zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, ZEND_FROM_UNICODE, offset, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("Could not convert Unicode string to binary string", conv, ZEND_FROM_UNICODE, offset TSRMLS_CC); } if (bucket->own_buf) { http://cvs.php.net/viewvc.cgi/php-src/main/streams/streams.c?r1=1.126&r2=1.127&diff_format=u Index: php-src/main/streams/streams.c diff -u php-src/main/streams/streams.c:1.126 php-src/main/streams/streams.c:1.127 --- php-src/main/streams/streams.c:1.126 Wed Jun 21 17:10:13 2006 +++ php-src/main/streams/streams.c Wed Jun 21 20:17:21 2006 @@ -19,7 +19,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: streams.c,v 1.126 2006/06/21 17:10:13 andrei Exp $ */ +/* $Id: streams.c,v 1.127 2006/06/21 20:17:21 andrei Exp $ */ #define _GNU_SOURCE #include "php.h" @@ -1222,7 +1222,7 @@ num_conv = zend_convert_from_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &str, &len, buf.u, buflen, &status); if (U_FAILURE(status)) { zend_raise_conversion_error_ex("Unable to convert data to be written", ZEND_U_CONVERTER(UG(runtime_encoding_conv)), - 
ZEND_FROM_UNICODE, num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + ZEND_FROM_UNICODE, num_conv TSRMLS_CC); } else { php_error_docref(NULL TSRMLS_CC, E_NOTICE, "%d character unicode buffer downcoded for binary stream runtime_encoding", ulen); } @@ -1270,7 +1270,7 @@ } else { /* Figure out how didwrite corresponds to the input buffer */ char *tmp = emalloc(didwrite + 1), *t = tmp; - UChar *s = buf_orig; + const UChar *s = buf_orig; UErrorCode status = U_ZERO_ERROR; ucnv_resetFromUnicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv))); @@ -1570,7 +1570,7 @@ while ((b = php_stream_read_unicode(stream, inbuf_start, sizeof(inbuf_start))) > 0) { char *outbuf = outbuf_start; - UChar *inbuf = inbuf_start; + const UChar *inbuf = inbuf_start; UErrorCode status = U_ZERO_ERROR; int len; @@ -1580,7 +1580,7 @@ /* Memory overflow isn't a problem becuase MAX_BYTES_FOR_STRING was allocated, anything else is a more serious problem */ zend_raise_conversion_error_ex("Unable to convert Unicode character using output_encoding, at least one character was lost", - conv, ZEND_FROM_UNICODE, len, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + conv, ZEND_FROM_UNICODE, len TSRMLS_CC); } if (outbuf > outbuf_start) { PHPWRITE(outbuf_start, outbuf - outbuf_start); @@ -2594,8 +2594,7 @@ num_conv = zend_convert_from_unicode(UG(utf8_conv), &scheme, &scheme_len, path, (p - path) + delim_len, &status); if (U_FAILURE(status)) { if (options & REPORT_ERRORS) { - zend_raise_conversion_error_ex("Unable to convert filepath", UG(utf8_conv), ZEND_FROM_UNICODE, - num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + zend_raise_conversion_error_ex("Unable to convert filepath", UG(utf8_conv), ZEND_FROM_UNICODE, num_conv TSRMLS_CC); } *pathenc = NULL; *pathenc_len = 0; @@ -2634,7 +2633,7 @@ if (U_FAILURE(status)) { if (options & REPORT_ERRORS) { zend_raise_conversion_error_ex("Unable to convert filepath", ZEND_U_CONVERTER(UG(filesystem_encoding_conv)), - 
ZEND_FROM_UNICODE, num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + ZEND_FROM_UNICODE, num_conv TSRMLS_CC); } *pathenc = NULL; @@ -2676,7 +2675,7 @@ if (U_FAILURE(status)) { if (options & REPORT_ERRORS) { zend_raise_conversion_error_ex("Unable to convert filepath", ZEND_U_CONVERTER(UG(filesystem_encoding_conv)), - ZEND_TO_UNICODE, num_conv, (UG(to_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + ZEND_TO_UNICODE, num_conv TSRMLS_CC); } *pathdec = NULL;
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php