andrei          Wed Jun 21 20:17:21 2006 UTC

  Modified files:              
    /ZendEngine2        zend_constants.c zend_unicode.c zend_unicode.h 
    /php-src/ext/unicode        unicode.c 
    /php-src/main/streams       filter.c streams.c 
  Log:
  Implement support for a user-defined conversion error handler. It works
  like a normal error handler, in that it can return false to make the
  default one take over. The handler signature is:
    user_handler($direction, $encoding, $char_byte, $offset, $message)
  
  Also removed support for using exceptions in the default error handler.
  
  
http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_constants.c?r1=1.86&r2=1.87&diff_format=u
Index: ZendEngine2/zend_constants.c
diff -u ZendEngine2/zend_constants.c:1.86 ZendEngine2/zend_constants.c:1.87
--- ZendEngine2/zend_constants.c:1.86   Sun Mar 26 01:48:33 2006
+++ ZendEngine2/zend_constants.c        Wed Jun 21 20:17:21 2006
@@ -17,7 +17,7 @@
    +----------------------------------------------------------------------+
 */
 
-/* $Id: zend_constants.c,v 1.86 2006/03/26 01:48:33 andrei Exp $ */
+/* $Id: zend_constants.c,v 1.87 2006/06/21 20:17:21 andrei Exp $ */
 
 #include "zend.h"
 #include "zend_constants.h"
@@ -124,7 +124,6 @@
        REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_JAVA", 
ZEND_CONV_ERROR_ESCAPE_JAVA, CONST_PERSISTENT | CONST_CS);
        REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_DEC", 
ZEND_CONV_ERROR_ESCAPE_XML_DEC, CONST_PERSISTENT | CONST_CS);
        REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_ESCAPE_XML_HEX", 
ZEND_CONV_ERROR_ESCAPE_XML_HEX, CONST_PERSISTENT | CONST_CS);
-       REGISTER_MAIN_LONG_CONSTANT("U_CONV_ERROR_EXCEPTION", 
ZEND_CONV_ERROR_EXCEPTION, CONST_PERSISTENT | CONST_CS);
 
        REGISTER_MAIN_LONG_CONSTANT("FROM_UNICODE", ZEND_FROM_UNICODE, 
CONST_PERSISTENT | CONST_CS);
        REGISTER_MAIN_LONG_CONSTANT("TO_UNICODE", ZEND_TO_UNICODE, 
CONST_PERSISTENT | CONST_CS);
http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_unicode.c?r1=1.22&r2=1.23&diff_format=u
Index: ZendEngine2/zend_unicode.c
diff -u ZendEngine2/zend_unicode.c:1.22 ZendEngine2/zend_unicode.c:1.23
--- ZendEngine2/zend_unicode.c:1.22     Thu May  4 21:22:17 2006
+++ ZendEngine2/zend_unicode.c  Wed Jun 21 20:17:21 2006
@@ -390,8 +390,8 @@
 }
 /* }}} */
 
-/* {{{ zend_raise_conversion_error_ex */
-ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, 
zend_conv_direction dir, int error_char_offset, int use_exception TSRMLS_DC)
+/* {{{ zend_default_conversion_error_handler */
+static void zend_default_conversion_error_handler(char *message, UConverter 
*conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC)
 {
        const char *conv_name;
        UErrorCode status = U_ZERO_ERROR;
@@ -399,15 +399,6 @@
        if (!message)
                return;
 
-       if (!conv) {
-               if (use_exception) {
-                       zend_throw_exception_ex(unicodeConversionException, 0 
TSRMLS_CC, "%s", message);
-               } else {
-                       zend_error(E_WARNING, "%s", message);
-               }
-               return;
-       }
-
        conv_name = ucnv_getName(conv, &status);
        /*
         * UTODO
@@ -426,11 +417,7 @@
                ucnv_getInvalidUChars(conv, err_char, &err_char_len, &status);
                codepoint = (err_char_len < 2) ? err_char[0] : 
U16_GET_SUPPLEMENTARY(err_char[0], err_char[1]);
 
-               if (use_exception) {
-                       zend_throw_exception_ex(unicodeConversionException, 0 
TSRMLS_CC, message_fmt, message, conv_name?conv_name:"<unknown>", codepoint, 
error_char_offset-1);
-               } else {
-                       zend_error(E_WARNING, message_fmt, message, 
conv_name?conv_name:"", codepoint, error_char_offset-1);
-               }
+               zend_error(E_WARNING, message_fmt, message, 
conv_name?conv_name:"", codepoint, error_char_offset-1);
        } else {
                char err_char[8]; /* UTF-8 uses up to 8 bytes */
                char buf[32];     /* 4x number of error bytes */
@@ -446,11 +433,106 @@
                        p += 5;
                }
 
-               if (use_exception) {
-                       zend_throw_exception_ex(unicodeConversionException, 0 
TSRMLS_CC, message_fmt, message, conv_name?conv_name:"<unknown>", buf, 
error_char_offset-err_char_len);
-               } else {
-                       zend_error(E_WARNING, message_fmt, message, 
conv_name?conv_name:"", buf, error_char_offset-err_char_len);
+               zend_error(E_WARNING, message_fmt, message, 
conv_name?conv_name:"", buf, error_char_offset-err_char_len);
+       }
+}
+/* }}} */
+
+/* {{{ zend_call_conversion_error_handler */
+static void zend_call_conversion_error_handler(char *message, UConverter 
*conv, zend_conv_direction dir, int error_char_offset TSRMLS_DC)
+{
+       zval *z_message, *z_dir, *z_encoding, *z_char, *z_offset;
+       zval ***params;
+       zval *retval;
+       zval *orig_user_error_handler;
+       const char *conv_name;
+       UErrorCode status = U_ZERO_ERROR;
+
+       ALLOC_INIT_ZVAL(z_message);
+       ALLOC_INIT_ZVAL(z_dir);
+       ALLOC_INIT_ZVAL(z_encoding);
+       ALLOC_INIT_ZVAL(z_char);
+       ALLOC_INIT_ZVAL(z_offset);
+
+       if (message) {
+               ZVAL_STRING(z_message, message, 1);
+       } else {
+               ZVAL_NULL(z_message);
+       }
+
+       ZVAL_LONG(z_dir, dir);
+
+       conv_name = ucnv_getName(conv, &status);
+       /*
+        * UTODO
+        * use some other standard than MIME? or fallback onto IANA? or use
+        * internal converter name? ponder
+        * maybe pass Converter object, when it's implemented?
+        */
+       conv_name = ucnv_getStandardName(conv_name, "MIME", &status);
+       ZVAL_STRING(z_encoding, (char *) conv_name, 1);
+
+       if (dir == ZEND_FROM_UNICODE) {
+               UChar err_char[U16_MAX_LENGTH];
+               int8_t err_char_len = sizeof(err_char);
+
+               ucnv_getInvalidUChars(conv, err_char, &err_char_len, &status);
+               ZVAL_UNICODEL(z_char, err_char, err_char_len, 1);
+               ZVAL_LONG(z_offset, error_char_offset-1);
+       } else {
+               char err_char[8]; /* UTF-8 uses up to 8 bytes */
+               int8_t err_char_len = sizeof(err_char);
+
+               ucnv_getInvalidChars(conv, err_char, &err_char_len, &status);
+               ZVAL_STRINGL(z_char, err_char, err_char_len, 1);
+               ZVAL_LONG(z_offset, error_char_offset-err_char_len);
+       }
+
+       params = (zval ***) emalloc(sizeof(zval **) * 6);
+       params[0] = &z_dir;
+       params[1] = &z_encoding;
+       params[2] = &z_char;
+       params[3] = &z_offset;
+       params[4] = &z_message;
+
+       orig_user_error_handler = UG(conv_error_handler);
+       UG(conv_error_handler) = NULL;
+
+       if (call_user_function_ex(EG(function_table), NULL, 
orig_user_error_handler, &retval, 5, params, 1, NULL TSRMLS_CC)==SUCCESS) {
+               if (retval) {
+                       /* user error handler returned 'false', use built-in 
error handler */
+                       if (Z_TYPE_P(retval) == IS_BOOL && Z_LVAL_P(retval) == 
0) {
+                               zend_default_conversion_error_handler(message, 
conv, dir, error_char_offset TSRMLS_CC);
+                       }
+                       zval_ptr_dtor(&retval);
                }
+       } else if (!EG(exception)) {
+               /* The user error handler failed, use built-in error handler */
+               zend_default_conversion_error_handler(message, conv, dir, 
error_char_offset TSRMLS_CC);
+       }
+
+       if (!UG(conv_error_handler)) {
+               UG(conv_error_handler) = orig_user_error_handler;
+       } else {
+               zval_ptr_dtor(&orig_user_error_handler);
+       }
+
+       efree(params);
+       zval_ptr_dtor(&z_dir);
+       zval_ptr_dtor(&z_encoding);
+       zval_ptr_dtor(&z_char);
+       zval_ptr_dtor(&z_offset);
+       zval_ptr_dtor(&z_message);
+}
+/* }}} */
+
+/* {{{ zend_raise_conversion_error_ex */
+ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, 
zend_conv_direction dir, int error_char_offset TSRMLS_DC)
+{
+       if (UG(conv_error_handler)) {
+               zend_call_conversion_error_handler(message, conv, dir, 
error_char_offset TSRMLS_CC);
+       } else {
+               zend_default_conversion_error_handler(message, conv, dir, 
error_char_offset TSRMLS_CC);
        }
 }
 /* }}} */
@@ -471,7 +553,7 @@
        if (U_FAILURE(status)) {
                int32_t offset = u_countChar32(u, num_conv);
 
-               zend_raise_conversion_error_ex("Could not convert Unicode 
string to binary string", conv, ZEND_FROM_UNICODE, offset, (UG(from_error_mode) 
& ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+               zend_raise_conversion_error_ex("Could not convert Unicode 
string to binary string", conv, ZEND_FROM_UNICODE, offset TSRMLS_CC);
                if (s) {
                        efree(s);
                }
@@ -500,7 +582,7 @@
        num_conv = zend_convert_to_unicode(conv, &u, &u_len, s, s_len, &status);
 
        if (U_FAILURE(status)) {
-               zend_raise_conversion_error_ex("Could not convert binary string 
to Unicode string", conv, ZEND_TO_UNICODE, num_conv, (UG(to_error_mode) & 
ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+               zend_raise_conversion_error_ex("Could not convert binary string 
to Unicode string", conv, ZEND_TO_UNICODE, num_conv TSRMLS_CC);
                if (u) {
                        efree(u);
                }
http://cvs.php.net/viewvc.cgi/ZendEngine2/zend_unicode.h?r1=1.16&r2=1.17&diff_format=u
Index: ZendEngine2/zend_unicode.h
diff -u ZendEngine2/zend_unicode.h:1.16 ZendEngine2/zend_unicode.h:1.17
--- ZendEngine2/zend_unicode.h:1.16     Thu May  4 21:22:17 2006
+++ ZendEngine2/zend_unicode.h  Wed Jun 21 20:17:21 2006
@@ -87,9 +87,7 @@
 ZEND_API int zend_is_valid_identifier(UChar *ident, int ident_len);
 ZEND_API int zend_normalize_identifier(UChar **dest, int *dest_len, UChar 
*ident, int ident_len, zend_bool fold_case);
 
-#define zend_raise_conversion_error(message, exception) \
-       zend_raise_conversion_error_ex(message, NULL, 0, 0, exception TSRMLS_CC)
-ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, 
zend_conv_direction dir, int error_char_offset, int use_exception TSRMLS_DC);
+ZEND_API void zend_raise_conversion_error_ex(char *message, UConverter *conv, 
zend_conv_direction dir, int error_char_offset TSRMLS_DC);
 
 /*
  * Function to get a codepoint at position n. Iterates over codepoints 
starting from the
http://cvs.php.net/viewvc.cgi/php-src/ext/unicode/unicode.c?r1=1.37&r2=1.38&diff_format=u
Index: php-src/ext/unicode/unicode.c
diff -u php-src/ext/unicode/unicode.c:1.37 php-src/ext/unicode/unicode.c:1.38
--- php-src/ext/unicode/unicode.c:1.37  Tue Jun 20 23:00:02 2006
+++ php-src/ext/unicode/unicode.c       Wed Jun 21 20:17:21 2006
@@ -15,7 +15,7 @@
   +----------------------------------------------------------------------+
 */
 
-/* $Id: unicode.c,v 1.37 2006/06/20 23:00:02 andrei Exp $ */ 
+/* $Id: unicode.c,v 1.38 2006/06/21 20:17:21 andrei Exp $ */ 
 
 #include "php_unicode.h"
 #include "zend_unicode.h"
@@ -62,7 +62,7 @@
        status = U_ZERO_ERROR;
        num_conv = zend_convert_to_unicode(conv, &dest, &dest_len, str, 
str_len, &status);
        if (U_FAILURE(status)) {
-               zend_raise_conversion_error_ex("could not decode binary 
string", conv, ZEND_TO_UNICODE, num_conv, (flags & ZEND_CONV_ERROR_EXCEPTION) 
TSRMLS_CC);
+               zend_raise_conversion_error_ex("could not decode binary 
string", conv, ZEND_TO_UNICODE, num_conv TSRMLS_CC);
                efree(dest);
                ucnv_close(conv);       
                RETURN_FALSE;
@@ -114,7 +114,7 @@
        num_conv = zend_convert_from_unicode(conv, &dest, &dest_len, uni, 
uni_len, &status);
        if (U_FAILURE(status)) {
                int32_t offset = u_countChar32(uni, num_conv);
-               zend_raise_conversion_error_ex("could not encode Unicode 
string", conv, ZEND_FROM_UNICODE, offset, (flags & ZEND_CONV_ERROR_EXCEPTION) 
TSRMLS_CC);
+               zend_raise_conversion_error_ex("could not encode Unicode 
string", conv, ZEND_FROM_UNICODE, offset TSRMLS_CC);
                efree(dest);
                ucnv_close(conv);       
                RETURN_FALSE;
http://cvs.php.net/viewvc.cgi/php-src/main/streams/filter.c?r1=1.32&r2=1.33&diff_format=u
Index: php-src/main/streams/filter.c
diff -u php-src/main/streams/filter.c:1.32 php-src/main/streams/filter.c:1.33
--- php-src/main/streams/filter.c:1.32  Fri May 19 10:23:43 2006
+++ php-src/main/streams/filter.c       Wed Jun 21 20:17:21 2006
@@ -16,7 +16,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: filter.c,v 1.32 2006/05/19 10:23:43 tony2001 Exp $ */
+/* $Id: filter.c,v 1.33 2006/06/21 20:17:21 andrei Exp $ */
 
 #include "php.h"
 #include "php_globals.h"
@@ -750,7 +750,7 @@
                        if (U_FAILURE(status)) {
                                int32_t offset = u_countChar32(bucket->buf.u, 
num_conv);
 
-                               zend_raise_conversion_error_ex("Could not 
convert Unicode string to binary string", conv, ZEND_FROM_UNICODE, offset, 
(UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+                               zend_raise_conversion_error_ex("Could not 
convert Unicode string to binary string", conv, ZEND_FROM_UNICODE, offset 
TSRMLS_CC);
                        }
 
                        if (bucket->own_buf) {
http://cvs.php.net/viewvc.cgi/php-src/main/streams/streams.c?r1=1.126&r2=1.127&diff_format=u
Index: php-src/main/streams/streams.c
diff -u php-src/main/streams/streams.c:1.126 
php-src/main/streams/streams.c:1.127
--- php-src/main/streams/streams.c:1.126        Wed Jun 21 17:10:13 2006
+++ php-src/main/streams/streams.c      Wed Jun 21 20:17:21 2006
@@ -19,7 +19,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: streams.c,v 1.126 2006/06/21 17:10:13 andrei Exp $ */
+/* $Id: streams.c,v 1.127 2006/06/21 20:17:21 andrei Exp $ */
 
 #define _GNU_SOURCE
 #include "php.h"
@@ -1222,7 +1222,7 @@
                num_conv = 
zend_convert_from_unicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)), &str, 
&len, buf.u, buflen, &status);
                if (U_FAILURE(status)) {
                        zend_raise_conversion_error_ex("Unable to convert data 
to be written", ZEND_U_CONVERTER(UG(runtime_encoding_conv)),
-                                                                       
ZEND_FROM_UNICODE, num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) 
TSRMLS_CC);
+                                                                       
ZEND_FROM_UNICODE, num_conv TSRMLS_CC);
                } else {
                        php_error_docref(NULL TSRMLS_CC, E_NOTICE, "%d 
character unicode buffer downcoded for binary stream runtime_encoding", ulen);
                }
@@ -1270,7 +1270,7 @@
                } else {
                        /* Figure out how didwrite corresponds to the input 
buffer */
                        char *tmp = emalloc(didwrite + 1), *t = tmp;
-                       UChar *s = buf_orig;
+                       const UChar *s = buf_orig;
                        UErrorCode status = U_ZERO_ERROR;
 
                        
ucnv_resetFromUnicode(ZEND_U_CONVERTER(UG(runtime_encoding_conv)));
@@ -1570,7 +1570,7 @@
 
                while ((b = php_stream_read_unicode(stream, inbuf_start, 
sizeof(inbuf_start))) > 0) {
                        char *outbuf = outbuf_start;
-                       UChar *inbuf = inbuf_start;
+                       const UChar *inbuf = inbuf_start;
                        UErrorCode status = U_ZERO_ERROR;
                        int len;
 
@@ -1580,7 +1580,7 @@
                                /* Memory overflow isn't a problem becuase 
MAX_BYTES_FOR_STRING was allocated,
                                   anything else is a more serious problem */
                                zend_raise_conversion_error_ex("Unable to 
convert Unicode character using output_encoding, at least one character was 
lost",
-                                                                       conv, 
ZEND_FROM_UNICODE, len, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) 
TSRMLS_CC);
+                                                                               
           conv, ZEND_FROM_UNICODE, len TSRMLS_CC);
                        }
                        if (outbuf > outbuf_start) {
                                PHPWRITE(outbuf_start, outbuf - outbuf_start);
@@ -2594,8 +2594,7 @@
                        num_conv = zend_convert_from_unicode(UG(utf8_conv), 
&scheme, &scheme_len, path, (p - path) + delim_len, &status);
                        if (U_FAILURE(status)) {
                                if (options & REPORT_ERRORS) {
-                                       zend_raise_conversion_error_ex("Unable 
to convert filepath", UG(utf8_conv), ZEND_FROM_UNICODE,
-                                                                               
        num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+                                       zend_raise_conversion_error_ex("Unable 
to convert filepath", UG(utf8_conv), ZEND_FROM_UNICODE, num_conv TSRMLS_CC);
                                }
                                *pathenc = NULL;
                                *pathenc_len = 0;
@@ -2634,7 +2633,7 @@
        if (U_FAILURE(status)) {
                if (options & REPORT_ERRORS) {
                        zend_raise_conversion_error_ex("Unable to convert 
filepath", ZEND_U_CONVERTER(UG(filesystem_encoding_conv)),
-                                                       ZEND_FROM_UNICODE, 
num_conv, (UG(from_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+                                                       ZEND_FROM_UNICODE, 
num_conv TSRMLS_CC);
                }
 
                *pathenc = NULL;
@@ -2676,7 +2675,7 @@
        if (U_FAILURE(status)) {
                if (options & REPORT_ERRORS) {
                        zend_raise_conversion_error_ex("Unable to convert 
filepath", ZEND_U_CONVERTER(UG(filesystem_encoding_conv)),
-                                                       ZEND_TO_UNICODE, 
num_conv, (UG(to_error_mode) & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC);
+                                                       ZEND_TO_UNICODE, 
num_conv TSRMLS_CC);
                }
 
                *pathdec = NULL;

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to