andrei Mon Mar 27 03:19:30 2006 UTC Modified files: /php-src/ext/unicode unicode.c Log: Rewrite unicode_encode() and unicode_decode() functions. Apply the new conversion error semantics. http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode.c?r1=1.17&r2=1.18&diff_format=u Index: php-src/ext/unicode/unicode.c diff -u php-src/ext/unicode/unicode.c:1.17 php-src/ext/unicode/unicode.c:1.18 --- php-src/ext/unicode/unicode.c:1.17 Sun Mar 26 21:22:59 2006 +++ php-src/ext/unicode/unicode.c Mon Mar 27 03:19:30 2006 @@ -15,7 +15,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: unicode.c,v 1.17 2006/03/26 21:22:59 andrei Exp $ */ +/* $Id: unicode.c,v 1.18 2006/03/27 03:19:30 andrei Exp $ */ #include "php_unicode.h" #if HAVE_UNICODE @@ -23,92 +23,112 @@ void php_register_unicode_iterators(TSRMLS_D); -/* {{{ proto unicode unicode_decode(string input, string encoding) U - Takes a string in the source encoding and converts it to a UTF-16 unicode string, returning the result */ +/* {{{ proto unicode unicode_decode(binary input, string encoding [, int flags]) U + Takes a binary string and converts it to a Unicode string using the specified encoding */ static PHP_FUNCTION(unicode_decode) { - union { - void *vptr; - char *bin; - } input; - zend_uchar type; - int len; - char *encoding; - int enclen; + char *str, *enc; + int str_len, enc_len; + long flags; + UChar *dest; + int dest_len; UErrorCode status; UConverter *conv = NULL; - UChar *target; - int targetlen; + int num_conv; - if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ts", &input.vptr, &len, &type, &encoding, &enclen)) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Ss|l", &str, &str_len, &enc, &enc_len, &flags)) { return; } - if (type == IS_UNICODE) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "input string is already unicode"); - RETURN_FALSE; + if (ZEND_NUM_ARGS() > 2) { + if ((flags & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) { + php_error_docref(NULL 
TSRMLS_CC, E_WARNING, "illegal value for conversion error mode"); + RETURN_FALSE; + } + } else { + flags = UG(to_error_mode); } status = U_ZERO_ERROR; - conv = ucnv_open(encoding, &status); - if (!conv) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate converter for %s", encoding); + conv = ucnv_open(enc, &status); + if (U_FAILURE(status)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create converter for '%s' encoding", enc); RETURN_FALSE; } + zend_set_converter_error_mode(conv, ZEND_TO_UNICODE, flags); + status = U_ZERO_ERROR; - zend_convert_to_unicode(conv, &target, &targetlen, input.bin, len, &status); + num_conv = zend_convert_to_unicode(conv, &dest, &dest_len, str, str_len, &status); if (U_FAILURE(status)) { - /* TODO: error handling semantics ? */ - php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not entirely successful: %d", status); + zend_raise_conversion_error_ex("could not decode binary string", conv, ZEND_TO_UNICODE, num_conv, (flags & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + efree(dest); + ucnv_close(conv); + RETURN_FALSE; } - RETVAL_UNICODEL(target, targetlen, 0); - ucnv_close(conv); -} -/* }}} */ -/* {{{ proto bool unicode_semantics() U - Check whether unicode semantics are enabled */ -static PHP_FUNCTION(unicode_semantics) -{ - RETURN_BOOL(UG(unicode)); + RETVAL_UNICODEL(dest, dest_len, 0); } /* }}} */ -/* {{{ proto string unicode_encode(unicode input, string encoding) U - Takes a unicode string and converts it to a string in the specified encoding */ +/* {{{ proto binary unicode_encode(unicode input, string encoding [, int flags]) U + Takes a Unicode string and converts it to a binary string using the specified encoding */ static PHP_FUNCTION(unicode_encode) { UChar *uni; - int len; - char *encoding; - int enclen; + char *enc; + int uni_len, enc_len; + long flags; + char *dest; + int dest_len; UErrorCode status; UConverter *conv = NULL; - char *target; - int targetlen; + int num_conv; - if (FAILURE == 
zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "us", &uni, &len, &encoding, &enclen)) { + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Us|l", &uni, &uni_len, &enc, &enc_len, &flags) == FAILURE) { return; } + if (ZEND_NUM_ARGS() > 2) { + if ((flags & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "illegal value for conversion error mode"); + RETURN_FALSE; + } + } else { + flags = UG(from_error_mode); + } + status = U_ZERO_ERROR; - conv = ucnv_open(encoding, &status); - if (!conv) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate converter for %s", encoding); + conv = ucnv_open(enc, &status); + if (U_FAILURE(status)) { + php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create converter for '%s' encoding", enc); RETURN_FALSE; } + zend_set_converter_error_mode(conv, ZEND_FROM_UNICODE, flags); + zend_set_converter_subst_char(conv, UG(from_subst_char)); + status = U_ZERO_ERROR; - zend_convert_from_unicode(conv, &target, &targetlen, uni, len, &status); + num_conv = zend_convert_from_unicode(conv, &dest, &dest_len, uni, uni_len, &status); if (U_FAILURE(status)) { - /* TODO: error handling semantics ? */ - php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not entirely successful: %d", status); + int32_t offset = u_countChar32(uni, num_conv); + zend_raise_conversion_error_ex("could not encode Unicode string", conv, ZEND_FROM_UNICODE, offset, (flags & ZEND_CONV_ERROR_EXCEPTION) TSRMLS_CC); + efree(dest); + ucnv_close(conv); + RETURN_FALSE; } - RETVAL_STRINGL(target, targetlen, 0); - ucnv_close(conv); + + RETVAL_STRINGL(dest, dest_len, 0); +} +/* }}} */ + +/* {{{ proto bool unicode_semantics() U + Check whether unicode semantics are enabled */ +static PHP_FUNCTION(unicode_semantics) +{ + RETURN_BOOL(UG(unicode)); } /* }}} */
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php