andrei Mon Mar 27 03:19:30 2006 UTC
Modified files:
/php-src/ext/unicode unicode.c
Log:
Rewrite unicode_encode() and unicode_decode() functions. Apply the new
conversion error semantics.
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode.c?r1=1.17&r2=1.18&diff_format=u
Index: php-src/ext/unicode/unicode.c
diff -u php-src/ext/unicode/unicode.c:1.17 php-src/ext/unicode/unicode.c:1.18
--- php-src/ext/unicode/unicode.c:1.17 Sun Mar 26 21:22:59 2006
+++ php-src/ext/unicode/unicode.c Mon Mar 27 03:19:30 2006
@@ -15,7 +15,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: unicode.c,v 1.17 2006/03/26 21:22:59 andrei Exp $ */
+/* $Id: unicode.c,v 1.18 2006/03/27 03:19:30 andrei Exp $ */
#include "php_unicode.h"
#if HAVE_UNICODE
@@ -23,92 +23,112 @@
void php_register_unicode_iterators(TSRMLS_D);
-/* {{{ proto unicode unicode_decode(string input, string encoding) U
- Takes a string in the source encoding and converts it to a UTF-16 unicode
string, returning the result */
+/* {{{ proto unicode unicode_decode(binary input, string encoding [, int
flags]) U
+ Takes a binary string and converts it to a Unicode string using the specified
encoding */
static PHP_FUNCTION(unicode_decode)
{
- union {
- void *vptr;
- char *bin;
- } input;
- zend_uchar type;
- int len;
- char *encoding;
- int enclen;
+ char *str, *enc;
+ int str_len, enc_len;
+ long flags;
+ UChar *dest;
+ int dest_len;
UErrorCode status;
UConverter *conv = NULL;
- UChar *target;
- int targetlen;
+ int num_conv;
- if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ts",
&input.vptr, &len, &type, &encoding, &enclen)) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Ss|l", &str,
&str_len, &enc, &enc_len, &flags)) {
return;
}
- if (type == IS_UNICODE) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "input string is
already unicode");
- RETURN_FALSE;
+ if (ZEND_NUM_ARGS() > 2) {
+ if ((flags & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "illegal
value for conversion error mode");
+ RETURN_FALSE;
+ }
+ } else {
+ flags = UG(to_error_mode);
}
status = U_ZERO_ERROR;
- conv = ucnv_open(encoding, &status);
- if (!conv) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate
converter for %s", encoding);
+ conv = ucnv_open(enc, &status);
+ if (U_FAILURE(status)) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create
converter for '%s' encoding", enc);
RETURN_FALSE;
}
+ zend_set_converter_error_mode(conv, ZEND_TO_UNICODE, flags);
+
status = U_ZERO_ERROR;
- zend_convert_to_unicode(conv, &target, &targetlen, input.bin, len,
&status);
+ num_conv = zend_convert_to_unicode(conv, &dest, &dest_len, str,
str_len, &status);
if (U_FAILURE(status)) {
- /* TODO: error handling semantics ? */
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not
entirely successful: %d", status);
+ zend_raise_conversion_error_ex("could not decode binary
string", conv, ZEND_TO_UNICODE, num_conv, (flags & ZEND_CONV_ERROR_EXCEPTION)
TSRMLS_CC);
+ efree(dest);
+ ucnv_close(conv);
+ RETURN_FALSE;
}
- RETVAL_UNICODEL(target, targetlen, 0);
-
ucnv_close(conv);
-}
-/* }}} */
-/* {{{ proto bool unicode_semantics() U
- Check whether unicode semantics are enabled */
-static PHP_FUNCTION(unicode_semantics)
-{
- RETURN_BOOL(UG(unicode));
+ RETVAL_UNICODEL(dest, dest_len, 0);
}
/* }}} */
-/* {{{ proto string unicode_encode(unicode input, string encoding) U
- Takes a unicode string and converts it to a string in the specified
encoding */
+/* {{{ proto binary unicode_encode(unicode input, string encoding [, int
flags]) U
+ Takes a Unicode string and converts it to a binary string using the
specified encoding */
static PHP_FUNCTION(unicode_encode)
{
UChar *uni;
- int len;
- char *encoding;
- int enclen;
+ char *enc;
+ int uni_len, enc_len;
+ long flags;
+ char *dest;
+ int dest_len;
UErrorCode status;
UConverter *conv = NULL;
- char *target;
- int targetlen;
+ int num_conv;
- if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "us",
&uni, &len, &encoding, &enclen)) {
+ if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Us|l", &uni,
&uni_len, &enc, &enc_len, &flags) == FAILURE) {
return;
}
+ if (ZEND_NUM_ARGS() > 2) {
+ if ((flags & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "illegal
value for conversion error mode");
+ RETURN_FALSE;
+ }
+ } else {
+ flags = UG(from_error_mode);
+ }
+
status = U_ZERO_ERROR;
- conv = ucnv_open(encoding, &status);
- if (!conv) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate
converter for %s", encoding);
+ conv = ucnv_open(enc, &status);
+ if (U_FAILURE(status)) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create
converter for '%s' encoding", enc);
RETURN_FALSE;
}
+ zend_set_converter_error_mode(conv, ZEND_FROM_UNICODE, flags);
+ zend_set_converter_subst_char(conv, UG(from_subst_char));
+
status = U_ZERO_ERROR;
- zend_convert_from_unicode(conv, &target, &targetlen, uni, len, &status);
+ num_conv = zend_convert_from_unicode(conv, &dest, &dest_len, uni,
uni_len, &status);
if (U_FAILURE(status)) {
- /* TODO: error handling semantics ? */
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not
entirely successful: %d", status);
+ int32_t offset = u_countChar32(uni, num_conv);
+ zend_raise_conversion_error_ex("could not encode Unicode
string", conv, ZEND_FROM_UNICODE, offset, (flags & ZEND_CONV_ERROR_EXCEPTION)
TSRMLS_CC);
+ efree(dest);
+ ucnv_close(conv);
+ RETURN_FALSE;
}
- RETVAL_STRINGL(target, targetlen, 0);
-
ucnv_close(conv);
+
+ RETVAL_STRINGL(dest, dest_len, 0);
+}
+/* }}} */
+
+/* {{{ proto bool unicode_semantics() U
+ Check whether unicode semantics are enabled */
+static PHP_FUNCTION(unicode_semantics)
+{
+ RETURN_BOOL(UG(unicode));
}
/* }}} */
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php