andrei          Mon Mar 27 03:19:30 2006 UTC

  Modified files:              
    /php-src/ext/unicode        unicode.c 
  Log:
  Rewrite unicode_encode() and unicode_decode() functions. Apply the new
  conversion error semantics.
  
  
http://cvs.php.net/viewcvs.cgi/php-src/ext/unicode/unicode.c?r1=1.17&r2=1.18&diff_format=u
Index: php-src/ext/unicode/unicode.c
diff -u php-src/ext/unicode/unicode.c:1.17 php-src/ext/unicode/unicode.c:1.18
--- php-src/ext/unicode/unicode.c:1.17  Sun Mar 26 21:22:59 2006
+++ php-src/ext/unicode/unicode.c       Mon Mar 27 03:19:30 2006
@@ -15,7 +15,7 @@
   +----------------------------------------------------------------------+
 */
 
-/* $Id: unicode.c,v 1.17 2006/03/26 21:22:59 andrei Exp $ */ 
+/* $Id: unicode.c,v 1.18 2006/03/27 03:19:30 andrei Exp $ */ 
 
 #include "php_unicode.h"
 #if HAVE_UNICODE
@@ -23,92 +23,112 @@
 
 void php_register_unicode_iterators(TSRMLS_D);
 
-/* {{{ proto unicode unicode_decode(string input, string encoding) U
-   Takes a string in the source encoding and converts it to a UTF-16 unicode 
string, returning the result */
+/* {{{ proto unicode unicode_decode(binary input, string encoding [, int 
flags]) U
+   Takes a binary string and converts it to a Unicode string using the specified 
encoding */
 static PHP_FUNCTION(unicode_decode)
 {
-       union {
-               void *vptr;
-               char *bin;
-       } input;
-       zend_uchar type;
-       int len;
-       char *encoding;
-       int enclen;
+       char *str, *enc;
+       int str_len, enc_len;
+       long flags;
+       UChar *dest;
+       int dest_len;
        UErrorCode status;
        UConverter *conv = NULL;
-       UChar *target;
-       int targetlen;
+       int num_conv;
 
-       if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ts", 
&input.vptr, &len, &type, &encoding, &enclen)) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Ss|l", &str, 
&str_len, &enc, &enc_len, &flags)) {
                return;
        }
 
-       if (type == IS_UNICODE) {
-               php_error_docref(NULL TSRMLS_CC, E_WARNING, "input string is 
already unicode");
-               RETURN_FALSE;
+       if (ZEND_NUM_ARGS() > 2) {
+               if ((flags & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "illegal 
value for conversion error mode");
+                       RETURN_FALSE;
+               }
+       } else {
+               flags = UG(to_error_mode);
        }
 
        status = U_ZERO_ERROR;
-       conv = ucnv_open(encoding, &status);
-       if (!conv) {
-               php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate 
converter for %s", encoding);
+       conv = ucnv_open(enc, &status);
+       if (U_FAILURE(status)) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create 
converter for '%s' encoding", enc);
                RETURN_FALSE;
        }
 
+       zend_set_converter_error_mode(conv, ZEND_TO_UNICODE, flags);
+
        status = U_ZERO_ERROR;
-       zend_convert_to_unicode(conv, &target, &targetlen, input.bin, len, 
&status);
+       num_conv = zend_convert_to_unicode(conv, &dest, &dest_len, str, 
str_len, &status);
        if (U_FAILURE(status)) {
-               /* TODO: error handling semantics ? */
-               php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not 
entirely successful: %d", status);
+               zend_raise_conversion_error_ex("could not decode binary 
string", conv, ZEND_TO_UNICODE, num_conv, (flags & ZEND_CONV_ERROR_EXCEPTION) 
TSRMLS_CC);
+               efree(dest);
+               ucnv_close(conv);       
+               RETURN_FALSE;
        }
-       RETVAL_UNICODEL(target, targetlen, 0);
-
        ucnv_close(conv);       
-}
-/* }}} */
 
-/* {{{ proto bool unicode_semantics() U
-   Check whether unicode semantics are enabled */
-static PHP_FUNCTION(unicode_semantics)
-{
-       RETURN_BOOL(UG(unicode));
+       RETVAL_UNICODEL(dest, dest_len, 0);
 }
 /* }}} */
 
-/* {{{ proto string unicode_encode(unicode input, string encoding) U
-   Takes a unicode string and converts it to a string in the specified 
encoding */
+/* {{{ proto binary unicode_encode(unicode input, string encoding [, int 
flags]) U
+   Takes a Unicode string and converts it to a binary string using the 
specified encoding */
 static PHP_FUNCTION(unicode_encode)
 {
        UChar *uni;
-       int len;
-       char *encoding;
-       int enclen;
+       char *enc;
+       int uni_len, enc_len;
+       long flags;
+       char *dest;
+       int dest_len;
        UErrorCode status;
        UConverter *conv = NULL;
-       char *target;
-       int targetlen;
+       int num_conv;
 
-       if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "us", 
&uni, &len, &encoding, &enclen)) {
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Us|l", &uni, 
&uni_len, &enc, &enc_len, &flags) == FAILURE) {
                return;
        }
 
+       if (ZEND_NUM_ARGS() > 2) {
+               if ((flags & 0xff) > ZEND_CONV_ERROR_LAST_ENUM) {
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "illegal 
value for conversion error mode");
+                       RETURN_FALSE;
+               }
+       } else {
+               flags = UG(from_error_mode);
+       }
+
        status = U_ZERO_ERROR;
-       conv = ucnv_open(encoding, &status);
-       if (!conv) {
-               php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate 
converter for %s", encoding);
+       conv = ucnv_open(enc, &status);
+       if (U_FAILURE(status)) {
+               php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not create 
converter for '%s' encoding", enc);
                RETURN_FALSE;
        }
 
+       zend_set_converter_error_mode(conv, ZEND_FROM_UNICODE, flags);
+       zend_set_converter_subst_char(conv, UG(from_subst_char));
+
        status = U_ZERO_ERROR;
-       zend_convert_from_unicode(conv, &target, &targetlen, uni, len, &status);
+       num_conv = zend_convert_from_unicode(conv, &dest, &dest_len, uni, 
uni_len, &status);
        if (U_FAILURE(status)) {
-               /* TODO: error handling semantics ? */
-               php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not 
entirely successful: %d", status);
+               int32_t offset = u_countChar32(uni, num_conv);
+               zend_raise_conversion_error_ex("could not encode Unicode 
string", conv, ZEND_FROM_UNICODE, offset, (flags & ZEND_CONV_ERROR_EXCEPTION) 
TSRMLS_CC);
+               efree(dest);
+               ucnv_close(conv);       
+               RETURN_FALSE;
        }
-       RETVAL_STRINGL(target, targetlen, 0);
-
        ucnv_close(conv);       
+
+       RETVAL_STRINGL(dest, dest_len, 0);
+}
+/* }}} */
+
+/* {{{ proto bool unicode_semantics() U
+   Check whether unicode semantics are enabled */
+static PHP_FUNCTION(unicode_semantics)
+{
+       RETURN_BOOL(UG(unicode));
 }
 /* }}} */
 

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to