The basic idea is that we always return a result, whether we raise an error or not. The type of error we raise also differs: in case of U_ILLEGAL_CHAR_FOUND, we definitely want to stop all processing, return what we processed so far, and raise a more noticeable error. For U_INVALID_CHAR_FOUND/U_IRREGULAR_CHAR_FOUND we can raise an error of lower severity.

But yes, we do need a wrapper for conversion failure processing - something generic enough to be used by all converters.

-Andrei


On Aug 13, 2005, at 6:43 AM, Wez Furlong wrote:

wez        Sat Aug 13 09:43:32 2005 EDT

  Modified files:
    /php-src/ext/unicode    config.w32 unicode.c
  Log:
  Fixup config.w32.

Implement unicode_decode() and unicode_encode(), as described in README.UNICODE.

Still need to decide how to handle errors here, since there is no error return,
  and a conversion error is not necessarily fatal.



http://cvs.php.net/diff.php/php-src/ext/unicode/config.w32?r1=1.2&r2=1.3&ty=u
Index: php-src/ext/unicode/config.w32
diff -u php-src/ext/unicode/config.w32:1.2 php-src/ext/unicode/config.w32:1.3
--- php-src/ext/unicode/config.w32:1.2    Fri Aug 12 05:10:04 2005
+++ php-src/ext/unicode/config.w32    Sat Aug 13 09:43:31 2005
@@ -1,9 +1,8 @@
-// $Id: config.w32,v 1.2 2005/08/12 09:10:04 sniper Exp $
+// $Id: config.w32,v 1.3 2005/08/13 13:43:31 wez Exp $
 // vim:ft=javascript

-ARG_ENABLE('unicode' , 'ICU API extension', 'no');
-if (PHP_UNICODE) {
-
+ARG_ENABLE('unicode' , 'ICU API extension', 'yes');
+if (PHP_UNICODE != 'no') {
   EXTENSION("unicode", "unicode.c unicode_filter.c locale.c");
   AC_DEFINE('HAVE_UNICODE', 1, 'ICU API extension');
 }
http://cvs.php.net/diff.php/php-src/ext/unicode/unicode.c?r1=1.2&r2=1.3&ty=u
Index: php-src/ext/unicode/unicode.c
diff -u php-src/ext/unicode/unicode.c:1.2 php-src/ext/unicode/unicode.c:1.3
--- php-src/ext/unicode/unicode.c:1.2    Fri Aug 12 05:10:04 2005
+++ php-src/ext/unicode/unicode.c    Sat Aug 13 09:43:31 2005
@@ -11,19 +11,101 @@
    | [EMAIL PROTECTED] so we can mail you a copy immediately.               |
    +----------------------------------------------------------------------+
    | Authors: Andrei Zmievski <[EMAIL PROTECTED]>                         |
+   |          Wez Furlong <[EMAIL PROTECTED]>                             |
    +----------------------------------------------------------------------+
 */

-/* $Id: unicode.c,v 1.2 2005/08/12 09:10:04 sniper Exp $ */
+/* $Id: unicode.c,v 1.3 2005/08/13 13:43:31 wez Exp $ */

 #include "php_unicode.h"
-
 #if HAVE_UNICODE
+#include "zend_unicode.h"
+
+/* {{{ proto unicode unicode_decode(string $input, string $encoding)
+ Takes a string in the souce encoding and converts it to a UTF-16 unicode string, returning the result */
+static PHP_FUNCTION(unicode_decode)
+{
+    union {
+        void *vptr;
+        char *bin;
+    } input;
+    zend_uchar type;
+    int len;
+    char *encoding;
+    int enclen;
+    UErrorCode status;
+    UConverter *conv = NULL;
+    UChar *target;
+    int32_t targetlen;
+
+ if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ts", &input.vptr, &len, &type, &encoding, &enclen)) {
+        return;
+    }
+
+    if (type == IS_UNICODE) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "input string is already unicode");
+        RETURN_FALSE;
+    }
+
+    status = U_ZERO_ERROR;
+    conv = ucnv_open(encoding, &status);
+    if (!conv) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate converter for %s", encoding);
+        RETURN_FALSE;
+    }

+    status = U_ZERO_ERROR;
+ zend_convert_to_unicode(conv, &target, &targetlen, input.bin, len, &status);
+    if (U_FAILURE(status)) {
+        /* TODO: error handling semantics ? */
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not entirely successful: %d", status);
+    }
+    RETVAL_UNICODEL(target, targetlen, 0);
+
+    ucnv_close(conv);
+}
+/* }}} */
+
+/* {{{ proto string unicode_encode(unicode $input, string $encoding)
+ Takes a unicode string and converts it to a string in the specified encoding */
+static PHP_FUNCTION(unicode_encode)
+{
+    UChar *uni;
+    int len;
+    char *encoding;
+    int enclen;
+    UErrorCode status;
+    UConverter *conv = NULL;
+    char *target;
+    int32_t targetlen;
+
+ if (FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "us", &uni, &len, &encoding, &enclen)) {
+        return;
+    }
+
+    status = U_ZERO_ERROR;
+    conv = ucnv_open(encoding, &status);
+    if (!conv) {
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "could not locate converter for %s", encoding);
+        RETURN_FALSE;
+    }
+
+    status = U_ZERO_ERROR;
+ zend_convert_from_unicode(conv, &target, &targetlen, uni, len, &status);
+    if (U_FAILURE(status)) {
+        /* TODO: error handling semantics ? */
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "conversion was not entirely successful: %d", status);
+    }
+    RETVAL_STRINGL(target, targetlen, 0);
+
+    ucnv_close(conv);
+}
 /* {{{ unicode_functions[] */
 function_entry unicode_functions[] = {
     PHP_FE(icu_loc_get_default, NULL)
     PHP_FE(icu_loc_set_default, NULL)
+    PHP_FE(unicode_decode, NULL)
+    PHP_FE(unicode_encode, NULL)
     { NULL, NULL, NULL }
 };
 /* }}} */

--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php


--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to