On Fri, 18 Nov 2011 13:12:01 -0000, Pierre Joye <pierre....@gmail.com> wrote:


Possibly the best course of action would be to follow the ICU API and create a new class in the intl extension with labelToASCII
(uidna_labelToASCII_UTF8), labelToUnicode (uidna_labelToUnicodeUTF8),
nameToASCII (uidna_nameToASCII_UTF8) and nameToUnicode
(uidna_nameToUnicodeUTF8).

If it is actually correct to use the new API, then I would rather do
it now and by updating the current implementation in INTL. There is no
need to add new functions or classes for that, we should support 2008
per default using the current IDN functions. Another possible bug is
the RTL support.

However, I don't know how receptive people are to adding such a change to PHP 5.4 at this point. Another option would be to do it in trunk exclusively and
sync PECL intl with it.

It is more a bug fix, which can be related to security as well
(spoofing attacks). If it is actually correct to use the new API and
fully 2003-2008 compliant, then I will use the new API directly.

I've patched intl to support the new ICU API, adding another parameter to the functions so they assume either the current behavior or call the new API.

Example:

var_dump(idn_to_ascii("www.fußball.com",
        IDNA_NONTRANSITIONAL_TO_ASCII, IDNA_VARIANT_UTS46));

array(3) {
  ["result"]=>
  string(22) "www.xn--fuball-cta.com"
  ["isTransitionalDifferent"]=>
  bool(true)
  ["errors"]=>
  int(0)
}

However, there's a little problem. The new interface reports errors via the usual ICU mechanism AND it also uses a new structure to pass IDNA-specific errors. It also reports whether the use of the transitional mechanism affects the result. I see only two ways to pass that extra information to the PHP user:

* Add a second new parameter, by reference, wherein this extra information is written.
* Instead of returning a string, return an array with all the new information.

In the attached patch I've gone for the second one, but I'm now more inclined to the first. So if no one objects, I'm going to change that detail, add a few tests and commit to 5.3, 5.4 and trunk sometime this coming week.

--
Gustavo Lopes
Index: common/common_error.c
===================================================================
--- common/common_error.c       (revision 319587)
+++ common/common_error.c       (working copy)
@@ -232,7 +232,6 @@
        INTL_EXPOSE_CONST( U_REGEX_ERROR_LIMIT );
 
        /* The error code in the range 0x10400-0x104ff are reserved for IDNA 
related error codes */
-#if defined(U_IDNA_PROHIBITED_ERROR)
        INTL_EXPOSE_CONST( U_IDNA_PROHIBITED_ERROR );
        INTL_EXPOSE_CONST( U_IDNA_ERROR_START );
        INTL_EXPOSE_CONST( U_IDNA_UNASSIGNED_ERROR );
@@ -242,8 +241,8 @@
        INTL_EXPOSE_CONST( U_IDNA_VERIFICATION_ERROR );
        INTL_EXPOSE_CONST( U_IDNA_LABEL_TOO_LONG_ERROR );
        INTL_EXPOSE_CONST( U_IDNA_ZERO_LENGTH_LABEL_ERROR );
+       INTL_EXPOSE_CONST( U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR );
        INTL_EXPOSE_CONST( U_IDNA_ERROR_LIMIT );
-#endif
 
        /* Aliases for StringPrep */
        INTL_EXPOSE_CONST( U_STRINGPREP_PROHIBITED_ERROR );
Index: idn/idn.c
===================================================================
--- idn/idn.c   (revision 319587)
+++ idn/idn.c   (working copy)
@@ -29,14 +29,25 @@
 #include "ext/standard/php_string.h"
 
 #include "intl_error.h"
- #include "intl_convert.h"
+#include "intl_convert.h"
 /* }}} */
 
+#ifdef UIDNA_INFO_INITIALIZER
+#define HAVE_46_API 1 /* has UTS#46 API (introduced in ICU 4.6) */
+#endif
+
+enum {
+       INTL_IDN_VARIANT_2003 = 0,
+       INTL_IDN_VARIANT_UTS46
+};
+
 /* {{{ grapheme_register_constants
  * Register API constants
  */
 void idn_register_constants( INIT_FUNC_ARGS )
 {
+       /* OPTIONS */
+
        /* Option to prohibit processing of unassigned codepoints in the input 
and
           do not check if the input conforms to STD-3 ASCII rules. */
        REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | 
CONST_PERSISTENT);
@@ -46,6 +57,50 @@
 
        /* Option to check if input conforms to STD-3 ASCII rules */
        REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, 
CONST_CS | CONST_PERSISTENT);
+
+#ifdef HAVE_46_API
+
+       /* Option to check for whether the input conforms to the BiDi rules.
+        * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a 
BiDi check.) */
+       REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | 
CONST_PERSISTENT);
+
+       /* Option to check for whether the input conforms to the CONTEXTJ rules.
+        * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new 
in IDNA2008.) */
+       REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, 
CONST_CS | CONST_PERSISTENT);
+
+       /* Option for nontransitional processing in ToASCII().
+        * By default, ToASCII() uses transitional processing.
+        * Ignored by the IDNA2003 implementation. */
+       REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", 
UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
+
+       /* Option for nontransitional processing in ToUnicode().
+        * By default, ToUnicode() uses transitional processing.
+        * Ignored by the IDNA2003 implementation. */
+       REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", 
UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
+#endif
+
+       /* VARIANTS */
+       REGISTER_LONG_CONSTANT("IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, 
CONST_CS | CONST_PERSISTENT);
+#ifdef HAVE_46_API
+       REGISTER_LONG_CONSTANT("IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, 
CONST_CS | CONST_PERSISTENT);
+#endif
+
+#ifdef HAVE_46_API
+       /* PINFO ERROR CODES */
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", 
UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", 
UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", 
UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", 
UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", 
UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, 
CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", 
UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, 
CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, 
CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", 
UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", 
UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | 
CONST_PERSISTENT);
+       REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, 
CONST_CS | CONST_PERSISTENT);
+#endif
 }
 /* }}} */
 
@@ -54,11 +109,75 @@
        INTL_IDN_TO_UTF8
 };
 
-static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
+/* like INTL_CHECK_STATUS, but as a function and varying the name of the func 
*/
+static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode 
TSRMLS_DC)
 {
-       unsigned char* domain;
-       int domain_len;
-       long option = 0;
+       intl_error_set_code( NULL, err TSRMLS_CC );
+    if(U_FAILURE(err)) {
+               char *buff;
+               spprintf(&buff, 0, "%s: %s",
+                               mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : 
"idn_to_utf8",
+                               msg);
+               if (msg != NULL) {
+                       intl_error_set_custom_msg(NULL, buff, 1 TSRMLS_CC);
+                       efree(buff);
+               }
+        return FAILURE;
+    }
+
+       return SUCCESS;
+}
+
+#ifdef HAVE_46_API
+static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
+               const char *domain, int domain_len, uint32_t option, int mode)
+{
+       UErrorCode        status = U_ZERO_ERROR;
+       UIDNA             *uts46;
+       int32_t           len;
+       int32_t           buffer_capac = 255; /* no domain name may exceed this 
*/
+       char              *buffer = emalloc(buffer_capac);
+       UIDNAInfo         info = UIDNA_INFO_INITIALIZER;
+       
+       uts46 = uidna_openUTS46(option, &status);
+       if (php_intl_idn_check_status(status, "failed to open UIDNA instance",
+                       mode TSRMLS_CC) == FAILURE) {
+               efree(buffer);
+               RETURN_FALSE;
+       }
+
+       if (mode == INTL_IDN_TO_ASCII) {
+               len = uidna_nameToASCII_UTF8(uts46, domain, (int32_t)domain_len,
+                               buffer, buffer_capac, &info, &status);
+       } else {
+               len = uidna_nameToUnicodeUTF8(uts46, domain, 
(int32_t)domain_len,
+                               buffer, buffer_capac, &info, &status);
+       }
+       if (php_intl_idn_check_status(status, "failed to convert name",
+                       mode TSRMLS_CC) == FAILURE) {
+               uidna_close(uts46);
+               efree(buffer);
+               RETURN_FALSE;
+       }
+       if (len >= 255) {
+               php_error_docref(NULL TSRMLS_CC, E_ERROR, "ICU returned an 
unexpected error");
+       }
+
+       buffer[len] = '\0';
+
+       array_init(return_value);
+       add_assoc_stringl_ex(return_value, "result", sizeof("result"), buffer, 
len, 0);
+       add_assoc_bool_ex(return_value, "isTransitionalDifferent",
+                       sizeof("isTransitionalDifferent"), 
info.isTransitionalDifferent);
+       add_assoc_long_ex(return_value, "errors", sizeof("errors"), 
(long)info.errors);
+
+       uidna_close(uts46);
+}
+#endif
+
+static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
+               const char *domain, int domain_len, uint32_t option, int mode)
+{
        UChar* ustring = NULL;
        int ustring_len = 0;
        UErrorCode status;
@@ -67,18 +186,9 @@
        UChar     converted[MAXPATHLEN];
        int32_t   converted_ret_len;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", (char 
**)&domain, &domain_len, &option) == FAILURE) {
-               return;
-       }
-
-       if (domain_len < 1) {
-               intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "idn_to_ascii: 
empty domain name", 0 TSRMLS_CC );
-               RETURN_FALSE;
-       }
-
        /* convert the string to UTF-16. */
        status = U_ZERO_ERROR;
-       intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) domain, 
domain_len, &status );
+       intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, 
&status);
 
        if (U_FAILURE(status)) {
                intl_error_set_code(NULL, status TSRMLS_CC);
@@ -123,11 +233,75 @@
        RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
 }
 
+static void php_intl_bad_args(const char *msg, int mode TSRMLS_DC)
+{
+       char *buff;
+       spprintf(&buff, 0, "%s: %s",
+                       mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : 
"idn_to_utf8",
+                       msg);
+       if (msg != NULL) {
+               intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, buff, 1 
TSRMLS_CC);
+               efree(buff);
+       }
+}
+
+static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
+{
+       char *domain;
+       int domain_len;
+       long option = 0,
+                variant = INTL_IDN_VARIANT_2003;
+
+       intl_error_reset(NULL TSRMLS_CC);
+
+       if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ll",
+                       &domain, &domain_len, &option, &variant) == FAILURE) {
+               php_intl_bad_args("bad arguments", mode TSRMLS_CC);
+               RETURN_NULL(); /* don't set FALSE because that's not the way it 
was before... */
+       }
+
+#ifdef HAVE_46_API
+       if (variant != INTL_IDN_VARIANT_2003 && variant != 
INTL_IDN_VARIANT_UTS46) {
+               php_intl_bad_args("invalid variant, must be one of {"
+                       "INTL_IDN_VARIANT_2003, INTL_IDN_VARIANT_UTS46}", mode 
TSRMLS_CC);
+               RETURN_FALSE;
+       }
+#else
+       if (variant != INTL_IDN_VARIANT_2003) {
+               php_intl_bad_args("invalid variant, PHP was compiled against "
+                       "an old version of ICU and only supports 
INTL_IDN_VARIANT_2003",
+                       mode TSRMLS_CC);
+               RETURN_FALSE;
+       }
+#endif
+
+       if (domain_len < 1) {
+               php_intl_bad_args("empty domain name", mode TSRMLS_CC);
+               RETURN_FALSE;
+       }
+       if (domain_len > INT32_MAX - 1) {
+               php_intl_bad_args("domain name too large", mode TSRMLS_CC);
+               RETURN_FALSE;
+       }
+       /* don't check options; it wasn't checked before */
+       
+       if (variant == INTL_IDN_VARIANT_2003) {
+               php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU,
+                               domain, domain_len, (uint32_t)option, mode);
+       }
+#ifdef HAVE_46_API
+       else {
+               php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, 
domain_len,
+                               (uint32_t)option, mode);
+       }
+#endif
+}
+
 /* {{{ proto int idn_to_ascii(string domain[, int options])
    Converts an Unicode domain to ASCII representation, as defined in the IDNA 
RFC */
 PHP_FUNCTION(idn_to_ascii)
 {
-       php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
+       php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, 
INTL_IDN_TO_ASCII);
 }
 /* }}} */
 
@@ -136,7 +310,7 @@
    Converts an ASCII representation of the domain to Unicode (UTF-8), as 
defined in the IDNA RFC */
 PHP_FUNCTION(idn_to_utf8)
 {
-       php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
+       php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, 
INTL_IDN_TO_UTF8);
 }
 /* }}} */
 

-- 
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to