hirokawa                                 Mon, 18 Jul 2011 08:36:17 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=313366

Log:
added numeric entities encode/decode in hex format.

Changed paths:
    U   php/php-src/trunk/ext/mbstring/libmbfl/mbfl/mbfilter.c
    U   php/php-src/trunk/ext/mbstring/mbstring.c

Modified: php/php-src/trunk/ext/mbstring/libmbfl/mbfl/mbfilter.c
===================================================================
--- php/php-src/trunk/ext/mbstring/libmbfl/mbfl/mbfilter.c      2011-07-18 08:21:48 UTC (rev 313365)
+++ php/php-src/trunk/ext/mbstring/libmbfl/mbfl/mbfilter.c      2011-07-18 08:36:17 UTC (rev 313366)
@@ -2746,7 +2746,9 @@
                }
                break;
        case 2:
-               if (c >= 0x30 && c <= 0x39) {   /* '0' - '9' */
+               if (c == 0x78) {        /* 'x' */
+                       pc->status = 4;
+               } else if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
                        pc->cache = c - 0x30;
                        pc->status = 3;
                        pc->digit = 1;
@@ -2810,6 +2812,89 @@
                        (*pc->decoder->filter_function)(c, pc->decoder);
                }
                break;
+       case 4:
+               if (c >= 0x30 && c <= 0x39) { /* '0' - '9' */
+                       pc->cache = c - 0x30;
+                       pc->status = 5;
+                       pc->digit = 1;
+               } else if (c >= 0x41 && c <= 0x46) { /* 'A' - 'F'  */
+                       pc->cache = c - 0x41 + 10;
+                       pc->status = 5;
+                       pc->digit = 1;
+               } else if (c >= 0x61 && c <= 0x66) { /* 'a' - 'f'  */
+                       pc->cache = c - 0x61 + 10;
+                       pc->status = 5;
+                       pc->digit = 1;
+               } else {
+                       pc->status = 0;
+                       (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
+                       (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
+                       (*pc->decoder->filter_function)(0x78, pc->decoder);             /* 'x' */
+                       (*pc->decoder->filter_function)(c, pc->decoder);
+               }
+               break;
+       case 5:
+               s = 0;
+               f = 0;
+               if ((c >= 0x30 && c <= 0x39) ||
+                       (c >= 0x41 && c <= 0x46) ||
+                       (c >= 0x61 && c <= 0x66)) {     /* '0' - '9' or 'a' - 'f'  */
+                       if (pc->digit > 9) {
+                               pc->status = 0;
+                               s = pc->cache;
+                               f = 1;
+                       } else {
+                               if (c >= 0x30 && c <= 0x39) {
+                                       s = pc->cache*16 + (c - 0x30);
+                               } else if (c >= 0x41 && c <= 0x46)  {
+                                       s = pc->cache*16 + (c - 0x41 + 10);
+                               } else {
+                                       s = pc->cache*16 + (c - 0x61 + 10);
+                               }
+                               pc->cache = s;
+                               pc->digit++;
+                       }
+               } else {
+                       pc->status = 0;
+                       s = pc->cache;
+                       f = 1;
+                       n = 0;
+                       size = pc->mapsize;
+                       while (n < size) {
+                               mapelm = &(pc->convmap[n*4]);
+                               d = s - mapelm[2];
+                               if (d >= mapelm[0] && d <= mapelm[1]) {
+                                       f = 0;
+                                       (*pc->decoder->filter_function)(d, pc->decoder);
+                                       if (c != 0x3b) {        /* ';' */
+                                               (*pc->decoder->filter_function)(c, pc->decoder);
+                                       }
+                                       break;
+                               }
+                               n++;
+                       }
+               }
+               if (f) {
+                       (*pc->decoder->filter_function)(0x26, pc->decoder);             /* '&' */
+                       (*pc->decoder->filter_function)(0x23, pc->decoder);             /* '#' */
+                       (*pc->decoder->filter_function)(0x78, pc->decoder);             /* 'x' */
+                       r = 1;
+                       n = pc->digit;
+                       while (n > 0) {
+                               r *= 16;
+                               n--;
+                       }
+                       s %= r;
+                       r /= 16;
+                       while (r > 0) {
+                               d = s/r;
+                               s %= r;
+                               r /= 16;
+                               (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
+                       }
+                       (*pc->decoder->filter_function)(c, pc->decoder);
+               }
+               break;
        default:
                if (c == 0x26) {        /* '&' */
                        pc->status = 1;
@@ -2822,6 +2907,53 @@
        return c;
 }

+static int
+collector_encode_hex_htmlnumericentity(int c, void *data)
+{
+       struct collector_htmlnumericentity_data *pc = (struct collector_htmlnumericentity_data *)data;
+       int f, n, s, r, d, size, *mapelm;
+
+       size = pc->mapsize;
+       f = 0;
+       n = 0;
+       while (n < size) {
+               mapelm = &(pc->convmap[n*4]);
+               if (c >= mapelm[0] && c <= mapelm[1]) {
+                       s = (c + mapelm[2]) & mapelm[3];
+                       if (s >= 0) {
+                               (*pc->decoder->filter_function)(0x26, pc->decoder);     /* '&' */
+                               (*pc->decoder->filter_function)(0x23, pc->decoder);     /* '#' */
+                               (*pc->decoder->filter_function)(0x78, pc->decoder);     /* 'x' */
+                               r = 0x1000000;
+                               s %= r;
+                               while (r > 0) {
+                                       d = s/r;
+                                       if (d || f) {
+                                               f = 1;
+                                               s %= r;
+                                               (*pc->decoder->filter_function)(mbfl_hexchar_table[d], pc->decoder);
+                                       }
+                                       r /= 16;
+                               }
+                               if (!f) {
+                                       f = 1;
+                                       (*pc->decoder->filter_function)(mbfl_hexchar_table[0], pc->decoder);
+                               }
+                               (*pc->decoder->filter_function)(0x3b, pc->decoder);             /* ';' */
+                       }
+               }
+               if (f) {
+                       break;
+               }
+               n++;
+       }
+       if (!f) {
+               (*pc->decoder->filter_function)(c, pc->decoder);
+       }
+
+       return c;
+}
+
 mbfl_string *
 mbfl_html_numeric_entity(
     mbfl_string *string,
@@ -2850,15 +2982,20 @@
            string->no_encoding,
            mbfl_memory_device_output, 0, &device);
        /* wchar filter */
-       if (type == 0) {
+       if (type == 0) { /* decimal output */
                encoder = mbfl_convert_filter_new(
                    string->no_encoding,
                    mbfl_no_encoding_wchar,
                    collector_encode_htmlnumericentity, 0, &pc);
-       } else {
+       } else if (type == 2) { /* hex output */
                encoder = mbfl_convert_filter_new(
                    string->no_encoding,
                    mbfl_no_encoding_wchar,
+                   collector_encode_hex_htmlnumericentity, 0, &pc);
+       } else { /* type == 1: decimal/hex input */
+               encoder = mbfl_convert_filter_new(
+                   string->no_encoding,
+                   mbfl_no_encoding_wchar,
                    collector_decode_htmlnumericentity, 0, &pc);
        }
        if (pc.decoder == NULL || encoder == NULL) {

Modified: php/php-src/trunk/ext/mbstring/mbstring.c
===================================================================
--- php/php-src/trunk/ext/mbstring/mbstring.c   2011-07-18 08:21:48 UTC (rev 313365)
+++ php/php-src/trunk/ext/mbstring/mbstring.c   2011-07-18 08:36:17 UTC (rev 313366)
@@ -412,6 +412,7 @@
        ZEND_ARG_INFO(0, string)
        ZEND_ARG_INFO(0, convmap)
        ZEND_ARG_INFO(0, encoding)
+       ZEND_ARG_INFO(0, is_hex)
 ZEND_END_ARG_INFO()

 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
@@ -3682,10 +3683,11 @@
        HashTable *target_hash;
        size_t argc = ZEND_NUM_ARGS();
        int i, *convmap, *mapelm, mapsize=0;
+       zend_bool is_hex = 0;
        mbfl_string string, result, *ret;
        enum mbfl_no_encoding no_encoding;

-       if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) {
+       if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
                return;
        }

@@ -3696,7 +3698,7 @@
        string.len = str_len;

        /* encoding */
-       if (argc == 3) {
+       if ((argc == 3 || argc == 4) && encoding_len > 0) {
                no_encoding = mbfl_name2no_encoding(encoding);
                if (no_encoding == mbfl_no_encoding_invalid) {
                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
@@ -3706,6 +3708,12 @@
                }
        }

+       if (argc == 4) {
+               if (type == 0 && is_hex) {
+                       type = 2; /* output in hex format */
+               }
+       }
+
        /* conversion map */
        convmap = NULL;
        if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
@@ -3743,7 +3751,7 @@
 }
 /* }}} */

-/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
+/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding [, bool is_hex]])
    Converts specified characters to HTML numeric entities */
 PHP_FUNCTION(mb_encode_numericentity)
 {

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to