moriyoshi                                Wed, 29 Jul 2009 04:44:08 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=286483

Log:
* Fix bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities)

Bug: http://bugs.php.net/48645 (Assigned) mb_convert_encoding() doesn't understand hexadecimal html-entities
      
Changed paths:
    _U  php/php-src/branches/PHP_5_2/
    U   php/php-src/branches/PHP_5_2/NEWS
    U   php/php-src/branches/PHP_5_2/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
    A   php/php-src/branches/PHP_5_2/ext/mbstring/tests/bug48645.phpt
    _U  php/php-src/branches/PHP_5_3/
    U   php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
    A   php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug48645.phpt
    U   php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
    A   php/php-src/trunk/ext/mbstring/tests/bug48645.phpt

Property changes on: php/php-src/branches/PHP_5_2
___________________________________________________________________
Modified: svn:mergeinfo
   - /php/php-src/branches/PHP_5_3:284120
   + /php/php-src/branches/PHP_5_3:284120
/php/php-src/trunk:284726

Modified: php/php-src/branches/PHP_5_2/NEWS
===================================================================
--- php/php-src/branches/PHP_5_2/NEWS	2009-07-29 04:29:30 UTC (rev 286482)
+++ php/php-src/branches/PHP_5_2/NEWS	2009-07-29 04:44:08 UTC (rev 286483)
@@ -1,6 +1,7 @@
 PHP                                                                        NEWS
 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
 ?? ??? 2009, PHP 5.2.11
+- Fixed bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities). (Moriyoshi)
 - Fixed regression in cURL extension that prevented flush of data to output
   defined as a file handle. (Ilia)


Modified: php/php-src/branches/PHP_5_2/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
===================================================================
--- php/php-src/branches/PHP_5_2/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c	2009-07-29 04:29:30 UTC (rev 286482)
+++ php/php-src/branches/PHP_5_2/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c	2009-07-29 04:44:08 UTC (rev 286483)
@@ -186,18 +186,58 @@
 		}
 	} else {
 		if (c == ';') {
-			buffer[filter->status] = 0;
 			if (buffer[1]=='#') {
-				/* numeric entity */
-				for (pos=2; pos<filter->status; pos++) {
-					ent = ent*10 + (buffer[pos] - '0');
+				if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) {
+					if (filter->status > 3) {
+						/* numeric entity */
+						for (pos=3; pos<filter->status; pos++) {
+							int v =  buffer[pos];
+							if (v >= '0' && v <= '9') {
+								v = v - '0';
+							} else if (v >= 'A' && v <= 'F') {
+								v = v - 'A' + 10;
+							} else if (v >= 'a' && v <= 'f') {
+								v = v - 'a' + 10;
+							} else {
+								ent = -1;
+								break;
+							}
+							ent = ent * 16 + v;
+						}
+					} else {
+						ent = -1;
+					}
+				} else {
+					/* numeric entity */
+					if (filter->status > 2) {
+						for (pos=2; pos<filter->status; pos++) {
+							int v = buffer[pos];
+							if (v >= '0' && v <= '9') {
+								v = v - '0';
+							} else {
+								ent = -1;
+								break;
+							}
+							ent = ent*10 + v;
+						}
+					} else {
+						ent = -1;
+					}
 				}
-				CK((*filter->output_function)(ent, filter->data));
+				if (ent >= 0 && ent < 0x110000) {
+					CK((*filter->output_function)(ent, filter->data));
+				} else {
+					for (pos = 0; pos < filter->status; pos++) {
+						CK((*filter->output_function)(buffer[pos], filter->data));
+					}
+					CK((*filter->output_function)(c, filter->data));
+				}
 				filter->status = 0;
 				/*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/
 			} else {
 				/* named entity */
-			        entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
+				buffer[filter->status] = 0;
+				entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
 				while (entity->name) {
 					if (!strcmp(buffer+1, entity->name))	{
 						ent = entity->code;

Added: php/php-src/branches/PHP_5_2/ext/mbstring/tests/bug48645.phpt
===================================================================
--- php/php-src/branches/PHP_5_2/ext/mbstring/tests/bug48645.phpt	                        (rev 0)
+++ php/php-src/branches/PHP_5_2/ext/mbstring/tests/bug48645.phpt	2009-07-29 04:44:08 UTC (rev 286483)
@@ -0,0 +1,162 @@
+--TEST--
+Bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+var_dump(bin2hex(mb_convert_encoding("&#x0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x10ffff;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X10ffff;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114111;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114112;", "UTF-8", "HTML-ENTITIES")));
+?>
+--EXPECT--
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623782f3b"
+string(10) "2623783a3b"
+string(10) "262378403b"
+string(10) "262378603b"
+string(10) "262378473b"
+string(10) "262378673b"
+string(8) "2623783b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623582f3b"
+string(10) "2623583a3b"
+string(10) "262358403b"
+string(10) "262358603b"
+string(10) "262358473b"
+string(10) "262358673b"
+string(8) "2623583b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(8) "26232f3b"
+string(8) "26233a3b"
+string(6) "26233b"
+string(8) "f48fbfbf"
+string(20) "2623783131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623583131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623313131343131323b"


Property changes on: php/php-src/branches/PHP_5_3
___________________________________________________________________
Added: svn:mergeinfo
   + /php/php-src/trunk:284726

Modified: php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
===================================================================
--- php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c	2009-07-29 04:29:30 UTC (rev 286482)
+++ php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c	2009-07-29 04:44:08 UTC (rev 286483)
@@ -186,18 +186,58 @@
 		}
 	} else {
 		if (c == ';') {
-			buffer[filter->status] = 0;
 			if (buffer[1]=='#') {
-				/* numeric entity */
-				for (pos=2; pos<filter->status; pos++) {
-					ent = ent*10 + (buffer[pos] - '0');
+				if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) {
+					if (filter->status > 3) {
+						/* numeric entity */
+						for (pos=3; pos<filter->status; pos++) {
+							int v =  buffer[pos];
+							if (v >= '0' && v <= '9') {
+								v = v - '0';
+							} else if (v >= 'A' && v <= 'F') {
+								v = v - 'A' + 10;
+							} else if (v >= 'a' && v <= 'f') {
+								v = v - 'a' + 10;
+							} else {
+								ent = -1;
+								break;
+							}
+							ent = ent * 16 + v;
+						}
+					} else {
+						ent = -1;
+					}
+				} else {
+					/* numeric entity */
+					if (filter->status > 2) {
+						for (pos=2; pos<filter->status; pos++) {
+							int v = buffer[pos];
+							if (v >= '0' && v <= '9') {
+								v = v - '0';
+							} else {
+								ent = -1;
+								break;
+							}
+							ent = ent*10 + v;
+						}
+					} else {
+						ent = -1;
+					}
 				}
-				CK((*filter->output_function)(ent, filter->data));
+				if (ent >= 0 && ent < 0x110000) {
+					CK((*filter->output_function)(ent, filter->data));
+				} else {
+					for (pos = 0; pos < filter->status; pos++) {
+						CK((*filter->output_function)(buffer[pos], filter->data));
+					}
+					CK((*filter->output_function)(c, filter->data));
+				}
 				filter->status = 0;
 				/*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/
 			} else {
 				/* named entity */
-			        entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
+				buffer[filter->status] = 0;
+				entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
 				while (entity->name) {
 					if (!strcmp(buffer+1, entity->name))	{
 						ent = entity->code;

Added: php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug48645.phpt
===================================================================
--- php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug48645.phpt	                        (rev 0)
+++ php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug48645.phpt	2009-07-29 04:44:08 UTC (rev 286483)
@@ -0,0 +1,162 @@
+--TEST--
+Bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+var_dump(bin2hex(mb_convert_encoding("&#x0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x10ffff;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X10ffff;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114111;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114112;", "UTF-8", "HTML-ENTITIES")));
+?>
+--EXPECT--
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623782f3b"
+string(10) "2623783a3b"
+string(10) "262378403b"
+string(10) "262378603b"
+string(10) "262378473b"
+string(10) "262378673b"
+string(8) "2623783b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623582f3b"
+string(10) "2623583a3b"
+string(10) "262358403b"
+string(10) "262358603b"
+string(10) "262358473b"
+string(10) "262358673b"
+string(8) "2623583b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(8) "26232f3b"
+string(8) "26233a3b"
+string(6) "26233b"
+string(8) "f48fbfbf"
+string(20) "2623783131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623583131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623313131343131323b"

Modified: php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c
===================================================================
--- php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c	2009-07-29 04:29:30 UTC (rev 286482)
+++ php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c	2009-07-29 04:44:08 UTC (rev 286483)
@@ -186,18 +186,58 @@
 		}
 	} else {
 		if (c == ';') {
-			buffer[filter->status] = 0;
 			if (buffer[1]=='#') {
-				/* numeric entity */
-				for (pos=2; pos<filter->status; pos++) {
-					ent = ent*10 + (buffer[pos] - '0');
+				if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) {
+					if (filter->status > 3) {
+						/* numeric entity */
+						for (pos=3; pos<filter->status; pos++) {
+							int v =  buffer[pos];
+							if (v >= '0' && v <= '9') {
+								v = v - '0';
+							} else if (v >= 'A' && v <= 'F') {
+								v = v - 'A' + 10;
+							} else if (v >= 'a' && v <= 'f') {
+								v = v - 'a' + 10;
+							} else {
+								ent = -1;
+								break;
+							}
+							ent = ent * 16 + v;
+						}
+					} else {
+						ent = -1;
+					}
+				} else {
+					/* numeric entity */
+					if (filter->status > 2) {
+						for (pos=2; pos<filter->status; pos++) {
+							int v = buffer[pos];
+							if (v >= '0' && v <= '9') {
+								v = v - '0';
+							} else {
+								ent = -1;
+								break;
+							}
+							ent = ent*10 + v;
+						}
+					} else {
+						ent = -1;
+					}
 				}
-				CK((*filter->output_function)(ent, filter->data));
+				if (ent >= 0 && ent < 0x110000) {
+					CK((*filter->output_function)(ent, filter->data));
+				} else {
+					for (pos = 0; pos < filter->status; pos++) {
+						CK((*filter->output_function)(buffer[pos], filter->data));
+					}
+					CK((*filter->output_function)(c, filter->data));
+				}
 				filter->status = 0;
 				/*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/
 			} else {
 				/* named entity */
-			        entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
+				buffer[filter->status] = 0;
+				entity = (mbfl_html_entity_entry *)mbfl_html_entity_list;
 				while (entity->name) {
 					if (!strcmp(buffer+1, entity->name))	{
 						ent = entity->code;

Added: php/php-src/trunk/ext/mbstring/tests/bug48645.phpt
===================================================================
--- php/php-src/trunk/ext/mbstring/tests/bug48645.phpt	                        (rev 0)
+++ php/php-src/trunk/ext/mbstring/tests/bug48645.phpt	2009-07-29 04:44:08 UTC (rev 286483)
@@ -0,0 +1,162 @@
+--TEST--
+Bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities)
+--SKIPIF--
+<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
+--FILE--
+<?php
+var_dump(bin2hex(mb_convert_encoding("&#x0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XA;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XB;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XC;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XD;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XE;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XF;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xa;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xb;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xc;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xd;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xe;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xf;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X@;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X`;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#XG;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#Xg;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#0;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#2;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#3;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#4;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#5;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#6;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#7;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#8;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#9;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#/;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#:;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x10ffff;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#x110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X10ffff;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#X110000;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114111;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("&#1114112;", "UTF-8", "HTML-ENTITIES")));
+?>
+--EXPECT--
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623782f3b"
+string(10) "2623783a3b"
+string(10) "262378403b"
+string(10) "262378603b"
+string(10) "262378473b"
+string(10) "262378673b"
+string(8) "2623783b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(2) "0a"
+string(2) "0b"
+string(2) "0c"
+string(2) "0d"
+string(2) "0e"
+string(2) "0f"
+string(10) "2623582f3b"
+string(10) "2623583a3b"
+string(10) "262358403b"
+string(10) "262358603b"
+string(10) "262358473b"
+string(10) "262358673b"
+string(8) "2623583b"
+string(2) "00"
+string(2) "01"
+string(2) "02"
+string(2) "03"
+string(2) "04"
+string(2) "05"
+string(2) "06"
+string(2) "07"
+string(2) "08"
+string(2) "09"
+string(8) "26232f3b"
+string(8) "26233a3b"
+string(6) "26233b"
+string(8) "f48fbfbf"
+string(20) "2623783131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623583131303030303b"
+string(8) "f48fbfbf"
+string(20) "2623313131343131323b"
-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to