moriyoshi Wed, 29 Jul 2009 04:44:08 +0000 Revision: http://svn.php.net/viewvc?view=revision&revision=286483
Log: * Fix bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities) Bug: http://bugs.php.net/48645 (Assigned) mb_convert_encoding() doesn't understand hexadecimal html-entities Changed paths: _U php/php-src/branches/PHP_5_2/ U php/php-src/branches/PHP_5_2/NEWS U php/php-src/branches/PHP_5_2/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c A php/php-src/branches/PHP_5_2/ext/mbstring/tests/bug48645.phpt _U php/php-src/branches/PHP_5_3/ U php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c A php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug48645.phpt U php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c A php/php-src/trunk/ext/mbstring/tests/bug48645.phpt
Property changes on: php/php-src/branches/PHP_5_2 ___________________________________________________________________ Modified: svn:mergeinfo - /php/php-src/branches/PHP_5_3:284120 + /php/php-src/branches/PHP_5_3:284120 /php/php-src/trunk:284726 Modified: php/php-src/branches/PHP_5_2/NEWS =================================================================== --- php/php-src/branches/PHP_5_2/NEWS 2009-07-29 04:29:30 UTC (rev 286482) +++ php/php-src/branches/PHP_5_2/NEWS 2009-07-29 04:44:08 UTC (rev 286483) @@ -1,6 +1,7 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ??? 2009, PHP 5.2.11 +- Fixed bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities). (Moriyoshi) - Fixed regression in cURL extension that prevented flush of data to output defined as a file handle. (Ilia) Modified: php/php-src/branches/PHP_5_2/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c =================================================================== --- php/php-src/branches/PHP_5_2/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 2009-07-29 04:29:30 UTC (rev 286482) +++ php/php-src/branches/PHP_5_2/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 2009-07-29 04:44:08 UTC (rev 286483) @@ -186,18 +186,58 @@ } } else { if (c == ';') { - buffer[filter->status] = 0; if (buffer[1]=='#') { - /* numeric entity */ - for (pos=2; pos<filter->status; pos++) { - ent = ent*10 + (buffer[pos] - '0'); + if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) { + if (filter->status > 3) { + /* numeric entity */ + for (pos=3; pos<filter->status; pos++) { + int v = buffer[pos]; + if (v >= '0' && v <= '9') { + v = v - '0'; + } else if (v >= 'A' && v <= 'F') { + v = v - 'A' + 10; + } else if (v >= 'a' && v <= 'f') { + v = v - 'a' + 10; + } else { + ent = -1; + break; + } + ent = ent * 16 + v; + } + } else { + ent = -1; + } + } else { + /* numeric entity */ + if (filter->status > 2) { + for (pos=2; pos<filter->status; pos++) { + int v = 
buffer[pos]; + if (v >= '0' && v <= '9') { + v = v - '0'; + } else { + ent = -1; + break; + } + ent = ent*10 + v; + } + } else { + ent = -1; + } } - CK((*filter->output_function)(ent, filter->data)); + if (ent >= 0 && ent < 0x110000) { + CK((*filter->output_function)(ent, filter->data)); + } else { + for (pos = 0; pos < filter->status; pos++) { + CK((*filter->output_function)(buffer[pos], filter->data)); + } + CK((*filter->output_function)(c, filter->data)); + } filter->status = 0; /*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/ } else { /* named entity */ - entity = (mbfl_html_entity_entry *)mbfl_html_entity_list; + buffer[filter->status] = 0; + entity = (mbfl_html_entity_entry *)mbfl_html_entity_list; while (entity->name) { if (!strcmp(buffer+1, entity->name)) { ent = entity->code; Added: php/php-src/branches/PHP_5_2/ext/mbstring/tests/bug48645.phpt =================================================================== --- php/php-src/branches/PHP_5_2/ext/mbstring/tests/bug48645.phpt (rev 0) +++ php/php-src/branches/PHP_5_2/ext/mbstring/tests/bug48645.phpt 2009-07-29 04:44:08 UTC (rev 286483) @@ -0,0 +1,162 @@ +--TEST-- +Bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities) +--SKIPIF-- +<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?> +--FILE-- +<?php +var_dump(bin2hex(mb_convert_encoding("&#x0;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x1;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x2;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x3;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x4;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x5;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x6;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x7;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x8;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x9;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xA;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xB;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xC;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xD;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xE;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xa;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xb;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xc;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xd;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xe;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xf;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x/;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x:;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x@;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x`;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xG;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xg;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X0;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X1;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X2;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X3;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X4;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X5;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X6;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X7;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X8;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X9;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XA;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XB;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XC;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XD;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XE;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xa;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xb;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xc;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xd;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xe;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xf;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X/;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X:;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X@;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X`;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XG;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xg;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#0;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#1;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#2;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#3;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#4;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#5;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#6;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#7;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#8;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#9;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#/;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#:;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x10FFFF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x110000;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X10FFFF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X110000;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#1114111;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("�", "UTF-8", "HTML-ENTITIES"))); +?> +--EXPECT-- +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(10) "2623782f3b" +string(10) "2623783a3b" +string(10) "262378403b" +string(10) "262378603b" +string(10) "262378473b" +string(10) "262378673b" +string(8) "2623783b" +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(10) "2623582f3b" +string(10) "2623583a3b" +string(10) "262358403b" +string(10) "262358603b" +string(10) "262358473b" +string(10) "262358673b" +string(8) "2623583b" +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(8) "26232f3b" +string(8) "26233a3b" +string(6) "26233b" +string(8) "f48fbfbf" +string(20) "2623783131303030303b" +string(8) "f48fbfbf" +string(20) "2623583131303030303b" +string(8) "f48fbfbf" +string(20) "2623313131343131323b" Property changes on: php/php-src/branches/PHP_5_3 ___________________________________________________________________ Added: svn:mergeinfo + /php/php-src/trunk:284726 Modified: php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c =================================================================== --- php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 2009-07-29 04:29:30 UTC (rev 286482) +++ 
php/php-src/branches/PHP_5_3/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 2009-07-29 04:44:08 UTC (rev 286483) @@ -186,18 +186,58 @@ } } else { if (c == ';') { - buffer[filter->status] = 0; if (buffer[1]=='#') { - /* numeric entity */ - for (pos=2; pos<filter->status; pos++) { - ent = ent*10 + (buffer[pos] - '0'); + if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) { + if (filter->status > 3) { + /* numeric entity */ + for (pos=3; pos<filter->status; pos++) { + int v = buffer[pos]; + if (v >= '0' && v <= '9') { + v = v - '0'; + } else if (v >= 'A' && v <= 'F') { + v = v - 'A' + 10; + } else if (v >= 'a' && v <= 'f') { + v = v - 'a' + 10; + } else { + ent = -1; + break; + } + ent = ent * 16 + v; + } + } else { + ent = -1; + } + } else { + /* numeric entity */ + if (filter->status > 2) { + for (pos=2; pos<filter->status; pos++) { + int v = buffer[pos]; + if (v >= '0' && v <= '9') { + v = v - '0'; + } else { + ent = -1; + break; + } + ent = ent*10 + v; + } + } else { + ent = -1; + } } - CK((*filter->output_function)(ent, filter->data)); + if (ent >= 0 && ent < 0x110000) { + CK((*filter->output_function)(ent, filter->data)); + } else { + for (pos = 0; pos < filter->status; pos++) { + CK((*filter->output_function)(buffer[pos], filter->data)); + } + CK((*filter->output_function)(c, filter->data)); + } filter->status = 0; /*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/ } else { /* named entity */ - entity = (mbfl_html_entity_entry *)mbfl_html_entity_list; + buffer[filter->status] = 0; + entity = (mbfl_html_entity_entry *)mbfl_html_entity_list; while (entity->name) { if (!strcmp(buffer+1, entity->name)) { ent = entity->code; Added: php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug48645.phpt =================================================================== --- php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug48645.phpt (rev 0) +++ php/php-src/branches/PHP_5_3/ext/mbstring/tests/bug48645.phpt 
2009-07-29 04:44:08 UTC (rev 286483) @@ -0,0 +1,162 @@ +--TEST-- +Bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities) +--SKIPIF-- +<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?> +--FILE-- +<?php +var_dump(bin2hex(mb_convert_encoding("&#x0;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x1;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x2;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x3;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x4;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x5;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x6;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x7;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x8;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x9;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xA;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xB;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xC;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xD;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xE;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xa;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xb;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xc;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xd;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xe;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xf;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x/;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x:;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x@;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x`;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xG;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xg;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X0;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X1;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X2;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X3;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X4;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X5;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X6;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X7;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X8;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X9;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XA;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XB;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XC;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XD;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XE;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xa;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xb;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xc;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xd;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xe;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xf;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X/;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X:;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X@;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X`;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XG;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xg;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#0;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#1;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#2;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#3;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#4;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#5;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#6;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#7;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#8;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#9;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#/;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#:;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x10FFFF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x110000;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X10FFFF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X110000;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#1114111;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("�", "UTF-8", "HTML-ENTITIES"))); +?> +--EXPECT-- +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(10) "2623782f3b" +string(10) "2623783a3b" +string(10) "262378403b" +string(10) "262378603b" +string(10) "262378473b" +string(10) "262378673b" +string(8) "2623783b" +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(10) "2623582f3b" +string(10) "2623583a3b" +string(10) "262358403b" +string(10) "262358603b" +string(10) "262358473b" +string(10) "262358673b" +string(8) "2623583b" +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(8) "26232f3b" +string(8) "26233a3b" +string(6) "26233b" +string(8) "f48fbfbf" +string(20) "2623783131303030303b" +string(8) "f48fbfbf" +string(20) "2623583131303030303b" +string(8) "f48fbfbf" +string(20) "2623313131343131323b" Modified: php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c =================================================================== --- php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 2009-07-29 04:29:30 UTC (rev 286482) +++ php/php-src/trunk/ext/mbstring/libmbfl/filters/mbfilter_htmlent.c 2009-07-29 04:44:08 UTC (rev 286483) @@ -186,18 +186,58 @@ } } else { if (c == ';') { - buffer[filter->status] = 0; if (buffer[1]=='#') { - /* 
numeric entity */ - for (pos=2; pos<filter->status; pos++) { - ent = ent*10 + (buffer[pos] - '0'); + if (filter->status > 2 && (buffer[2] == 'x' || buffer[2] == 'X')) { + if (filter->status > 3) { + /* numeric entity */ + for (pos=3; pos<filter->status; pos++) { + int v = buffer[pos]; + if (v >= '0' && v <= '9') { + v = v - '0'; + } else if (v >= 'A' && v <= 'F') { + v = v - 'A' + 10; + } else if (v >= 'a' && v <= 'f') { + v = v - 'a' + 10; + } else { + ent = -1; + break; + } + ent = ent * 16 + v; + } + } else { + ent = -1; + } + } else { + /* numeric entity */ + if (filter->status > 2) { + for (pos=2; pos<filter->status; pos++) { + int v = buffer[pos]; + if (v >= '0' && v <= '9') { + v = v - '0'; + } else { + ent = -1; + break; + } + ent = ent*10 + v; + } + } else { + ent = -1; + } } - CK((*filter->output_function)(ent, filter->data)); + if (ent >= 0 && ent < 0x110000) { + CK((*filter->output_function)(ent, filter->data)); + } else { + for (pos = 0; pos < filter->status; pos++) { + CK((*filter->output_function)(buffer[pos], filter->data)); + } + CK((*filter->output_function)(c, filter->data)); + } filter->status = 0; /*php_error_docref("ref.mbstring" TSRMLS_CC, E_NOTICE, "mbstring decoded '%s'=%d", buffer, ent);*/ } else { /* named entity */ - entity = (mbfl_html_entity_entry *)mbfl_html_entity_list; + buffer[filter->status] = 0; + entity = (mbfl_html_entity_entry *)mbfl_html_entity_list; while (entity->name) { if (!strcmp(buffer+1, entity->name)) { ent = entity->code; Added: php/php-src/trunk/ext/mbstring/tests/bug48645.phpt =================================================================== --- php/php-src/trunk/ext/mbstring/tests/bug48645.phpt (rev 0) +++ php/php-src/trunk/ext/mbstring/tests/bug48645.phpt 2009-07-29 04:44:08 UTC (rev 286483) @@ -0,0 +1,162 @@ +--TEST-- +Bug #48645 (mb_convert_encoding() doesn't understand hexadecimal html-entities) +--SKIPIF-- +<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?> +--FILE-- +<?php 
+var_dump(bin2hex(mb_convert_encoding("&#x0;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x1;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x2;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x3;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x4;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x5;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x6;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x7;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x8;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x9;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xA;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xB;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xC;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xD;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xE;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xa;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xb;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xc;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xd;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xe;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xf;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x/;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x:;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x@;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x`;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xG;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#xg;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X0;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X1;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X2;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X3;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X4;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X5;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X6;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X7;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X8;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X9;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XA;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XB;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XC;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XD;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XE;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xa;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xb;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xc;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xd;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xe;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xf;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X/;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X:;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X@;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X`;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#XG;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#Xg;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#0;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#1;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#2;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#3;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#4;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#5;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#6;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#7;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#8;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#9;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#/;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#:;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x10FFFF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#x110000;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X10FFFF;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#X110000;", "UTF-8", "HTML-ENTITIES"))); +var_dump(bin2hex(mb_convert_encoding("&#1114111;", "UTF-8", "HTML-ENTITIES")));
+var_dump(bin2hex(mb_convert_encoding("�", "UTF-8", "HTML-ENTITIES"))); +?> +--EXPECT-- +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(10) "2623782f3b" +string(10) "2623783a3b" +string(10) "262378403b" +string(10) "262378603b" +string(10) "262378473b" +string(10) "262378673b" +string(8) "2623783b" +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(2) "0a" +string(2) "0b" +string(2) "0c" +string(2) "0d" +string(2) "0e" +string(2) "0f" +string(10) "2623582f3b" +string(10) "2623583a3b" +string(10) "262358403b" +string(10) "262358603b" +string(10) "262358473b" +string(10) "262358673b" +string(8) "2623583b" +string(2) "00" +string(2) "01" +string(2) "02" +string(2) "03" +string(2) "04" +string(2) "05" +string(2) "06" +string(2) "07" +string(2) "08" +string(2) "09" +string(8) "26232f3b" +string(8) "26233a3b" +string(6) "26233b" +string(8) "f48fbfbf" +string(20) "2623783131303030303b" +string(8) "f48fbfbf" +string(20) "2623583131303030303b" +string(8) "f48fbfbf" +string(20) "2623313131343131323b"
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php