cataphract Tue, 25 Jan 2011 10:57:07 +0000 Revision: http://svn.php.net/viewvc?view=revision&revision=307728
Log: - Fixed CHARSET_UNICODE_COMPAT (ISO-8859-1 is compatible in the relevant sense). - Fixed usage of zend_multibyte_get_internal_encoding (its return value cannot be cast to char*). - Changed tests to reflect that charset detection now relies on internal_encoding, not on current_internal_encoding. NOTE: This fixes the changes in rev 306077, but it remains that that change introduced a BC break. I assumed it was intentional. Changed paths: U php/php-src/trunk/UPGRADING U php/php-src/trunk/ext/standard/html.c U php/php-src/trunk/ext/standard/html_tables/html_table_gen.php U php/php-src/trunk/ext/standard/html_tables.h U php/php-src/trunk/ext/standard/tests/strings/htmlentities05.phpt U php/php-src/trunk/ext/standard/tests/strings/htmlentities06.phpt U php/php-src/trunk/ext/standard/tests/strings/htmlentities07.phpt U php/php-src/trunk/ext/standard/tests/strings/htmlentities08.phpt U php/php-src/trunk/ext/standard/tests/strings/htmlentities09.phpt U php/php-src/trunk/ext/standard/tests/strings/htmlentities16.phpt
Modified: php/php-src/trunk/UPGRADING =================================================================== --- php/php-src/trunk/UPGRADING 2011-01-25 05:47:58 UTC (rev 307727) +++ php/php-src/trunk/UPGRADING 2011-01-25 10:57:07 UTC (rev 307728) @@ -148,6 +148,9 @@ behavior follows the recommendations of Unicode Technical Report #36. - htmlspecialchars_decode/html_entity_decode now decode ' if the document type is ENT_XML1, ENT_XHTML, or ENT_HTML5. +- Charset detection with $charset == '' no longer turns to mbstring's + internal encoding defined through mb_internal_encoding(). Only the encoding + defined through the ini setting mbstring.internal_encoding is considered. - number_format() no longer truncates multibyte decimal points and thousand separators to the first byte. - The third parameter ($matches) to preg_match_all() is now optional. If Modified: php/php-src/trunk/ext/standard/html.c =================================================================== --- php/php-src/trunk/ext/standard/html.c 2011-01-25 05:47:58 UTC (rev 307727) +++ php/php-src/trunk/ext/standard/html.c 2011-01-25 10:57:07 UTC (rev 307728) @@ -367,6 +367,7 @@ int i; enum entity_charset charset = cs_utf_8; int len = 0; + const zend_encoding *zenc; /* Default is now UTF-8 */ if (charset_hint == NULL) @@ -376,9 +377,20 @@ goto det_charset; } - charset_hint = (char*)zend_multibyte_get_internal_encoding(TSRMLS_C); - if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) { - goto det_charset; + zenc = zend_multibyte_get_internal_encoding(TSRMLS_C); + if (zenc != NULL) { + charset_hint = zend_multibyte_get_encoding_name(zenc); + if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) { + if ((len == 4) /* sizeof (none|auto|pass) */ && + (!memcmp("pass", charset_hint, 4) || + !memcmp("auto", charset_hint, 4) || + !memcmp("auto", charset_hint, 4))) { + charset_hint = NULL; + len = 0; + } else { + goto det_charset; + } + } } charset_hint = SG(default_charset); Modified: 
php/php-src/trunk/ext/standard/html_tables/html_table_gen.php =================================================================== --- php/php-src/trunk/ext/standard/html_tables/html_table_gen.php 2011-01-25 05:47:58 UTC (rev 307727) +++ php/php-src/trunk/ext/standard/html_tables/html_table_gen.php 2011-01-25 10:57:07 UTC (rev 307728) @@ -56,7 +56,7 @@ cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp, cs_numelems /* used to count the number of charsets */ }; -#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8) +#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_8859_1) #define CHARSET_SINGLE_BYTE(cs) ((cs) > cs_utf_8 && (cs) < cs_big5) #define CHARSET_PARTIAL_SUPPORT(cs) ((cs) >= cs_big5) Modified: php/php-src/trunk/ext/standard/html_tables.h =================================================================== --- php/php-src/trunk/ext/standard/html_tables.h 2011-01-25 05:47:58 UTC (rev 307727) +++ php/php-src/trunk/ext/standard/html_tables.h 2011-01-25 10:57:07 UTC (rev 307728) @@ -33,7 +33,7 @@ cs_gb2312, cs_big5hkscs, cs_sjis, cs_eucjp, cs_numelems /* used to count the number of charsets */ }; -#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_utf_8) +#define CHARSET_UNICODE_COMPAT(cs) ((cs) <= cs_8859_1) #define CHARSET_SINGLE_BYTE(cs) ((cs) > cs_utf_8 && (cs) < cs_big5) #define CHARSET_PARTIAL_SUPPORT(cs) ((cs) >= cs_big5) Modified: php/php-src/trunk/ext/standard/tests/strings/htmlentities05.phpt =================================================================== --- php/php-src/trunk/ext/standard/tests/strings/htmlentities05.phpt 2011-01-25 05:47:58 UTC (rev 307727) +++ php/php-src/trunk/ext/standard/tests/strings/htmlentities05.phpt 2011-01-25 10:57:07 UTC (rev 307728) @@ -2,19 +2,12 @@ htmlentities() test 5 (mbstring / cp1252) --INI-- output_handler= +mbstring.internal_encoding=cp1252 --SKIPIF-- <?php extension_loaded("mbstring") or die("skip mbstring not available\n"); - mb_internal_encoding('cp1252'); - $php_errormsg = NULL; - @htmlentities("\x82\x86\x99\x9f", 
ENT_QUOTES, ''); - if ($php_errormsg) { - die("skip cp1252 chracter set is not supported on this platform.\n"); - } -?> --FILE-- <?php - mb_internal_encoding('cp1252'); print mb_internal_encoding()."\n"; var_dump(htmlentities("\x82\x86\x99\x9f", ENT_QUOTES, '')); var_dump(htmlentities("\x80\xa2\xa3\xa4\xa5", ENT_QUOTES, '')); Modified: php/php-src/trunk/ext/standard/tests/strings/htmlentities06.phpt =================================================================== --- php/php-src/trunk/ext/standard/tests/strings/htmlentities06.phpt 2011-01-25 05:47:58 UTC (rev 307727) +++ php/php-src/trunk/ext/standard/tests/strings/htmlentities06.phpt 2011-01-25 10:57:07 UTC (rev 307728) @@ -2,15 +2,10 @@ htmlentities() test 6 (mbstring / ISO-8859-15) --INI-- output_handler= +mbstring.internal_encoding=ISO-8859-15 --SKIPIF-- <?php extension_loaded("mbstring") or die("skip mbstring not available\n"); - @mb_internal_encoding('ISO-8859-15'); - @htmlentities("\xbc\xbd\xbe", ENT_QUOTES, ''); - if (@$php_errormsg) { - die("skip ISO-8859-15 chracter set is not supported on this platform.\n"); - } -?> --FILE-- <?php mb_internal_encoding('ISO-8859-15'); Modified: php/php-src/trunk/ext/standard/tests/strings/htmlentities07.phpt =================================================================== --- php/php-src/trunk/ext/standard/tests/strings/htmlentities07.phpt 2011-01-25 05:47:58 UTC (rev 307727) +++ php/php-src/trunk/ext/standard/tests/strings/htmlentities07.phpt 2011-01-25 10:57:07 UTC (rev 307728) @@ -2,16 +2,10 @@ htmlentities() test 7 (mbstring / ISO-8859-1) --INI-- output_handler= +mbstring.internal_encoding=ISO-8859-1 --SKIPIF-- <?php extension_loaded("mbstring") or die("skip mbstring not available\n"); - mb_internal_encoding('ISO-8859-1'); - $php_errormsg = NULL; - @htmlentities("\xe4\xf6\xfc", ENT_QUOTES, ''); - if ($php_errormsg) { - die("skip ISO-8859-1 chracter set is not supported on this platform.\n"); - } -?> --FILE-- <?php mb_internal_encoding('ISO-8859-1'); Modified: 
php/php-src/trunk/ext/standard/tests/strings/htmlentities08.phpt =================================================================== --- php/php-src/trunk/ext/standard/tests/strings/htmlentities08.phpt 2011-01-25 05:47:58 UTC (rev 307727) +++ php/php-src/trunk/ext/standard/tests/strings/htmlentities08.phpt 2011-01-25 10:57:07 UTC (rev 307728) @@ -2,16 +2,11 @@ htmlentities() test 8 (mbstring / EUC-JP) --INI-- output_handler= +error_reporting=~E_STRICT +mbstring.internal_encoding=EUC-JP --SKIPIF-- <?php extension_loaded("mbstring") or die("skip mbstring not available\n"); - mb_internal_encoding('EUC-JP'); - $php_errormsg = NULL; - @htmlentities("\xa1\xa2\xa1\xa3\xa1\xa4", ENT_QUOTES, ''); - if ($php_errormsg) { - die("skip EUC-JP chracter set is not supported on this platform.\n"); - } -?> --FILE-- <?php mb_internal_encoding('EUC-JP'); Modified: php/php-src/trunk/ext/standard/tests/strings/htmlentities09.phpt =================================================================== --- php/php-src/trunk/ext/standard/tests/strings/htmlentities09.phpt 2011-01-25 05:47:58 UTC (rev 307727) +++ php/php-src/trunk/ext/standard/tests/strings/htmlentities09.phpt 2011-01-25 10:57:07 UTC (rev 307728) @@ -2,16 +2,11 @@ htmlentities() test 9 (mbstring / Shift_JIS) --INI-- output_handler= +error_reporting=~E_STRICT +mbstring.internal_encoding=Shift_JIS --SKIPIF-- <?php extension_loaded("mbstring") or die("skip mbstring not available\n"); - mb_internal_encoding('Shift_JIS'); - $php_errormsg = NULL; - @htmlentities("\x81\x41\x81\x42\x81\x43", ENT_QUOTES, ''); - if ($php_errormsg) { - die("skip Shift_JIS chracter set is not supported on this platform.\n"); - } -?> --FILE-- <?php mb_internal_encoding('Shift_JIS'); Modified: php/php-src/trunk/ext/standard/tests/strings/htmlentities16.phpt =================================================================== --- php/php-src/trunk/ext/standard/tests/strings/htmlentities16.phpt 2011-01-25 05:47:58 UTC (rev 307727) +++ 
php/php-src/trunk/ext/standard/tests/strings/htmlentities16.phpt 2011-01-25 10:57:07 UTC (rev 307728) @@ -2,17 +2,13 @@ htmlentities() test 16 (mbstring / cp1251) --INI-- output_handler= +mbstring.internal_encoding=cp1251 --SKIPIF-- <?php extension_loaded("mbstring") or die("skip mbstring not available\n"); - if (!@mb_internal_encoding('cp1251') || - @htmlentities("\x88\xa9\xd2\xcf\xd3\xcb\xcf\xdb\xce\xd9\xca", ENT_QUOTES, '') == '') { - die("skip cp1251 character set is not available in this build.\n"); - } ?> --FILE-- <?php -mb_internal_encoding('cp1251'); $str = "\x88\xa9\xf0\xee\xf1\xea\xee\xf8\xed\xfb\xe9"; var_dump(bin2hex($str), bin2hex(htmlentities($str, ENT_QUOTES, ''))); var_dump(htmlentities($str, ENT_QUOTES | ENT_HTML5, ''));
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php