rasmus Tue, 23 Mar 2010 18:08:06 +0000 Revision: http://svn.php.net/viewvc?view=revision&revision=296685
Log: Switch default_charset, if not specified, from ISO-8859-1 to UTF-8 I have been wanting to make this change for years, but there is a small chance of BC issues, so it shouldn't go into a minor release. Changed paths: U php/php-src/trunk/NEWS U php/php-src/trunk/ext/standard/html.c U php/php-src/trunk/ext/standard/tests/strings/bug44703.phpt UU php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic1.phpt UU php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic2.phpt UU php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_variation1.phpt U php/php-src/trunk/ext/standard/tests/strings/htmlentities.phpt U php/php-src/trunk/php.ini-development U php/php-src/trunk/php.ini-production Modified: php/php-src/trunk/NEWS =================================================================== --- php/php-src/trunk/NEWS 2010-03-23 18:01:11 UTC (rev 296684) +++ php/php-src/trunk/NEWS 2010-03-23 18:08:06 UTC (rev 296685) @@ -1,6 +1,7 @@ PHP NEWS ||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||| ?? ??? 201?, PHP 5.3.99 +- default_charset if not specified is now UTF-8 instead of ISO-8859-1. (Rasmus) ?? ??? 20??, PHP 5.3.3 - Upgraded bundled PCRE to version 8.01. (Ilia) Modified: php/php-src/trunk/ext/standard/html.c =================================================================== --- php/php-src/trunk/ext/standard/html.c 2010-03-23 18:01:11 UTC (rev 296684) +++ php/php-src/trunk/ext/standard/html.c 2010-03-23 18:08:06 UTC (rev 296685) @@ -711,17 +711,17 @@ /* {{{ entity_charset determine_charset * returns the charset identifier based on current locale or a hint. 
- * defaults to iso-8859-1 */ + * defaults to UTF-8 */ static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC) { int i; - enum entity_charset charset = cs_8859_1; + enum entity_charset charset = cs_utf_8; int len = 0; zval *uf_result = NULL; - /* Guarantee default behaviour for backwards compatibility */ + /* Default is now UTF-8 */ if (charset_hint == NULL) - return cs_8859_1; + return cs_utf_8; if ((len = strlen(charset_hint)) != 0) { goto det_charset; @@ -855,7 +855,7 @@ } } if (!found) { - php_error_docref(NULL TSRMLS_CC, E_WARNING, "charset `%s' not supported, assuming iso-8859-1", + php_error_docref(NULL TSRMLS_CC, E_WARNING, "charset `%s' not supported, assuming utf-8", charset_hint); } } Modified: php/php-src/trunk/ext/standard/tests/strings/bug44703.phpt =================================================================== --- php/php-src/trunk/ext/standard/tests/strings/bug44703.phpt 2010-03-23 18:01:11 UTC (rev 296684) +++ php/php-src/trunk/ext/standard/tests/strings/bug44703.phpt 2010-03-23 18:08:06 UTC (rev 296685) @@ -22,25 +22,25 @@ ?> --EXPECTF-- -Warning: htmlspecialchars(): charset `1' not supported, assuming iso-8859-1 in %s on line %d +Warning: htmlspecialchars(): charset `1' not supported, assuming utf-8 in %s on line %d string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;" -Warning: htmlspecialchars(): charset `12' not supported, assuming iso-8859-1 in %s on line %d +Warning: htmlspecialchars(): charset `12' not supported, assuming utf-8 in %s on line %d string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;" -Warning: htmlspecialchars(): charset `125' not supported, assuming iso-8859-1 in %s on line %d +Warning: htmlspecialchars(): charset `125' not supported, assuming utf-8 in %s on line %d string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;" string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;" -Warning: htmlspecialchars(): charset `12526' not supported, assuming iso-8859-1 in %s on line %d +Warning: htmlspecialchars(): charset `12526' not supported, assuming utf-8 in %s on line %d 
string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;" string(8) "&lt;&gt;" -Warning: htmlspecialchars(): charset `8666' not supported, assuming iso-8859-1 in %s on line %d +Warning: htmlspecialchars(): charset `8666' not supported, assuming utf-8 in %s on line %d string(8) "&lt;&gt;" string(8) "&lt;&gt;" string(8) "&lt;&gt;" string(8) "&lt;&gt;" -Warning: htmlspecialchars(): charset `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' not supported, assuming iso-8859-1 in %s on line %d +Warning: htmlspecialchars(): charset `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' not supported, assuming utf-8 in %s on line %d string(8) "&lt;&gt;" Modified: php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic1.phpt =================================================================== (Binary files differ) Property changes on: php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic1.phpt ___________________________________________________________________ Deleted: svn:eol-style - native Added: mime-type + application/octet-stream Added: svn:mime-type + application/octet-stream Modified: php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic2.phpt =================================================================== (Binary files differ) Property changes on: php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic2.phpt ___________________________________________________________________ Deleted: svn:eol-style - native Added: svn:mime-type + application/octet-stream Modified: php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_variation1.phpt =================================================================== (Binary files differ) Property changes on: php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_variation1.phpt ___________________________________________________________________ Deleted: 
svn:eol-style - native Added: svn:mime-type + application/octet-stream Modified: php/php-src/trunk/ext/standard/tests/strings/htmlentities.phpt =================================================================== --- php/php-src/trunk/ext/standard/tests/strings/htmlentities.phpt 2010-03-23 18:01:11 UTC (rev 296684) +++ php/php-src/trunk/ext/standard/tests/strings/htmlentities.phpt 2010-03-23 18:08:06 UTC (rev 296685) @@ -1,16 +1,16 @@ --TEST-- -HTML entities +HTML entities of ISO-8859 chars --INI-- output_handler= --FILE-- <?php setlocale (LC_CTYPE, "C"); -$sc_encoded = htmlspecialchars ("<>\"&åÄ\n"); +$sc_encoded = htmlspecialchars ("<>\"&åÄ\n",ENT_COMPAT,"ISO-8859-1"); echo $sc_encoded; -$ent_encoded = htmlentities ("<>\"&åÄ\n"); +$ent_encoded = htmlentities ("<>\"&åÄ\n",ENT_COMPAT,"ISO-8859-1"); echo $ent_encoded; -echo html_entity_decode($sc_encoded); -echo html_entity_decode($ent_encoded); +echo html_entity_decode($sc_encoded,ENT_COMPAT,"ISO-8859-1"); +echo html_entity_decode($ent_encoded,ENT_COMPAT,"ISO-8859-1"); ?> --EXPECT-- &lt;&gt;&quot;&amp;åÄ Modified: php/php-src/trunk/php.ini-development =================================================================== --- php/php-src/trunk/php.ini-development 2010-03-23 18:01:11 UTC (rev 296684) +++ php/php-src/trunk/php.ini-development 2010-03-23 18:08:06 UTC (rev 296685) @@ -769,7 +769,7 @@ ; PHP's default character set is set to empty. ; http://php.net/default-charset -;default_charset = "iso-8859-1" +;default_charset = "UTF-8" ; Always populate the $HTTP_RAW_POST_DATA variable. PHP's default behavior is ; to disable this feature. Modified: php/php-src/trunk/php.ini-production =================================================================== --- php/php-src/trunk/php.ini-production 2010-03-23 18:01:11 UTC (rev 296684) +++ php/php-src/trunk/php.ini-production 2010-03-23 18:08:06 UTC (rev 296685) @@ -769,7 +769,7 @@ ; PHP's default character set is set to empty. 
; http://php.net/default-charset -;default_charset = "iso-8859-1" +;default_charset = "UTF-8" ; Always populate the $HTTP_RAW_POST_DATA variable. PHP's default behavior is ; to disable this feature.
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php