rasmus Tue, 23 Mar 2010 18:08:06 +0000
Revision: http://svn.php.net/viewvc?view=revision&revision=296685
Log:
Switch default_charset, if not specified, from ISO-8859-1 to UTF-8
I have been wanting to make this change for years, but there is a small
chance of BC issues, so it shouldn't go into a minor release.
Changed paths:
U php/php-src/trunk/NEWS
U php/php-src/trunk/ext/standard/html.c
U php/php-src/trunk/ext/standard/tests/strings/bug44703.phpt
UU php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic1.phpt
UU php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic2.phpt
UU php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_variation1.phpt
U php/php-src/trunk/ext/standard/tests/strings/htmlentities.phpt
U php/php-src/trunk/php.ini-development
U php/php-src/trunk/php.ini-production
Modified: php/php-src/trunk/NEWS
===================================================================
--- php/php-src/trunk/NEWS 2010-03-23 18:01:11 UTC (rev 296684)
+++ php/php-src/trunk/NEWS 2010-03-23 18:08:06 UTC (rev 296685)
@@ -1,6 +1,7 @@
PHP NEWS
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
?? ??? 201?, PHP 5.3.99
+- default_charset if not specified is now UTF-8 instead of ISO-8859-1. (Rasmus)
?? ??? 20??, PHP 5.3.3
- Upgraded bundled PCRE to version 8.01. (Ilia)
Modified: php/php-src/trunk/ext/standard/html.c
===================================================================
--- php/php-src/trunk/ext/standard/html.c 2010-03-23 18:01:11 UTC (rev 296684)
+++ php/php-src/trunk/ext/standard/html.c 2010-03-23 18:08:06 UTC (rev 296685)
@@ -711,17 +711,17 @@
/* {{{ entity_charset determine_charset
* returns the charset identifier based on current locale or a hint.
- * defaults to iso-8859-1 */
+ * defaults to UTF-8 */
static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
{
int i;
- enum entity_charset charset = cs_8859_1;
+ enum entity_charset charset = cs_utf_8;
int len = 0;
zval *uf_result = NULL;
- /* Guarantee default behaviour for backwards compatibility */
+ /* Default is now UTF-8 */
if (charset_hint == NULL)
- return cs_8859_1;
+ return cs_utf_8;
if ((len = strlen(charset_hint)) != 0) {
goto det_charset;
@@ -855,7 +855,7 @@
}
}
if (!found) {
- php_error_docref(NULL TSRMLS_CC, E_WARNING, "charset `%s' not supported, assuming iso-8859-1",
+ php_error_docref(NULL TSRMLS_CC, E_WARNING, "charset `%s' not supported, assuming utf-8",
charset_hint);
}
}
Modified: php/php-src/trunk/ext/standard/tests/strings/bug44703.phpt
===================================================================
--- php/php-src/trunk/ext/standard/tests/strings/bug44703.phpt 2010-03-23 18:01:11 UTC (rev 296684)
+++ php/php-src/trunk/ext/standard/tests/strings/bug44703.phpt 2010-03-23 18:08:06 UTC (rev 296685)
@@ -22,25 +22,25 @@
?>
--EXPECTF--
-Warning: htmlspecialchars(): charset `1' not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `1' not supported, assuming utf-8 in %s on line %d
string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"
-Warning: htmlspecialchars(): charset `12' not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `12' not supported, assuming utf-8 in %s on line %d
string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"
-Warning: htmlspecialchars(): charset `125' not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `125' not supported, assuming utf-8 in %s on line %d
string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"
string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"
-Warning: htmlspecialchars(): charset `12526' not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `12526' not supported, assuming utf-8 in %s on line %d
string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"
string(8) "&lt;&gt;"
-Warning: htmlspecialchars(): charset `8666' not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `8666' not supported, assuming utf-8 in %s on line %d
string(8) "&lt;&gt;"
string(8) "&lt;&gt;"
string(8) "&lt;&gt;"
string(8) "&lt;&gt;"
-Warning: htmlspecialchars(): charset `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa' not supported, assuming utf-8 in %s on line %d
string(8) "&lt;&gt;"
Modified:
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic1.phpt
===================================================================
(Binary files differ)
Property changes on:
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic1.phpt
___________________________________________________________________
Deleted: svn:eol-style
- native
Added: mime-type
+ application/octet-stream
Added: svn:mime-type
+ application/octet-stream
Modified:
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic2.phpt
===================================================================
(Binary files differ)
Property changes on:
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic2.phpt
___________________________________________________________________
Deleted: svn:eol-style
- native
Added: svn:mime-type
+ application/octet-stream
Modified:
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_variation1.phpt
===================================================================
(Binary files differ)
Property changes on:
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_variation1.phpt
___________________________________________________________________
Deleted: svn:eol-style
- native
Added: svn:mime-type
+ application/octet-stream
Modified: php/php-src/trunk/ext/standard/tests/strings/htmlentities.phpt
===================================================================
--- php/php-src/trunk/ext/standard/tests/strings/htmlentities.phpt 2010-03-23 18:01:11 UTC (rev 296684)
+++ php/php-src/trunk/ext/standard/tests/strings/htmlentities.phpt 2010-03-23 18:08:06 UTC (rev 296685)
@@ -1,16 +1,16 @@
--TEST--
-HTML entities
+HTML entities of ISO-8859 chars
--INI--
output_handler=
--FILE--
<?php
setlocale (LC_CTYPE, "C");
-$sc_encoded = htmlspecialchars ("<>\"&åÄ\n");
+$sc_encoded = htmlspecialchars ("<>\"&åÄ\n",ENT_COMPAT,"ISO-8859-1");
echo $sc_encoded;
-$ent_encoded = htmlentities ("<>\"&åÄ\n");
+$ent_encoded = htmlentities ("<>\"&åÄ\n",ENT_COMPAT,"ISO-8859-1");
echo $ent_encoded;
-echo html_entity_decode($sc_encoded);
-echo html_entity_decode($ent_encoded);
+echo html_entity_decode($sc_encoded,ENT_COMPAT,"ISO-8859-1");
+echo html_entity_decode($ent_encoded,ENT_COMPAT,"ISO-8859-1");
?>
--EXPECT--
&lt;&gt;&quot;&amp;åÄ
Modified: php/php-src/trunk/php.ini-development
===================================================================
--- php/php-src/trunk/php.ini-development 2010-03-23 18:01:11 UTC (rev 296684)
+++ php/php-src/trunk/php.ini-development 2010-03-23 18:08:06 UTC (rev 296685)
@@ -769,7 +769,7 @@
; PHP's default character set is set to empty.
; http://php.net/default-charset
-;default_charset = "iso-8859-1"
+;default_charset = "UTF-8"
; Always populate the $HTTP_RAW_POST_DATA variable. PHP's default behavior is
; to disable this feature.
Modified: php/php-src/trunk/php.ini-production
===================================================================
--- php/php-src/trunk/php.ini-production 2010-03-23 18:01:11 UTC (rev 296684)
+++ php/php-src/trunk/php.ini-production 2010-03-23 18:08:06 UTC (rev 296685)
@@ -769,7 +769,7 @@
; PHP's default character set is set to empty.
; http://php.net/default-charset
-;default_charset = "iso-8859-1"
+;default_charset = "UTF-8"
; Always populate the $HTTP_RAW_POST_DATA variable. PHP's default behavior is
; to disable this feature.
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php