rasmus                                   Tue, 23 Mar 2010 18:08:06 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=296685

Log:
Switch default_charset, if not specified, from ISO-8859-1 to UTF-8
I have been wanting to make this change for years, but there is a small
chance of BC issues, so it shouldn't go into a minor release.

Changed paths:
    U   php/php-src/trunk/NEWS
    U   php/php-src/trunk/ext/standard/html.c
    U   php/php-src/trunk/ext/standard/tests/strings/bug44703.phpt
    UU  
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic1.phpt
    UU  
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic2.phpt
    UU  
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_variation1.phpt
    U   php/php-src/trunk/ext/standard/tests/strings/htmlentities.phpt
    U   php/php-src/trunk/php.ini-development
    U   php/php-src/trunk/php.ini-production

Modified: php/php-src/trunk/NEWS
===================================================================
--- php/php-src/trunk/NEWS      2010-03-23 18:01:11 UTC (rev 296684)
+++ php/php-src/trunk/NEWS      2010-03-23 18:08:06 UTC (rev 296685)
@@ -1,6 +1,7 @@
 PHP                                                                        NEWS
 |||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
 ?? ??? 201?, PHP 5.3.99
+- default_charset if not specified is now UTF-8 instead of ISO-8859-1. (Rasmus)

 ?? ??? 20??, PHP 5.3.3
 - Upgraded bundled PCRE to version 8.01. (Ilia)

Modified: php/php-src/trunk/ext/standard/html.c
===================================================================
--- php/php-src/trunk/ext/standard/html.c       2010-03-23 18:01:11 UTC (rev 
296684)
+++ php/php-src/trunk/ext/standard/html.c       2010-03-23 18:08:06 UTC (rev 
296685)
@@ -711,17 +711,17 @@

 /* {{{ entity_charset determine_charset
  * returns the charset identifier based on current locale or a hint.
- * defaults to iso-8859-1 */
+ * defaults to UTF-8 */
 static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
 {
        int i;
-       enum entity_charset charset = cs_8859_1;
+       enum entity_charset charset = cs_utf_8;
        int len = 0;
        zval *uf_result = NULL;

-       /* Guarantee default behaviour for backwards compatibility */
+       /* Default is now UTF-8 */
        if (charset_hint == NULL)
-               return cs_8859_1;
+               return cs_utf_8;

        if ((len = strlen(charset_hint)) != 0) {
                goto det_charset;
@@ -855,7 +855,7 @@
                        }
                }
                if (!found) {
-                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "charset `%s' not supported, assuming iso-8859-1",
+                       php_error_docref(NULL TSRMLS_CC, E_WARNING, "charset `%s' not supported, assuming utf-8",
                                        charset_hint);
                }
        }

Modified: php/php-src/trunk/ext/standard/tests/strings/bug44703.phpt
===================================================================
--- php/php-src/trunk/ext/standard/tests/strings/bug44703.phpt  2010-03-23 
18:01:11 UTC (rev 296684)
+++ php/php-src/trunk/ext/standard/tests/strings/bug44703.phpt  2010-03-23 
18:08:06 UTC (rev 296685)
@@ -22,25 +22,25 @@

 ?>
 --EXPECTF--
-Warning: htmlspecialchars(): charset `1' not supported, assuming iso-8859-1 in 
%s on line %d
+Warning: htmlspecialchars(): charset `1' not supported, assuming utf-8 in %s 
on line %d
 string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"

-Warning: htmlspecialchars(): charset `12' not supported, assuming iso-8859-1 
in %s on line %d
+Warning: htmlspecialchars(): charset `12' not supported, assuming utf-8 in %s 
on line %d
 string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"

-Warning: htmlspecialchars(): charset `125' not supported, assuming iso-8859-1 
in %s on line %d
+Warning: htmlspecialchars(): charset `125' not supported, assuming utf-8 in %s 
on line %d
 string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"
 string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"

-Warning: htmlspecialchars(): charset `12526' not supported, assuming 
iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset `12526' not supported, assuming utf-8 in 
%s on line %d
 string(35) "&lt;a href='test'&gt;Test&lt;/a&gt;"
 string(8) "&lt;&gt;"

-Warning: htmlspecialchars(): charset `8666' not supported, assuming iso-8859-1 
in %s on line %d
+Warning: htmlspecialchars(): charset `8666' not supported, assuming utf-8 in 
%s on line %d
 string(8) "&lt;&gt;"
 string(8) "&lt;&gt;"
 string(8) "&lt;&gt;"
 string(8) "&lt;&gt;"

-Warning: htmlspecialchars(): charset 
`aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
 not supported, assuming iso-8859-1 in %s on line %d
+Warning: htmlspecialchars(): charset 
`aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'
 not supported, assuming utf-8 in %s on line %d
 string(8) "&lt;&gt;"

Modified: 
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic1.phpt
===================================================================
(Binary files differ)


Property changes on: 
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic1.phpt
___________________________________________________________________
Deleted: svn:eol-style
   - native
Added: mime-type
   + application/octet-stream
Added: svn:mime-type
   + application/octet-stream

Modified: 
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic2.phpt
===================================================================
(Binary files differ)


Property changes on: 
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_basic2.phpt
___________________________________________________________________
Deleted: svn:eol-style
   - native
Added: svn:mime-type
   + application/octet-stream

Modified: 
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_variation1.phpt
===================================================================
(Binary files differ)


Property changes on: 
php/php-src/trunk/ext/standard/tests/strings/get_html_translation_table_variation1.phpt
___________________________________________________________________
Deleted: svn:eol-style
   - native
Added: svn:mime-type
   + application/octet-stream

Modified: php/php-src/trunk/ext/standard/tests/strings/htmlentities.phpt
===================================================================
--- php/php-src/trunk/ext/standard/tests/strings/htmlentities.phpt      
2010-03-23 18:01:11 UTC (rev 296684)
+++ php/php-src/trunk/ext/standard/tests/strings/htmlentities.phpt      
2010-03-23 18:08:06 UTC (rev 296685)
@@ -1,16 +1,16 @@
 --TEST--
-HTML entities
+HTML entities of ISO-8859 chars
 --INI--
 output_handler=
 --FILE--
 <?php
 setlocale (LC_CTYPE, "C");
-$sc_encoded = htmlspecialchars ("<>\"&��\n");
+$sc_encoded = htmlspecialchars ("<>\"&��\n",ENT_COMPAT,"ISO-8859-1");
 echo $sc_encoded;
-$ent_encoded = htmlentities ("<>\"&��\n");
+$ent_encoded = htmlentities ("<>\"&��\n",ENT_COMPAT,"ISO-8859-1");
 echo $ent_encoded;
-echo html_entity_decode($sc_encoded);
-echo html_entity_decode($ent_encoded);
+echo html_entity_decode($sc_encoded,ENT_COMPAT,"ISO-8859-1");
+echo html_entity_decode($ent_encoded,ENT_COMPAT,"ISO-8859-1");
 ?>
 --EXPECT--
 &lt;&gt;&quot;&amp;��

Modified: php/php-src/trunk/php.ini-development
===================================================================
--- php/php-src/trunk/php.ini-development       2010-03-23 18:01:11 UTC (rev 
296684)
+++ php/php-src/trunk/php.ini-development       2010-03-23 18:08:06 UTC (rev 
296685)
@@ -769,7 +769,7 @@

 ; PHP's default character set is set to empty.
 ; http://php.net/default-charset
-;default_charset = "iso-8859-1"
+;default_charset = "UTF-8"

 ; Always populate the $HTTP_RAW_POST_DATA variable. PHP's default behavior is
 ; to disable this feature.

Modified: php/php-src/trunk/php.ini-production
===================================================================
--- php/php-src/trunk/php.ini-production        2010-03-23 18:01:11 UTC (rev 
296684)
+++ php/php-src/trunk/php.ini-production        2010-03-23 18:08:06 UTC (rev 
296685)
@@ -769,7 +769,7 @@

 ; PHP's default character set is set to empty.
 ; http://php.net/default-charset
-;default_charset = "iso-8859-1"
+;default_charset = "UTF-8"

 ; Always populate the $HTTP_RAW_POST_DATA variable. PHP's default behavior is
 ; to disable this feature.

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to