lbarnaud Wed Nov 26 03:00:06 2008 UTC Added files: (Branch: PHP_5_3) /php-src/ext/standard/tests/strings htmlentities-utf-2.phpt
Modified files: /php-src/ext/standard/tests/strings htmlentities-utf.phpt /php-src/ext/standard html.c html.h Log: MFH: Added ENT_IGNORE as a compatibility flag for htmlentities() and htmlspecialchars() to skip invalid multibyte sequences instead of returning an empty string (as iconv's //IGNORE). These functions will still never return an invalid or incomplete multibyte sequence. Fixes #43896
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/tests/strings/htmlentities-utf.phpt?r1=1.1.4.2&r2=1.1.4.3&diff_format=u Index: php-src/ext/standard/tests/strings/htmlentities-utf.phpt diff -u php-src/ext/standard/tests/strings/htmlentities-utf.phpt:1.1.4.2 php-src/ext/standard/tests/strings/htmlentities-utf.phpt:1.1.4.3 --- php-src/ext/standard/tests/strings/htmlentities-utf.phpt:1.1.4.2 Wed Oct 3 05:05:37 2007 +++ php-src/ext/standard/tests/strings/htmlentities-utf.phpt Wed Nov 26 03:00:06 2008 @@ -4,8 +4,12 @@ output_handler= --FILE-- <?php -setlocale (LC_CTYPE, "C"); -$strings = array("<", "\xD0", "\xD0\x90", "\xD0\x90\xD0", "\xD0\x90\xD0\xB0", "\xE0", "A\xE0", "\xE0\x80", "\xE0\x80\xBE"); +@setlocale (LC_CTYPE, "C"); +$strings = array(b"<", b"\xD0", b"\xD0\x90", b"\xD0\x90\xD0", b"\xD0\x90\xD0\xB0", b"\xE0", b"A\xE0", b"\xE0\x80", b"\xE0\x79", b"\xE0\x80\xBE", + b"Voil\xE0", b"Clich\xE9s", + b"\xFE", b"\xFE\x41", b"\xC3\xA9", b"\xC3\x79", b"\xF7\xBF\xBF\xBF", b"\xFB\xBF\xBF\xBF\xBF", b"\xFD\xBF\xBF\xBF\xBF\xBF", + b"\x41\xF7\xF7\x42", b"\x42\xFB\xFB\x42", b"\x43\xFD\xFD\x42", b"\x44\xF7\xF7", b"\x45\xFB\xFB", b"\x46\xFD\xFD" + ); foreach($strings as $string) { $sc_encoded = htmlspecialchars ($string, ENT_QUOTES, "utf-8"); var_dump(bin2hex($sc_encoded)); @@ -13,22 +17,54 @@ var_dump(bin2hex($ent_encoded)); } ?> ---EXPECT-- -string(8) "266c743b" -string(8) "266c743b" -string(0) "" -string(0) "" -string(4) "d090" -string(4) "d090" -string(0) "" -string(0) "" -string(8) "d090d0b0" -string(8) "d090d0b0" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(0) "" -string(8) "2667743b" -string(8) "2667743b" \ No newline at end of file +--EXPECTF-- +%unicode|string%(8) "266c743b" +%unicode|string%(8) "266c743b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(4) "d090" +%unicode|string%(4) "d090" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(8) "d090d0b0" +%unicode|string%(8) "d090d0b0" 
+%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(8) "2667743b" +%unicode|string%(8) "2667743b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(4) "c3a9" +%unicode|string%(16) "266561637574653b" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(8) "f7bfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(10) "fbbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(12) "fdbfbfbfbfbf" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" +%unicode|string%(0) "" http://cvs.php.net/viewvc.cgi/php-src/ext/standard/html.c?r1=1.111.2.2.2.14.2.13&r2=1.111.2.2.2.14.2.14&diff_format=u Index: php-src/ext/standard/html.c diff -u php-src/ext/standard/html.c:1.111.2.2.2.14.2.13 php-src/ext/standard/html.c:1.111.2.2.2.14.2.14 --- php-src/ext/standard/html.c:1.111.2.2.2.14.2.13 Wed Nov 26 02:43:16 2008 +++ php-src/ext/standard/html.c Wed Nov 26 03:00:06 2008 @@ -18,7 +18,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: html.c,v 1.111.2.2.2.14.2.13 2008/11/26 02:43:16 lbarnaud Exp $ */ +/* $Id: html.c,v 1.111.2.2.2.14.2.14 2008/11/26 03:00:06 lbarnaud Exp $ */ /* * HTML entity resources: @@ -485,6 +485,7 @@ #define CHECK_LEN(pos, chars_need) \ if((str_len - (pos)) < chars_need) { \ + *newpos = pos; \ *status = FAILURE; \ return 0; \ } @@ -529,6 +530,7 @@ more = 0; if(stat) { /* we didn't finish the UTF sequence correctly */ + --pos; *status = FAILURE; } 
break; @@ -1135,6 +1137,9 @@ if(status == FAILURE) { /* invalid MB sequence */ + if (quote_style & ENT_HTML_IGNORE_ERRORS) { + continue; + } efree(replaced); if(!PG(display_errors)) { php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid multibyte sequence in argument"); @@ -1293,6 +1298,7 @@ REGISTER_LONG_CONSTANT("ENT_COMPAT", ENT_COMPAT, CONST_PERSISTENT|CONST_CS); REGISTER_LONG_CONSTANT("ENT_QUOTES", ENT_QUOTES, CONST_PERSISTENT|CONST_CS); REGISTER_LONG_CONSTANT("ENT_NOQUOTES", ENT_NOQUOTES, CONST_PERSISTENT|CONST_CS); + REGISTER_LONG_CONSTANT("ENT_IGNORE", ENT_IGNORE, CONST_PERSISTENT|CONST_CS); } /* }}} */ http://cvs.php.net/viewvc.cgi/php-src/ext/standard/html.h?r1=1.20.2.1.2.3.2.1&r2=1.20.2.1.2.3.2.2&diff_format=u Index: php-src/ext/standard/html.h diff -u php-src/ext/standard/html.h:1.20.2.1.2.3.2.1 php-src/ext/standard/html.h:1.20.2.1.2.3.2.2 --- php-src/ext/standard/html.h:1.20.2.1.2.3.2.1 Mon Dec 31 07:17:15 2007 +++ php-src/ext/standard/html.h Wed Nov 26 03:00:06 2008 @@ -16,7 +16,7 @@ +----------------------------------------------------------------------+ */ -/* $Id: html.h,v 1.20.2.1.2.3.2.1 2007/12/31 07:17:15 sebastian Exp $ */ +/* $Id: html.h,v 1.20.2.1.2.3.2.2 2008/11/26 03:00:06 lbarnaud Exp $ */ #ifndef HTML_H #define HTML_H @@ -24,10 +24,12 @@ #define ENT_HTML_QUOTE_NONE 0 #define ENT_HTML_QUOTE_SINGLE 1 #define ENT_HTML_QUOTE_DOUBLE 2 +#define ENT_HTML_IGNORE_ERRORS 4 #define ENT_COMPAT ENT_HTML_QUOTE_DOUBLE #define ENT_QUOTES (ENT_HTML_QUOTE_DOUBLE | ENT_HTML_QUOTE_SINGLE) #define ENT_NOQUOTES ENT_HTML_QUOTE_NONE +#define ENT_IGNORE ENT_HTML_IGNORE_ERRORS void register_html_constants(INIT_FUNC_ARGS); http://cvs.php.net/viewvc.cgi/php-src/ext/standard/tests/strings/htmlentities-utf-2.phpt?view=markup&rev=1.1 Index: php-src/ext/standard/tests/strings/htmlentities-utf-2.phpt +++ php-src/ext/standard/tests/strings/htmlentities-utf-2.phpt --TEST-- HTML entities with invalid chars and ENT_IGNORE --INI-- output_handler= --FILE-- <?php @setlocale 
(LC_CTYPE, "C"); $strings = array(b"<", b"\xD0", b"\xD0\x90", b"\xD0\x90\xD0", b"\xD0\x90\xD0\xB0", b"\xE0", b"A\xE0", b"\xE0\x80", b"\xE0\x79", b"\xE0\x80\xBE", b"Voil\xE0", b"Clich\xE9s", b"\xFE", b"\xFE\x41", b"\xC3\xA9", b"\xC3\x79", b"\xF7\xBF\xBF\xBF", b"\xFB\xBF\xBF\xBF\xBF", b"\xFD\xBF\xBF\xBF\xBF\xBF", b"\x41\xF7\xF7\x42", b"\x42\xFB\xFB\x42", b"\x43\xFD\xFD\x42", b"\x44\xF7\xF7", b"\x45\xFB\xFB", b"\x46\xFD\xFD" ); foreach($strings as $string) { $sc_encoded = htmlspecialchars ($string, ENT_QUOTES | ENT_IGNORE, "utf-8"); var_dump(bin2hex($sc_encoded)); $ent_encoded = htmlentities ($string, ENT_QUOTES | ENT_IGNORE, "utf-8"); var_dump(bin2hex($ent_encoded)); } ?> --EXPECTF-- %unicode|string%(8) "266c743b" %unicode|string%(8) "266c743b" %unicode|string%(0) "" %unicode|string%(0) "" %unicode|string%(4) "d090" %unicode|string%(4) "d090" %unicode|string%(4) "d090" %unicode|string%(4) "d090" %unicode|string%(8) "d090d0b0" %unicode|string%(8) "d090d0b0" %unicode|string%(0) "" %unicode|string%(0) "" %unicode|string%(2) "41" %unicode|string%(2) "41" %unicode|string%(0) "" %unicode|string%(0) "" %unicode|string%(2) "79" %unicode|string%(2) "79" %unicode|string%(8) "2667743b" %unicode|string%(8) "2667743b" %unicode|string%(8) "566f696c" %unicode|string%(8) "566f696c" %unicode|string%(12) "436c69636873" %unicode|string%(12) "436c69636873" %unicode|string%(0) "" %unicode|string%(0) "" %unicode|string%(2) "41" %unicode|string%(2) "41" %unicode|string%(4) "c3a9" %unicode|string%(16) "266561637574653b" %unicode|string%(2) "79" %unicode|string%(2) "79" %unicode|string%(8) "f7bfbfbf" %unicode|string%(8) "f7bfbfbf" %unicode|string%(10) "fbbfbfbfbf" %unicode|string%(10) "fbbfbfbfbf" %unicode|string%(12) "fdbfbfbfbfbf" %unicode|string%(12) "fdbfbfbfbfbf" %unicode|string%(4) "4142" %unicode|string%(4) "4142" %unicode|string%(4) "4242" %unicode|string%(4) "4242" %unicode|string%(4) "4342" %unicode|string%(4) "4342" %unicode|string%(2) "44" %unicode|string%(2) "44" 
%unicode|string%(2) "45" %unicode|string%(2) "45" %unicode|string%(2) "46" %unicode|string%(2) "46"
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php