lbarnaud Wed Nov 26 02:57:32 2008 UTC
Added files:
/php-src/ext/standard/tests/strings htmlentities-utf-2.phpt
Modified files:
/php-src/ext/standard/tests/strings htmlentities-utf.phpt
/php-src/ext/standard html.c html.h
Log:
[DOC] Added ENT_IGNORE as a compatibility flag for htmlentities() and
htmlspecialchars() to skip invalid multibyte sequences instead of returning
an empty string (as iconv's //IGNORE). These functions will still never
return an invalid or incomplete multibyte sequence.
Example: htmlspecialchars("...", ENT_QUOTES | ENT_IGNORE, "utf-8");
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/tests/strings/htmlentities-utf.phpt?r1=1.4&r2=1.5&diff_format=u
Index: php-src/ext/standard/tests/strings/htmlentities-utf.phpt
diff -u php-src/ext/standard/tests/strings/htmlentities-utf.phpt:1.4
php-src/ext/standard/tests/strings/htmlentities-utf.phpt:1.5
--- php-src/ext/standard/tests/strings/htmlentities-utf.phpt:1.4 Fri Jun
13 00:18:57 2008
+++ php-src/ext/standard/tests/strings/htmlentities-utf.phpt Wed Nov 26
02:57:31 2008
@@ -4,8 +4,12 @@
output_handler=
--FILE--
<?php
-setlocale (LC_CTYPE, "C");
-$strings = array("<", "\xD0", "\xD0\x90", "\xD0\x90\xD0", "\xD0\x90\xD0\xB0",
"\xE0", "A\xE0", "\xE0\x80", "\xE0\x80\xBE");
+@setlocale (LC_CTYPE, "C");
+$strings = array(b"<", b"\xD0", b"\xD0\x90", b"\xD0\x90\xD0",
b"\xD0\x90\xD0\xB0", b"\xE0", b"A\xE0", b"\xE0\x80", b"\xE0\x79",
b"\xE0\x80\xBE",
+ b"Voil\xE0", b"Clich\xE9s",
+ b"\xFE", b"\xFE\x41", b"\xC3\xA9", b"\xC3\x79", b"\xF7\xBF\xBF\xBF",
b"\xFB\xBF\xBF\xBF\xBF", b"\xFD\xBF\xBF\xBF\xBF\xBF",
+ b"\x41\xF7\xF7\x42", b"\x42\xFB\xFB\x42", b"\x43\xFD\xFD\x42",
b"\x44\xF7\xF7", b"\x45\xFB\xFB", b"\x46\xFD\xFD"
+ );
foreach($strings as $string) {
$sc_encoded = htmlspecialchars ($string, ENT_QUOTES, "utf-8");
var_dump(bin2hex($sc_encoded));
@@ -13,22 +17,54 @@
var_dump(bin2hex($ent_encoded));
}
?>
---EXPECT--
-unicode(8) "266c743b"
-unicode(8) "266c743b"
-unicode(0) ""
-unicode(0) ""
-unicode(4) "d090"
-unicode(4) "d090"
-unicode(0) ""
-unicode(0) ""
-unicode(8) "d090d0b0"
-unicode(8) "d090d0b0"
-unicode(0) ""
-unicode(0) ""
-unicode(0) ""
-unicode(0) ""
-unicode(0) ""
-unicode(0) ""
-unicode(8) "2667743b"
-unicode(8) "2667743b"
+--EXPECTF--
+%unicode|string%(8) "266c743b"
+%unicode|string%(8) "266c743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(4) "d090"
+%unicode|string%(4) "d090"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(8) "d090d0b0"
+%unicode|string%(8) "d090d0b0"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(8) "2667743b"
+%unicode|string%(8) "2667743b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(4) "c3a9"
+%unicode|string%(16) "266561637574653b"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(8) "f7bfbfbf"
+%unicode|string%(8) "f7bfbfbf"
+%unicode|string%(10) "fbbfbfbfbf"
+%unicode|string%(10) "fbbfbfbfbf"
+%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(12) "fdbfbfbfbfbf"
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
+%unicode|string%(0) ""
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/html.c?r1=1.137&r2=1.138&diff_format=u
Index: php-src/ext/standard/html.c
diff -u php-src/ext/standard/html.c:1.137 php-src/ext/standard/html.c:1.138
--- php-src/ext/standard/html.c:1.137 Wed Nov 26 02:43:43 2008
+++ php-src/ext/standard/html.c Wed Nov 26 02:57:32 2008
@@ -18,7 +18,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: html.c,v 1.137 2008/11/26 02:43:43 lbarnaud Exp $ */
+/* $Id: html.c,v 1.138 2008/11/26 02:57:32 lbarnaud Exp $ */
/*
* HTML entity resources:
@@ -491,6 +491,7 @@
#define CHECK_LEN(pos, chars_need) \
if((str_len - (pos)) < chars_need) { \
+ *newpos = pos; \
*status = FAILURE; \
return 0;
\
}
@@ -535,6 +536,7 @@
more = 0;
if(stat) {
/* we didn't finish the
UTF sequence correctly */
+ --pos;
*status = FAILURE;
}
break;
@@ -1138,6 +1140,9 @@
if(status == FAILURE) {
/* invalid MB sequence */
+ if (quote_style & ENT_HTML_IGNORE_ERRORS) {
+ continue;
+ }
efree(replaced);
if(!PG(display_errors)) {
php_error_docref(NULL TSRMLS_CC, E_WARNING,
"Invalid multibyte sequence in argument");
@@ -1319,6 +1324,7 @@
REGISTER_LONG_CONSTANT("ENT_COMPAT", ENT_COMPAT,
CONST_PERSISTENT|CONST_CS);
REGISTER_LONG_CONSTANT("ENT_QUOTES", ENT_QUOTES,
CONST_PERSISTENT|CONST_CS);
REGISTER_LONG_CONSTANT("ENT_NOQUOTES", ENT_NOQUOTES,
CONST_PERSISTENT|CONST_CS);
+ REGISTER_LONG_CONSTANT("ENT_IGNORE", ENT_IGNORE,
CONST_PERSISTENT|CONST_CS);
}
/* }}} */
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/html.h?r1=1.25&r2=1.26&diff_format=u
Index: php-src/ext/standard/html.h
diff -u php-src/ext/standard/html.h:1.25 php-src/ext/standard/html.h:1.26
--- php-src/ext/standard/html.h:1.25 Mon Dec 31 07:12:15 2007
+++ php-src/ext/standard/html.h Wed Nov 26 02:57:32 2008
@@ -16,7 +16,7 @@
+----------------------------------------------------------------------+
*/
-/* $Id: html.h,v 1.25 2007/12/31 07:12:15 sebastian Exp $ */
+/* $Id: html.h,v 1.26 2008/11/26 02:57:32 lbarnaud Exp $ */
#ifndef HTML_H
#define HTML_H
@@ -24,10 +24,12 @@
#define ENT_HTML_QUOTE_NONE 0
#define ENT_HTML_QUOTE_SINGLE 1
#define ENT_HTML_QUOTE_DOUBLE 2
+#define ENT_HTML_IGNORE_ERRORS 4
#define ENT_COMPAT ENT_HTML_QUOTE_DOUBLE
#define ENT_QUOTES (ENT_HTML_QUOTE_DOUBLE | ENT_HTML_QUOTE_SINGLE)
#define ENT_NOQUOTES ENT_HTML_QUOTE_NONE
+#define ENT_IGNORE ENT_HTML_IGNORE_ERRORS
void register_html_constants(INIT_FUNC_ARGS);
http://cvs.php.net/viewvc.cgi/php-src/ext/standard/tests/strings/htmlentities-utf-2.phpt?view=markup&rev=1.1
Index: php-src/ext/standard/tests/strings/htmlentities-utf-2.phpt
+++ php-src/ext/standard/tests/strings/htmlentities-utf-2.phpt
--TEST--
HTML entities with invalid chars and ENT_IGNORE
--INI--
output_handler=
--FILE--
<?php
// Runs htmlspecialchars() and htmlentities() with ENT_QUOTES | ENT_IGNORE and
// the "utf-8" charset over a fixed list of byte strings, and dumps every
// result as hex so that non-printable bytes are visible in the expected
// output. Each input therefore produces two var_dump() lines.

// Any error raised by setlocale() is suppressed with @.
@setlocale (LC_CTYPE, "C");
// Test inputs, written as binary (b"...") literals. The list mixes plain
// ASCII, complete multibyte sequences, and byte sequences that are truncated
// or otherwise malformed as UTF-8 (lone or repeated lead bytes, lead bytes
// followed by an ASCII byte, single high bytes inside ASCII words).
$strings = array(b"<", b"\xD0", b"\xD0\x90", b"\xD0\x90\xD0",
b"\xD0\x90\xD0\xB0", b"\xE0", b"A\xE0", b"\xE0\x80", b"\xE0\x79",
b"\xE0\x80\xBE",
b"Voil\xE0", b"Clich\xE9s",
b"\xFE", b"\xFE\x41", b"\xC3\xA9", b"\xC3\x79", b"\xF7\xBF\xBF\xBF",
b"\xFB\xBF\xBF\xBF\xBF", b"\xFD\xBF\xBF\xBF\xBF\xBF",
b"\x41\xF7\xF7\x42", b"\x42\xFB\xFB\x42", b"\x43\xFD\xFD\x42",
b"\x44\xF7\xF7", b"\x45\xFB\xFB", b"\x46\xFD\xFD"
);
foreach($strings as $string) {
// htmlspecialchars() result first ...
$sc_encoded = htmlspecialchars ($string, ENT_QUOTES | ENT_IGNORE,
"utf-8");
var_dump(bin2hex($sc_encoded));
// ... then the htmlentities() result for the same input.
$ent_encoded = htmlentities ($string, ENT_QUOTES | ENT_IGNORE, "utf-8");
var_dump(bin2hex($ent_encoded));
}
?>
--EXPECTF--
%unicode|string%(8) "266c743b"
%unicode|string%(8) "266c743b"
%unicode|string%(0) ""
%unicode|string%(0) ""
%unicode|string%(4) "d090"
%unicode|string%(4) "d090"
%unicode|string%(4) "d090"
%unicode|string%(4) "d090"
%unicode|string%(8) "d090d0b0"
%unicode|string%(8) "d090d0b0"
%unicode|string%(0) ""
%unicode|string%(0) ""
%unicode|string%(2) "41"
%unicode|string%(2) "41"
%unicode|string%(0) ""
%unicode|string%(0) ""
%unicode|string%(2) "79"
%unicode|string%(2) "79"
%unicode|string%(8) "2667743b"
%unicode|string%(8) "2667743b"
%unicode|string%(8) "566f696c"
%unicode|string%(8) "566f696c"
%unicode|string%(12) "436c69636873"
%unicode|string%(12) "436c69636873"
%unicode|string%(0) ""
%unicode|string%(0) ""
%unicode|string%(2) "41"
%unicode|string%(2) "41"
%unicode|string%(4) "c3a9"
%unicode|string%(16) "266561637574653b"
%unicode|string%(2) "79"
%unicode|string%(2) "79"
%unicode|string%(8) "f7bfbfbf"
%unicode|string%(8) "f7bfbfbf"
%unicode|string%(10) "fbbfbfbfbf"
%unicode|string%(10) "fbbfbfbfbf"
%unicode|string%(12) "fdbfbfbfbfbf"
%unicode|string%(12) "fdbfbfbfbfbf"
%unicode|string%(4) "4142"
%unicode|string%(4) "4142"
%unicode|string%(4) "4242"
%unicode|string%(4) "4242"
%unicode|string%(4) "4342"
%unicode|string%(4) "4342"
%unicode|string%(2) "44"
%unicode|string%(2) "44"
%unicode|string%(2) "45"
%unicode|string%(2) "45"
%unicode|string%(2) "46"
%unicode|string%(2) "46"