moriyoshi                                Tue, 13 Oct 2009 05:18:37 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=289605

Log:
- Bug #49785: take 5. What the hell happened to me...

Bug: http://bugs.php.net/49785 (Closed) htmlspecialchars() should check byte sequence more strictly
      
Changed paths:
    U   php/php-src/branches/PHP_5_2/ext/standard/html.c
    U   php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt
    U   php/php-src/branches/PHP_5_3/ext/standard/html.c
    U   php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt
    U   php/php-src/trunk/ext/standard/html.c
    U   php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt

Modified: php/php-src/branches/PHP_5_2/ext/standard/html.c
===================================================================
--- php/php-src/branches/PHP_5_2/ext/standard/html.c	2009-10-13 04:32:39 UTC (rev 289604)
+++ php/php-src/branches/PHP_5_2/ext/standard/html.c	2009-10-13 05:18:37 UTC (rev 289605)
@@ -539,19 +539,18 @@
 				c = str[pos];
 				if (c < 0x80) {
 					MB_WRITE(c);
-                    this_char = c;
+					this_char = c;
 					pos++;
 				} else if (c < 0xc0) {
 					MB_FAILURE(pos);
 				} else if (c < 0xe0) {
 					CHECK_LEN(pos, 2);
 					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
 					if (this_char < 0x80) {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -559,14 +558,14 @@
 				} else if (c < 0xf0) {
 					CHECK_LEN(pos, 3);
 					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
 					if (this_char < 0x800) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -575,17 +574,17 @@
 				} else if (c < 0xf8) {
 					CHECK_LEN(pos, 4);
 					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
 					if (this_char < 0x10000) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -593,7 +592,7 @@
 					MB_WRITE((unsigned char)str[pos + 3]);
 					pos += 4;
 				} else {
-                    MB_FAILURE(pos);
+					MB_FAILURE(pos);
 				}
 			}
 			break;

Modified: php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt
===================================================================
--- php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt	2009-10-13 04:32:39 UTC (rev 289604)
+++ php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt	2009-10-13 05:18:37 UTC (rev 289605)
@@ -36,6 +36,12 @@
 var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));

 echo "--\n";
+// UTF-8: with ENT_IGNORE
+var_dump(_bin2hex(htmlentities("\xc0\xa0\xc2\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x80\x80\xe0\xa0\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x80\x80\x80\xf0\x90\x80\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+
+echo "--\n";
 // UTF-8: alternative (invalid) UTF-8 sequence
 var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8')));
 var_dump(_bin2hex(htmlspecialchars("\xe0\x80\xa6", ENT_QUOTES, 'UTF-8')));
@@ -155,6 +161,10 @@
 string(0) ""
 string(0) ""
 --
+string(4) "c280"
+string(6) "e0a080"
+string(8) "f0908080"
+--
 string(0) ""
 string(0) ""
 string(0) ""

Modified: php/php-src/branches/PHP_5_3/ext/standard/html.c
===================================================================
--- php/php-src/branches/PHP_5_3/ext/standard/html.c	2009-10-13 04:32:39 UTC (rev 289604)
+++ php/php-src/branches/PHP_5_3/ext/standard/html.c	2009-10-13 05:18:37 UTC (rev 289605)
@@ -538,19 +538,18 @@
 				c = str[pos];
 				if (c < 0x80) {
 					MB_WRITE(c);
-                    this_char = c;
+					this_char = c;
 					pos++;
 				} else if (c < 0xc0) {
 					MB_FAILURE(pos);
 				} else if (c < 0xe0) {
 					CHECK_LEN(pos, 2);
 					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
 					if (this_char < 0x80) {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -558,14 +557,14 @@
 				} else if (c < 0xf0) {
 					CHECK_LEN(pos, 3);
 					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
 					if (this_char < 0x800) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -574,17 +573,17 @@
 				} else if (c < 0xf8) {
 					CHECK_LEN(pos, 4);
 					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
 					if (this_char < 0x10000) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -592,7 +591,7 @@
 					MB_WRITE((unsigned char)str[pos + 3]);
 					pos += 4;
 				} else {
-                    MB_FAILURE(pos);
+					MB_FAILURE(pos);
 				}
 			}
 			break;

Modified: php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt
===================================================================
--- php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt	2009-10-13 04:32:39 UTC (rev 289604)
+++ php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt	2009-10-13 05:18:37 UTC (rev 289605)
@@ -36,6 +36,12 @@
 var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));

 echo "--\n";
+// UTF-8: with ENT_IGNORE
+var_dump(_bin2hex(htmlentities("\xc0\xa0\xc2\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x80\x80\xe0\xa0\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x80\x80\x80\xf0\x90\x80\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+
+echo "--\n";
 // UTF-8: alternative (invalid) UTF-8 sequence
 var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8')));
 var_dump(_bin2hex(htmlspecialchars("\xe0\x80\xa6", ENT_QUOTES, 'UTF-8')));
@@ -155,6 +161,10 @@
 string(0) ""
 string(0) ""
 --
+string(4) "c280"
+string(6) "e0a080"
+string(8) "f0908080"
+--
 string(0) ""
 string(0) ""
 string(0) ""

Modified: php/php-src/trunk/ext/standard/html.c
===================================================================
--- php/php-src/trunk/ext/standard/html.c	2009-10-13 04:32:39 UTC (rev 289604)
+++ php/php-src/trunk/ext/standard/html.c	2009-10-13 05:18:37 UTC (rev 289605)
@@ -544,19 +544,18 @@
 				c = str[pos];
 				if (c < 0x80) {
 					MB_WRITE(c);
-                    this_char = c;
+					this_char = c;
 					pos++;
 				} else if (c < 0xc0) {
 					MB_FAILURE(pos);
 				} else if (c < 0xe0) {
 					CHECK_LEN(pos, 2);
 					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f);
 					if (this_char < 0x80) {
-						*status = FAILURE;
-						return 0;
+						MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -564,14 +563,14 @@
 				} else if (c < 0xf0) {
 					CHECK_LEN(pos, 3);
 					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f);
 					if (this_char < 0x800) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -580,17 +579,17 @@
 				} else if (c < 0xf8) {
 					CHECK_LEN(pos, 4);
 					if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f);
 					if (this_char < 0x10000) {
-                        MB_FAILURE(pos);
+						MB_FAILURE(pos);
 					}
 					MB_WRITE((unsigned char)c);
 					MB_WRITE((unsigned char)str[pos + 1]);
@@ -598,7 +597,7 @@
 					MB_WRITE((unsigned char)str[pos + 3]);
 					pos += 4;
 				} else {
-                    MB_FAILURE(pos);
+					MB_FAILURE(pos);
 				}
 			}
 			break;

Modified: php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt
===================================================================
--- php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt	2009-10-13 04:32:39 UTC (rev 289604)
+++ php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt	2009-10-13 05:18:37 UTC (rev 289605)
@@ -36,6 +36,12 @@
 var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8")));

 echo "--\n";
+// UTF-8: with ENT_IGNORE
+var_dump(_bin2hex(htmlentities("\xc0\xa0\xc2\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xe0\x80\x80\xe0\xa0\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+var_dump(_bin2hex(htmlentities("\xf0\x80\x80\x80\xf0\x90\x80\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8")));
+
+echo "--\n";
 // UTF-8: alternative (invalid) UTF-8 sequence
 var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8')));
 var_dump(_bin2hex(htmlspecialchars("\xe0\x80\xa6", ENT_QUOTES, 'UTF-8')));
@@ -155,6 +161,10 @@
 string(0) ""
 string(0) ""
 --
+string(4) "c280"
+string(6) "e0a080"
+string(8) "f0908080"
+--
 string(0) ""
 string(0) ""
 string(0) ""
-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to