moriyoshi    Tue, 13 Oct 2009 05:18:37 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=289605

Log:
- Bug #49785: take 5. What the hell happened to me...

Bug: http://bugs.php.net/49785 (Closed) htmlspecialchars() should check byte sequence more strictly

Changed paths:
    U   php/php-src/branches/PHP_5_2/ext/standard/html.c
    U   php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt
    U   php/php-src/branches/PHP_5_3/ext/standard/html.c
    U   php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt
    U   php/php-src/trunk/ext/standard/html.c
    U   php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt

Modified: php/php-src/branches/PHP_5_2/ext/standard/html.c =================================================================== --- php/php-src/branches/PHP_5_2/ext/standard/html.c 2009-10-13 04:32:39 UTC (rev 289604) +++ php/php-src/branches/PHP_5_2/ext/standard/html.c 2009-10-13 05:18:37 UTC (rev 289605) @@ -539,19 +539,18 @@ c = str[pos]; if (c < 0x80) { MB_WRITE(c); - this_char = c; + this_char = c; pos++; } else if (c < 0xc0) { MB_FAILURE(pos); } else if (c < 0xe0) { CHECK_LEN(pos, 2); if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f); if (this_char < 0x80) { - *status = FAILURE; - return 0; + MB_FAILURE(pos); } MB_WRITE((unsigned char)c); MB_WRITE((unsigned char)str[pos + 1]); @@ -559,14 +558,14 @@ } else if (c < 0xf0) { CHECK_LEN(pos, 3); if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); if (this_char < 0x800) { - MB_FAILURE(pos); + MB_FAILURE(pos); } MB_WRITE((unsigned char)c); MB_WRITE((unsigned char)str[pos + 1]); @@ -575,17 +574,17 @@ } else if (c < 0xf8) { CHECK_LEN(pos, 4); if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f); if (this_char < 0x10000) { - MB_FAILURE(pos); + MB_FAILURE(pos); } MB_WRITE((unsigned char)c); MB_WRITE((unsigned char)str[pos + 1]); @@ -593,7 +592,7 @@ MB_WRITE((unsigned char)str[pos + 3]); pos += 4; } else { - MB_FAILURE(pos); + MB_FAILURE(pos); } } break; Modified: 
php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt =================================================================== --- php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt 2009-10-13 04:32:39 UTC (rev 289604) +++ php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt 2009-10-13 05:18:37 UTC (rev 289605) @@ -36,6 +36,12 @@ var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8"))); echo "--\n"; +// UTF-8: with ENT_IGNORE +var_dump(_bin2hex(htmlentities("\xc0\xa0\xc2\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8"))); +var_dump(_bin2hex(htmlentities("\xe0\x80\x80\xe0\xa0\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8"))); +var_dump(_bin2hex(htmlentities("\xf0\x80\x80\x80\xf0\x90\x80\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8"))); + +echo "--\n"; // UTF-8: alternative (invalid) UTF-8 sequence var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8'))); var_dump(_bin2hex(htmlspecialchars("\xe0\x80\xa6", ENT_QUOTES, 'UTF-8'))); @@ -155,6 +161,10 @@ string(0) "" string(0) "" -- +string(4) "c280" +string(6) "e0a080" +string(8) "f0908080" +-- string(0) "" string(0) "" string(0) "" Modified: php/php-src/branches/PHP_5_3/ext/standard/html.c =================================================================== --- php/php-src/branches/PHP_5_3/ext/standard/html.c 2009-10-13 04:32:39 UTC (rev 289604) +++ php/php-src/branches/PHP_5_3/ext/standard/html.c 2009-10-13 05:18:37 UTC (rev 289605) @@ -538,19 +538,18 @@ c = str[pos]; if (c < 0x80) { MB_WRITE(c); - this_char = c; + this_char = c; pos++; } else if (c < 0xc0) { MB_FAILURE(pos); } else if (c < 0xe0) { CHECK_LEN(pos, 2); if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f); if (this_char < 0x80) { - *status = FAILURE; - return 0; + MB_FAILURE(pos); } MB_WRITE((unsigned char)c); MB_WRITE((unsigned char)str[pos + 1]); @@ -558,14 +557,14 @@ } else if (c < 0xf0) { CHECK_LEN(pos, 3); 
if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); if (this_char < 0x800) { - MB_FAILURE(pos); + MB_FAILURE(pos); } MB_WRITE((unsigned char)c); MB_WRITE((unsigned char)str[pos + 1]); @@ -574,17 +573,17 @@ } else if (c < 0xf8) { CHECK_LEN(pos, 4); if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f); if (this_char < 0x10000) { - MB_FAILURE(pos); + MB_FAILURE(pos); } MB_WRITE((unsigned char)c); MB_WRITE((unsigned char)str[pos + 1]); @@ -592,7 +591,7 @@ MB_WRITE((unsigned char)str[pos + 3]); pos += 4; } else { - MB_FAILURE(pos); + MB_FAILURE(pos); } } break; Modified: php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt =================================================================== --- php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt 2009-10-13 04:32:39 UTC (rev 289604) +++ php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt 2009-10-13 05:18:37 UTC (rev 289605) @@ -36,6 +36,12 @@ var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8"))); echo "--\n"; +// UTF-8: with ENT_IGNORE +var_dump(_bin2hex(htmlentities("\xc0\xa0\xc2\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8"))); +var_dump(_bin2hex(htmlentities("\xe0\x80\x80\xe0\xa0\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8"))); +var_dump(_bin2hex(htmlentities("\xf0\x80\x80\x80\xf0\x90\x80\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8"))); + +echo "--\n"; // UTF-8: alternative (invalid) UTF-8 sequence 
var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8'))); var_dump(_bin2hex(htmlspecialchars("\xe0\x80\xa6", ENT_QUOTES, 'UTF-8'))); @@ -155,6 +161,10 @@ string(0) "" string(0) "" -- +string(4) "c280" +string(6) "e0a080" +string(8) "f0908080" +-- string(0) "" string(0) "" string(0) "" Modified: php/php-src/trunk/ext/standard/html.c =================================================================== --- php/php-src/trunk/ext/standard/html.c 2009-10-13 04:32:39 UTC (rev 289604) +++ php/php-src/trunk/ext/standard/html.c 2009-10-13 05:18:37 UTC (rev 289605) @@ -544,19 +544,18 @@ c = str[pos]; if (c < 0x80) { MB_WRITE(c); - this_char = c; + this_char = c; pos++; } else if (c < 0xc0) { MB_FAILURE(pos); } else if (c < 0xe0) { CHECK_LEN(pos, 2); if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } this_char = ((c & 0x1f) << 6) | (str[pos + 1] & 0x3f); if (this_char < 0x80) { - *status = FAILURE; - return 0; + MB_FAILURE(pos); } MB_WRITE((unsigned char)c); MB_WRITE((unsigned char)str[pos + 1]); @@ -564,14 +563,14 @@ } else if (c < 0xf0) { CHECK_LEN(pos, 3); if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); if (this_char < 0x800) { - MB_FAILURE(pos); + MB_FAILURE(pos); } MB_WRITE((unsigned char)c); MB_WRITE((unsigned char)str[pos + 1]); @@ -580,17 +579,17 @@ } else if (c < 0xf8) { CHECK_LEN(pos, 4); if (str[pos + 1] < 0x80 || str[pos + 1] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } if (str[pos + 2] < 0x80 || str[pos + 2] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } if (str[pos + 3] < 0x80 || str[pos + 3] > 0xbf) { - MB_FAILURE(pos); + MB_FAILURE(pos); } this_char = ((c & 0x07) << 18) | ((str[pos + 1] & 0x3f) << 12) | ((str[pos + 2] & 0x3f) << 6) | (str[pos + 3] & 0x3f); if (this_char < 0x10000) { - 
MB_FAILURE(pos); + MB_FAILURE(pos); } MB_WRITE((unsigned char)c); MB_WRITE((unsigned char)str[pos + 1]); @@ -598,7 +597,7 @@ MB_WRITE((unsigned char)str[pos + 3]); pos += 4; } else { - MB_FAILURE(pos); + MB_FAILURE(pos); } } break; Modified: php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt =================================================================== --- php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt 2009-10-13 04:32:39 UTC (rev 289604) +++ php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt 2009-10-13 05:18:37 UTC (rev 289605) @@ -36,6 +36,12 @@ var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8"))); echo "--\n"; +// UTF-8: with ENT_IGNORE +var_dump(_bin2hex(htmlentities("\xc0\xa0\xc2\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8"))); +var_dump(_bin2hex(htmlentities("\xe0\x80\x80\xe0\xa0\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8"))); +var_dump(_bin2hex(htmlentities("\xf0\x80\x80\x80\xf0\x90\x80\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8"))); + +echo "--\n"; // UTF-8: alternative (invalid) UTF-8 sequence var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8'))); var_dump(_bin2hex(htmlspecialchars("\xe0\x80\xa6", ENT_QUOTES, 'UTF-8'))); @@ -155,6 +161,10 @@ string(0) "" string(0) "" -- +string(4) "c280" +string(6) "e0a080" +string(8) "f0908080" +-- string(0) "" string(0) "" string(0) ""

--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php