moriyoshi Mon, 07 Dec 2009 15:41:43 +0000 Revision: http://svn.php.net/viewvc?view=revision&revision=291821
Log: - Take account of surrogate pairs. Changed paths: U php/php-src/branches/PHP_5_2/ext/standard/html.c U php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt U php/php-src/branches/PHP_5_3/ext/standard/html.c U php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt U php/php-src/trunk/ext/standard/html.c U php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt Modified: php/php-src/branches/PHP_5_2/ext/standard/html.c =================================================================== --- php/php-src/branches/PHP_5_2/ext/standard/html.c 2009-12-07 15:34:13 UTC (rev 291820) +++ php/php-src/branches/PHP_5_2/ext/standard/html.c 2009-12-07 15:41:43 UTC (rev 291821) @@ -566,6 +566,8 @@ this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); if (this_char < 0x800) { MB_FAILURE(pos); + } else if (this_char >= 0xd800 && this_char <= 0xdfff) { + MB_FAILURE(pos); } MB_WRITE((unsigned char)c); MB_WRITE((unsigned char)str[pos + 1]); Modified: php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt =================================================================== --- php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt 2009-12-07 15:34:13 UTC (rev 291820) +++ php/php-src/branches/PHP_5_2/ext/standard/tests/strings/bug49785.phpt 2009-12-07 15:41:43 UTC (rev 291821) @@ -36,10 +36,14 @@ var_dump(_bin2hex(htmlentities("\xf8\x88\x80\x80\x80", ENT_QUOTES, "UTF-8"))); echo "--\n"; -// UTF-8: alternative (invalid) UTF-8 sequence +// UTF-8: alternative (invalid) UTF-8 sequence / surrogate pairs var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8'))); var_dump(_bin2hex(htmlspecialchars("\xe0\x80\xa6", ENT_QUOTES, 'UTF-8'))); var_dump(_bin2hex(htmlspecialchars("\xf0\x80\x80\xa6", ENT_QUOTES, 'UTF-8'))); +var_dump(_bin2hex(htmlspecialchars("\xec\xbf\xbf", ENT_QUOTES, 'UTF-8'))); +var_dump(_bin2hex(htmlspecialchars("\xed\xa0\x80", ENT_QUOTES, 'UTF-8'))); +var_dump(_bin2hex(htmlspecialchars("\xed\xbf\xbf", ENT_QUOTES, 'UTF-8'))); +var_dump(_bin2hex(htmlspecialchars("\xee\x80\x80", ENT_QUOTES, 'UTF-8'))); // Shift_JIS: non-lead byte >= 0x80 var_dump(_bin2hex(htmlspecialchars("\x80", ENT_QUOTES, 'Shift_JIS'))); @@ -158,6 +162,10 @@ string(0) "" string(0) "" string(0) "" +string(6) "ecbfbf" +string(0) "" +string(0) "" +string(6) "ee8080" string(2) "80" string(2) "a0" string(2) "a1" Modified: php/php-src/branches/PHP_5_3/ext/standard/html.c =================================================================== --- php/php-src/branches/PHP_5_3/ext/standard/html.c 2009-12-07 15:34:13 UTC (rev 291820) +++ php/php-src/branches/PHP_5_3/ext/standard/html.c 2009-12-07 15:41:43 UTC (rev 291821) @@ -565,6 +565,8 @@ this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); if (this_char < 0x800) { MB_FAILURE(pos); + } else if (this_char >= 0xd800 && this_char <= 0xdfff) { + MB_FAILURE(pos); } MB_WRITE((unsigned char)c); MB_WRITE((unsigned char)str[pos + 1]); Modified: php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt =================================================================== --- php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt 2009-12-07 15:34:13 UTC (rev 291820) +++ php/php-src/branches/PHP_5_3/ext/standard/tests/strings/bug49785.phpt 2009-12-07 15:41:43 UTC (rev 291821) @@ -42,10 +42,14 @@ var_dump(_bin2hex(htmlentities("\xf0\x80\x80\x80\xf0\x90\x80\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8"))); echo "--\n"; -// UTF-8: alternative (invalid) UTF-8 sequence +// UTF-8: alternative (invalid) UTF-8 sequence / surrogate pairs var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8'))); var_dump(_bin2hex(htmlspecialchars("\xe0\x80\xa6", ENT_QUOTES, 'UTF-8'))); var_dump(_bin2hex(htmlspecialchars("\xf0\x80\x80\xa6", ENT_QUOTES, 'UTF-8'))); +var_dump(_bin2hex(htmlspecialchars("\xec\xbf\xbf", ENT_QUOTES, 'UTF-8'))); +var_dump(_bin2hex(htmlspecialchars("\xed\xa0\x80", ENT_QUOTES, 'UTF-8'))); +var_dump(_bin2hex(htmlspecialchars("\xed\xbf\xbf", ENT_QUOTES, 'UTF-8'))); +var_dump(_bin2hex(htmlspecialchars("\xee\x80\x80", ENT_QUOTES, 'UTF-8'))); // Shift_JIS: non-lead byte >= 0x80 var_dump(_bin2hex(htmlspecialchars("\x80", ENT_QUOTES, 'Shift_JIS'))); @@ -168,6 +172,10 @@ string(0) "" string(0) "" string(0) "" +string(6) "ecbfbf" +string(0) "" +string(0) "" +string(6) "ee8080" string(2) "80" string(2) "a0" string(2) "a1" Modified: php/php-src/trunk/ext/standard/html.c =================================================================== --- php/php-src/trunk/ext/standard/html.c 2009-12-07 15:34:13 UTC (rev 291820) +++ php/php-src/trunk/ext/standard/html.c 2009-12-07 15:41:43 UTC (rev 291821) @@ -571,6 +571,8 @@ this_char = ((c & 0x0f) << 12) | ((str[pos + 1] & 0x3f) << 6) | (str[pos + 2] & 0x3f); if (this_char < 0x800) { MB_FAILURE(pos); + } else if (this_char >= 0xd800 && this_char <= 0xdfff) { + MB_FAILURE(pos); } MB_WRITE((unsigned char)c); MB_WRITE((unsigned char)str[pos + 1]); Modified: php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt =================================================================== --- php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt 2009-12-07 15:34:13 UTC (rev 291820) +++ php/php-src/trunk/ext/standard/tests/strings/bug49785.phpt 2009-12-07 15:41:43 UTC (rev 291821) @@ -42,10 +42,14 @@ var_dump(_bin2hex(htmlentities("\xf0\x80\x80\x80\xf0\x90\x80\x80", ENT_QUOTES | ENT_IGNORE, "UTF-8"))); echo "--\n"; -// UTF-8: alternative (invalid) UTF-8 sequence +// UTF-8: alternative (invalid) UTF-8 sequence / surrogate pairs var_dump(_bin2hex(htmlspecialchars("\xc0\xa6", ENT_QUOTES, 'UTF-8'))); var_dump(_bin2hex(htmlspecialchars("\xe0\x80\xa6", ENT_QUOTES, 'UTF-8'))); var_dump(_bin2hex(htmlspecialchars("\xf0\x80\x80\xa6", ENT_QUOTES, 'UTF-8'))); +var_dump(_bin2hex(htmlspecialchars("\xec\xbf\xbf", ENT_QUOTES, 'UTF-8'))); +var_dump(_bin2hex(htmlspecialchars("\xed\xa0\x80", ENT_QUOTES, 'UTF-8'))); +var_dump(_bin2hex(htmlspecialchars("\xed\xbf\xbf", ENT_QUOTES, 'UTF-8'))); +var_dump(_bin2hex(htmlspecialchars("\xee\x80\x80", ENT_QUOTES, 'UTF-8'))); // Shift_JIS: non-lead byte >= 0x80 var_dump(_bin2hex(htmlspecialchars("\x80", ENT_QUOTES, 'Shift_JIS'))); @@ -168,6 +172,10 @@ string(0) "" string(0) "" string(0) "" +string(6) "ecbfbf" +string(0) "" +string(0) "" +string(6) "ee8080" string(2) "80" string(2) "a0" string(2) "a1"
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php