moriyoshi Tue, 22 Dec 2009 05:50:34 +0000 Revision: http://svn.php.net/viewvc?view=revision&revision=292467
Log: - Fix bug #46478 (htmlentities() uses obsolete mapping table for character entity references) Bug: http://bugs.php.net/46478 (Assigned) htmlentities() uses obsolete mapping table for character entity references Changed paths: U php/php-src/branches/PHP_5_3/NEWS U php/php-src/branches/PHP_5_3/ext/standard/html.c A php/php-src/branches/PHP_5_3/ext/standard/tests/strings/html_entity_decode_html4.phpt A php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities_html4.phpt
Modified: php/php-src/branches/PHP_5_3/NEWS =================================================================== --- php/php-src/branches/PHP_5_3/NEWS 2009-12-22 03:19:47 UTC (rev 292466) +++ php/php-src/branches/PHP_5_3/NEWS 2009-12-22 05:50:34 UTC (rev 292467) @@ -134,6 +134,8 @@ - Fixed bug #49174 (crash when extending PDOStatement and trying to set queryString property). (Felipe) - Fixed bug #47848 (importNode doesn't preserve attribute namespaces). (Rob) +- Fixed bug #46478 (htmlentities() uses obsolete mapping table for character + entity references). (Moriyoshi) - Fixed bug #45599 (strip_tags() truncates rest of string with invalid attribute). (Ilia, hradtke) - Fixed bug #45120 (PDOStatement->execute() returns true then false for same Modified: php/php-src/branches/PHP_5_3/ext/standard/html.c =================================================================== --- php/php-src/branches/PHP_5_3/ext/standard/html.c 2009-12-22 03:19:47 UTC (rev 292466) +++ php/php-src/branches/PHP_5_3/ext/standard/html.c 2009-12-22 05:50:34 UTC (rev 292467) @@ -144,7 +144,7 @@ NULL, NULL, NULL, NULL, NULL, NULL, NULL, "thetasym", "upsih", NULL, NULL, NULL, - "piv" + "piv" }; static entity_table_t ent_uni_punct[] = { @@ -154,7 +154,7 @@ NULL, NULL, NULL, "ndash", "mdash", NULL, NULL, NULL, /* 8216 */ "lsquo", "rsquo", "sbquo", NULL, "ldquo", "rdquo", "bdquo", NULL, - "dagger", "Dagger", "bull", NULL, NULL, NULL, "hellip", + "dagger", "Dagger", "bull", NULL, NULL, NULL, "hellip", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "permil", NULL, /* 8242 */ "prime", "Prime", NULL, NULL, NULL, NULL, NULL, "lsaquo", "rsaquo", NULL, @@ -194,39 +194,39 @@ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 8656 (0x21d0) */ - "lArr", "uArr", "rArr", "dArr", "hArr", "vArr", NULL, NULL, - NULL, NULL, "lAarr", "rAarr", NULL, "rarrw", NULL, NULL, + "lArr", "uArr", "rArr", "dArr", "hArr", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, 
NULL, NULL, /* 8672 (0x21e0) */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 8704 (0x2200) */ - "forall", "comp", "part", "exist", "nexist", "empty", NULL, "nabla", - "isin", "notin", "epsis", "ni", "notni", "bepsi", NULL, "prod", + "forall", NULL, "part", "exist", NULL, "empty", NULL, "nabla", + "isin", "notin", NULL, "ni", NULL, NULL, NULL, "prod", /* 8720 (0x2210) */ - "coprod", "sum", "minus", "mnplus", "plusdo", NULL, "setmn", "lowast", - "compfn", NULL, "radic", NULL, NULL, "prop", "infin", "ang90", + NULL, "sum", "minus", NULL, NULL, NULL, NULL, "lowast", + NULL, NULL, "radic", NULL, NULL, "prop", "infin", NULL, /* 8736 (0x2220) */ - "ang", "angmsd", "angsph", "mid", "nmid", "par", "npar", "and", - "or", "cap", "cup", "int", NULL, NULL, "conint", NULL, + "ang", NULL, NULL, NULL, NULL, NULL, NULL, "and", + "or", "cap", "cup", "int", NULL, NULL, NULL, NULL, /* 8752 (0x2230) */ - NULL, NULL, NULL, NULL, "there4", "becaus", NULL, NULL, - NULL, NULL, NULL, NULL, "sim", "bsim", NULL, NULL, + NULL, NULL, NULL, NULL, "there4", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, "sim", NULL, NULL, NULL, /* 8768 (0x2240) */ - "wreath", "nsim", NULL, "sime", "nsime", "cong", NULL, "ncong", - "asymp", "nap", "ape", NULL, "bcong", "asymp", "bump", "bumpe", + NULL, NULL, NULL, NULL, NULL, "cong", NULL, NULL, + "asymp", NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 8784 (0x2250) */ NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 8800 (0x2260) */ - "ne", "equiv", NULL, NULL, "le", "ge", "lE", "gE", - "lnE", "gnE", "Lt", "Gt", "twixt", NULL, "nlt", "ngt", + "ne", "equiv", NULL, NULL, "le", "ge", NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 8816 (0x2270) */ - "nles", "nges", "lsim", "gsim", NULL, NULL, "lg", "gl", - NULL, NULL, "pr", "sc", "cupre", "sscue", "prsim", 
"scsim", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 8832 (0x2280) */ - "npr", "nsc", "sub", "sup", "nsub", "nsup", "sube", "supe", + NULL, NULL, "sub", "sup", "nsub", NULL, "sube", "supe", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* 8848 (0x2290) */ NULL, NULL, NULL, NULL, NULL, "oplus", NULL, "otimes", Added: php/php-src/branches/PHP_5_3/ext/standard/tests/strings/html_entity_decode_html4.phpt =================================================================== --- php/php-src/branches/PHP_5_3/ext/standard/tests/strings/html_entity_decode_html4.phpt (rev 0) +++ php/php-src/branches/PHP_5_3/ext/standard/tests/strings/html_entity_decode_html4.phpt 2009-12-22 05:50:34 UTC (rev 292467) @@ -0,0 +1,516 @@ +--TEST-- +html_entity_decode() conformance check (HTML 4) +--FILE-- +<?php +$map = array( + "&quot;", + "&amp;", + "&lt;", + "&gt;", + "&nbsp;", + "&iexcl;", + "&cent;", + "&pound;", + "&curren;", + "&yen;", + "&brvbar;", + "&sect;", + "&uml;", + "&copy;", + "&ordf;", + "&laquo;", + "&not;", + "&shy;", + "&reg;", + "&macr;", + "&deg;", + "&plusmn;", + "&sup2;", + "&sup3;", + "&acute;", + "&micro;", + "&para;", + "&middot;", + "&cedil;", + "&sup1;", + "&ordm;", + "&raquo;", + "&frac14;", + "&frac12;", + "&frac34;", + "&iquest;", + "&Agrave;", + "&Aacute;", + "&Acirc;", + "&Atilde;", + "&Auml;", + "&Aring;", + "&AElig;", + "&Ccedil;", + "&Egrave;", + "&Eacute;", + "&Ecirc;", + "&Euml;", + "&Igrave;", + "&Iacute;", + "&Icirc;", + "&Iuml;", + "&ETH;", + "&Ntilde;", + "&Ograve;", + "&Oacute;", + "&Ocirc;", + "&Otilde;", + "&Ouml;", + "&times;", + "&Oslash;", + "&Ugrave;", + "&Uacute;", + "&Ucirc;", + "&Uuml;", + "&Yacute;", + "&THORN;", + "&szlig;", + "&agrave;", + "&aacute;", + "&acirc;", + "&atilde;", + "&auml;", + "&aring;", + "&aelig;", + "&ccedil;", + "&egrave;", + "&eacute;", + "&ecirc;", + "&euml;", + "&igrave;", + "&iacute;", + "&icirc;", + "&iuml;", + "&eth;", + "&ntilde;", + "&ograve;", + "&oacute;", + "&ocirc;", + "&otilde;", + "&ouml;", + "&divide;", + "&oslash;", + "&ugrave;", + "&uacute;", + "&ucirc;", + "&uuml;", + "&yacute;", + "&thorn;", + "&yuml;", + "&OElig;", + "&oelig;", + "&Scaron;", + "&scaron;", + "&Yuml;", + "&fnof;", + "&circ;", + "&tilde;", + "&Alpha;", + "&Beta;", + "&Gamma;", + "&Delta;", + "&Epsilon;", + "&Zeta;", + "&Eta;", + "&Theta;", + "&Iota;", + "&Kappa;", + "&Lambda;", + "&Mu;", + "&Nu;", + "&Xi;", + "&Omicron;", + "&Pi;", + "&Rho;", + "&Sigma;", + "&Tau;", + "&Upsilon;", + "&Phi;", + "&Chi;", + "&Psi;", + "&Omega;", + "&alpha;", + "&beta;", + "&gamma;", + "&delta;", + "&epsilon;", + "&zeta;", + "&eta;", + "&theta;", + "&iota;", + "&kappa;", + "&lambda;", + "&mu;", + "&nu;", + "&xi;", + "&omicron;", + "&pi;", + "&rho;", + "&sigmaf;", + "&sigma;", + "&tau;", + "&upsilon;", + "&phi;", + "&chi;", + "&psi;", + "&omega;", + "&thetasym;", + "&upsih;", + "&piv;", + "&ensp;", + "&emsp;",
+ "&thinsp;", + "&zwnj;", + "&zwj;", + "&lrm;", + "&rlm;", + "&ndash;", + "&mdash;", + "&lsquo;", + "&rsquo;", + "&sbquo;", + "&ldquo;", + "&rdquo;", + "&bdquo;", + "&dagger;", + "&Dagger;", + "&bull;", + "&hellip;", + "&permil;", + "&prime;", + "&Prime;", + "&lsaquo;", + "&rsaquo;", + "&oline;", + "&frasl;", + "&euro;", + "&image;", + "&weierp;", + "&real;", + "&trade;", + "&alefsym;", + "&larr;", + "&uarr;", + "&rarr;", + "&darr;", + "&harr;", + "&crarr;", + "&lArr;", + "&uArr;", + "&rArr;", + "&dArr;", + "&hArr;", + "&forall;", + "&part;", + "&exist;", + "&empty;", + "&nabla;", + "&isin;", + "&notin;", + "&ni;", + "&prod;", + "&sum;", + "&minus;", + "&lowast;", + "&radic;", + "&prop;", + "&infin;", + "&ang;", + "&and;", + "&or;", + "&cap;", + "&cup;", + "&int;", + "&there4;", + "&sim;", + "&cong;", + "&asymp;", + "&ne;", + "&equiv;", + "&le;", + "&ge;", + "&sub;", + "&sup;", + "&nsub;", + "&sube;", + "&supe;", + "&oplus;", + "&otimes;", + "&perp;", + "&sdot;", + "&lceil;", + "&rceil;", + "&lfloor;", + "&rfloor;", + "&lang;", + "&rang;", + "&loz;", + "&spades;", + "&clubs;", + "&hearts;", + "&diams;", +); + +foreach ($map as $str) { + echo bin2hex(html_entity_decode($str, ENT_QUOTES, "UTF-8")), "\n"; +} +?> +--EXPECT-- +22 +26 +3c +3e +c2a0 +c2a1 +c2a2 +c2a3 +c2a4 +c2a5 +c2a6 +c2a7 +c2a8 +c2a9 +c2aa +c2ab +c2ac +c2ad +c2ae +c2af +c2b0 +c2b1 +c2b2 +c2b3 +c2b4 +c2b5 +c2b6 +c2b7 +c2b8 +c2b9 +c2ba +c2bb +c2bc +c2bd +c2be +c2bf +c380 +c381 +c382 +c383 +c384 +c385 +c386 +c387 +c388 +c389 +c38a +c38b +c38c +c38d +c38e +c38f +c390 +c391 +c392 +c393 +c394 +c395 +c396 +c397 +c398 +c399 +c39a +c39b +c39c +c39d +c39e +c39f +c3a0 +c3a1 +c3a2 +c3a3 +c3a4 +c3a5 +c3a6 +c3a7 +c3a8 +c3a9 +c3aa +c3ab +c3ac +c3ad +c3ae +c3af +c3b0 +c3b1 +c3b2 +c3b3 +c3b4 +c3b5 +c3b6 +c3b7 +c3b8 +c3b9 +c3ba +c3bb +c3bc +c3bd +c3be +c3bf +c592 +c593 +c5a0 +c5a1 +c5b8 +c692 +cb86 +cb9c +ce91 +ce92 +ce93 +ce94 +ce95 +ce96 +ce97 +ce98 +ce99 +ce9a +ce9b +ce9c +ce9d +ce9e +ce9f +cea0 +cea1 +cea3 +cea4 +cea5 +cea6 +cea7 +cea8 +cea9 +ceb1 +ceb2 +ceb3 +ceb4 +ceb5 +ceb6 +ceb7 +ceb8 +ceb9 +ceba +cebb +cebc +cebd +cebe +cebf +cf80 +cf81 +cf82 +cf83 +cf84 +cf85 +cf86 +cf87 +cf88 +cf89 +cf91 +cf92 +cf96 +e28082 +e28083 +e28089 +e2808c +e2808d +e2808e +e2808f +e28093 +e28094 +e28098 +e28099 +e2809a +e2809c +e2809d +e2809e +e280a0 +e280a1 +e280a2 +e280a6 +e280b0 +e280b2 +e280b3 +e280b9 +e280ba +e280be +e28184 +e282ac +e28491 +e28498 +e2849c +e284a2 +e284b5 +e28690 +e28691 +e28692 +e28693 
+e28694 +e286b5 +e28790 +e28791 +e28792 +e28793 +e28794 +e28880 +e28882 +e28883 +e28885 +e28887 +e28888 +e28889 +e2888b +e2888f +e28891 +e28892 +e28897 +e2889a +e2889d +e2889e +e288a0 +e288a7 +e288a8 +e288a9 +e288aa +e288ab +e288b4 +e288bc +e28985 +e28988 +e289a0 +e289a1 +e289a4 +e289a5 +e28a82 +e28a83 +e28a84 +e28a86 +e28a87 +e28a95 +e28a97 +e28aa5 +e28b85 +e28c88 +e28c89 +e28c8a +e28c8b +e28ca9 +e28caa +e2978a +e299a0 +e299a3 +e299a5 +e299a6 Added: php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities_html4.phpt =================================================================== --- php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities_html4.phpt (rev 0) +++ php/php-src/branches/PHP_5_3/ext/standard/tests/strings/htmlentities_html4.phpt 2009-12-22 05:50:34 UTC (rev 292467) @@ -0,0 +1,305 @@ +--TEST-- +htmlentities() conformance check (HTML 4) +--FILE-- +<?php +function utf32_utf8($k) { + if ($k < 0x80) { + $retval = pack('C', $k); + } else if ($k < 0x800) { + $retval = pack('C2', + 0xc0 | ($k >> 6), + 0x80 | ($k & 0x3f)); + } else if ($k < 0x10000) { + $retval = pack('C3', + 0xe0 | ($k >> 12), + 0x80 | (($k >> 6) & 0x3f), + 0x80 | ($k & 0x3f)); + } else if ($k < 0x200000) { + $retval = pack('C4', + 0xf0 | ($k >> 18), + 0x80 | (($k >> 12) & 0x3f), + 0x80 | (($k >> 6) & 0x3f), + 0x80 | ($k & 0x3f)); + } else if ($k < 0x4000000) { + $retval = pack('C5', + 0xf8 | ($k >> 24), + 0x80 | (($k >> 18) & 0x3f), + 0x80 | (($k >> 12) & 0x3f), + 0x80 | (($k >> 6) & 0x3f), + 0x80 | ($k & 0x3f)); + } else { + $retval = pack('C6', + 0xfc | ($k >> 30), + 0x80 | (($k >> 24) & 0x3f), + 0x80 | (($k >> 18) & 0x3f), + 0x80 | (($k >> 12) & 0x3f), + 0x80 | (($k >> 6) & 0x3f), + 0x80 | ($k & 0x3f)); + } + return $retval; +} + +for ($i = 0; $i < 0x110000; $i++) { + if ($i >= 0xd800 && $i < 0xe000) + continue; + $str = utf32_utf8($i); + $result = htmlentities($str, ENT_QUOTES, 'UTF-8'); + if ($str != $result) { + printf("%s\tU+%05X\n", $result, $i); + } +} +?> 
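+--EXPECT-- +&quot; U+00022 +&amp; U+00026 +&#039; U+00027 +&lt; U+0003C +&gt; U+0003E +&nbsp; U+000A0 +&iexcl; U+000A1 +&cent; U+000A2 +&pound; U+000A3 +&curren; U+000A4 +&yen; U+000A5 +&brvbar; U+000A6 +&sect; U+000A7 +&uml; U+000A8 +&copy; U+000A9 +&ordf; U+000AA +&laquo; U+000AB +&not; U+000AC +&shy; U+000AD +&reg; U+000AE +&macr; U+000AF +&deg; U+000B0 +&plusmn; U+000B1 +&sup2; U+000B2 +&sup3; U+000B3 +&acute; U+000B4 +&micro; U+000B5 +&para; U+000B6 +&middot; U+000B7 +&cedil; U+000B8 +&sup1; U+000B9 +&ordm; U+000BA +&raquo; U+000BB +&frac14; U+000BC +&frac12; U+000BD +&frac34; U+000BE +&iquest; U+000BF +&Agrave; U+000C0 +&Aacute; U+000C1 +&Acirc; U+000C2 +&Atilde; U+000C3 +&Auml; U+000C4 +&Aring; U+000C5 +&AElig; U+000C6 +&Ccedil; U+000C7 +&Egrave; U+000C8 +&Eacute; U+000C9 +&Ecirc; U+000CA +&Euml; U+000CB +&Igrave; U+000CC +&Iacute; U+000CD +&Icirc; U+000CE +&Iuml; U+000CF +&ETH; U+000D0 +&Ntilde; U+000D1 +&Ograve; U+000D2 +&Oacute; U+000D3 +&Ocirc; U+000D4 +&Otilde; U+000D5 +&Ouml; U+000D6 +&times; U+000D7 +&Oslash; U+000D8 +&Ugrave; U+000D9 +&Uacute; U+000DA +&Ucirc; U+000DB +&Uuml; U+000DC +&Yacute; U+000DD +&THORN; U+000DE +&szlig; U+000DF +&agrave; U+000E0 +&aacute; U+000E1 +&acirc; U+000E2 +&atilde; U+000E3 +&auml; U+000E4 +&aring; U+000E5 +&aelig; U+000E6 +&ccedil; U+000E7 +&egrave; U+000E8 +&eacute; U+000E9 +&ecirc; U+000EA +&euml; U+000EB +&igrave; U+000EC +&iacute; U+000ED +&icirc; U+000EE +&iuml; U+000EF +&eth; U+000F0 +&ntilde; U+000F1 +&ograve; U+000F2 +&oacute; U+000F3 +&ocirc; U+000F4 +&otilde; U+000F5 +&ouml; U+000F6 +&divide; U+000F7 +&oslash; U+000F8 +&ugrave; U+000F9 +&uacute; U+000FA +&ucirc; U+000FB +&uuml; U+000FC +&yacute; U+000FD +&thorn; U+000FE +&yuml; U+000FF +&OElig; U+00152 +&oelig; U+00153 +&Scaron; U+00160 +&scaron; U+00161 +&Yuml; U+00178 +&fnof; U+00192 +&circ; U+002C6 +&tilde; U+002DC +&Alpha; U+00391 +&Beta; U+00392 +&Gamma; U+00393 +&Delta; U+00394 +&Epsilon; U+00395 +&Zeta; U+00396 +&Eta; U+00397 +&Theta; U+00398 +&Iota; U+00399 +&Kappa; U+0039A +&Lambda; U+0039B +&Mu; U+0039C +&Nu; U+0039D +&Xi; U+0039E +&Omicron; U+0039F +&Pi; U+003A0 +&Rho; U+003A1 +&Sigma; U+003A3 +&Tau; U+003A4 +&Upsilon; U+003A5 +&Phi; U+003A6 +&Chi; U+003A7 +&Psi; U+003A8 +&Omega; U+003A9 +&alpha; U+003B1 +&beta; U+003B2 +&gamma; U+003B3 +&delta; U+003B4 +&epsilon; U+003B5 +&zeta; U+003B6 +&eta; U+003B7 +&theta; U+003B8 +&iota; U+003B9 +&kappa; U+003BA +&lambda; U+003BB +&mu; U+003BC +&nu; U+003BD +&xi; U+003BE +&omicron; U+003BF +&pi; U+003C0 +&rho; U+003C1 +&sigmaf; U+003C2 +&sigma; U+003C3 +&tau; U+003C4 +&upsilon; U+003C5 +&phi; U+003C6 +&chi; U+003C7 +&psi; U+003C8 +&omega; U+003C9 +&thetasym; U+003D1 +&upsih; U+003D2 +&piv; U+003D6 +&ensp; U+02002 +&emsp; U+02003 +&thinsp; U+02009 +&zwnj; U+0200C +&zwj; U+0200D +&lrm; U+0200E +&rlm; U+0200F +&ndash; U+02013 +&mdash; U+02014 +&lsquo; U+02018 +&rsquo; U+02019 +&sbquo; U+0201A +&ldquo; U+0201C +&rdquo; U+0201D +&bdquo; U+0201E +&dagger; U+02020 +&Dagger; U+02021 +&bull; U+02022 +&hellip; U+02026 +&permil;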
U+02030 +&prime; U+02032 +&Prime; U+02033 +&lsaquo; U+02039 +&rsaquo; U+0203A +&oline; U+0203E +&frasl; U+02044 +&euro; U+020AC +&image; U+02111 +&weierp; U+02118 +&real; U+0211C +&trade; U+02122 +&alefsym; U+02135 +&larr; U+02190 +&uarr; U+02191 +&rarr; U+02192 +&darr; U+02193 +&harr; U+02194 +&crarr; U+021B5 +&lArr; U+021D0 +&uArr; U+021D1 +&rArr; U+021D2 +&dArr; U+021D3 +&hArr; U+021D4 +&forall; U+02200 +&part; U+02202 +&exist; U+02203 +&empty; U+02205 +&nabla; U+02207 +&isin; U+02208 +&notin; U+02209 +&ni; U+0220B +&prod; U+0220F +&sum; U+02211 +&minus; U+02212 +&lowast; U+02217 +&radic; U+0221A +&prop; U+0221D +&infin; U+0221E +&ang; U+02220 +&and; U+02227 +&or; U+02228 +&cap; U+02229 +&cup; U+0222A +&int; U+0222B +&there4; U+02234 +&sim; U+0223C +&cong; U+02245 +&asymp; U+02248 +&ne; U+02260 +&equiv; U+02261 +&le; U+02264 +&ge; U+02265 +&sub; U+02282 +&sup; U+02283 +&nsub; U+02284 +&sube; U+02286 +&supe; U+02287 +&oplus; U+02295 +&otimes; U+02297 +&perp; U+022A5 +&sdot; U+022C5 +&lceil; U+02308 +&rceil; U+02309 +&lfloor; U+0230A +&rfloor; U+0230B +&lang; U+02329 +&rang; U+0232A +&loz; U+025CA +&spades; U+02660 +&clubs; U+02663 +&hearts; U+02665 +&diams; U+02666
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php