- Revision
- 282535
- Author
- ape...@igalia.com
- Date
- 2021-09-16 03:36:42 -0700 (Thu, 16 Sep 2021)
Log Message
Merge r278236 - Punycode encode U+0BE6 when not in context of other Tamil characters
https://bugs.webkit.org/show_bug.cgi?id=226409
<rdar://78160926>
Reviewed by Tim Horton.
Source/WTF:
It has quite legitimate use, so we don't want to always punycode encode it,
but when used in the context of non-Tamil characters we want to punycode encode it.
* wtf/URLHelpers.cpp:
(WTF::URLHelpers::isLookalikeCharacterOfScriptType<USCRIPT_ARMENIAN>):
(WTF::URLHelpers::isLookalikeCharacterOfScriptType<USCRIPT_TAMIL>):
(WTF::URLHelpers::isOfScriptType):
(WTF::URLHelpers::isLookalikeSequence):
(WTF::URLHelpers::isLookalikeCharacter):
(WTF::URLHelpers::isArmenianLookalikeCharacter): Deleted.
(WTF::URLHelpers::isArmenianScriptCharacter): Deleted.
(WTF::URLHelpers::isArmenianLookalikeSequence): Deleted.
Tools:
* TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm:
(TestWebKitAPI::TEST):
Modified Paths
Diff
Modified: releases/WebKitGTK/webkit-2.32/Source/WTF/ChangeLog (282534 => 282535)
--- releases/WebKitGTK/webkit-2.32/Source/WTF/ChangeLog 2021-09-16 10:36:35 UTC (rev 282534)
+++ releases/WebKitGTK/webkit-2.32/Source/WTF/ChangeLog 2021-09-16 10:36:42 UTC (rev 282535)
@@ -1,3 +1,24 @@
+2021-05-28 Alex Christensen <achristen...@webkit.org>
+
+ Punycode encode U+0BE6 when not in context of other Tamil characters
+ https://bugs.webkit.org/show_bug.cgi?id=226409
+ <rdar://78160926>
+
+ Reviewed by Tim Horton.
+
+ It has quite legitimate use, so we don't want to always punycode encode it,
+ but when used in the context of non-Tamil characters we want to punycode encode it.
+
+ * wtf/URLHelpers.cpp:
+ (WTF::URLHelpers::isLookalikeCharacterOfScriptType<USCRIPT_ARMENIAN>):
+ (WTF::URLHelpers::isLookalikeCharacterOfScriptType<USCRIPT_TAMIL>):
+ (WTF::URLHelpers::isOfScriptType):
+ (WTF::URLHelpers::isLookalikeSequence):
+ (WTF::URLHelpers::isLookalikeCharacter):
+ (WTF::URLHelpers::isArmenianLookalikeCharacter): Deleted.
+ (WTF::URLHelpers::isArmenianScriptCharacter): Deleted.
+ (WTF::URLHelpers::isArmenianLookalikeSequence): Deleted.
+
2021-05-27 Mikhail R. Gadelha <mikhail.rama...@gmail.com>
Increase NumberToStringBuffer to account for negative number
Modified: releases/WebKitGTK/webkit-2.32/Source/WTF/wtf/URLHelpers.cpp (282534 => 282535)
--- releases/WebKitGTK/webkit-2.32/Source/WTF/wtf/URLHelpers.cpp 2021-09-16 10:36:35 UTC (rev 282534)
+++ releases/WebKitGTK/webkit-2.32/Source/WTF/wtf/URLHelpers.cpp 2021-09-16 10:36:42 UTC (rev 282535)
@@ -67,7 +67,9 @@
#endif // !PLATFORM(COCOA)
-static bool isArmenianLookalikeCharacter(UChar32 codePoint)
+template<UScriptCode> bool isLookalikeCharacterOfScriptType(UChar32);
+
+template<> bool isLookalikeCharacterOfScriptType<USCRIPT_ARMENIAN>(UChar32 codePoint)
{
switch (codePoint) {
case 0x0548: /* ARMENIAN CAPITAL LETTER VO */
@@ -84,8 +86,19 @@
}
}
-static bool isArmenianScriptCharacter(UChar32 codePoint)
+template<> bool isLookalikeCharacterOfScriptType<USCRIPT_TAMIL>(UChar32 codePoint)
{
+ switch (codePoint) {
+ case 0x0BE6: /* TAMIL DIGIT ZERO */
+ return true;
+ default:
+ return false;
+ }
+}
+
+template <UScriptCode ScriptType>
+bool isOfScriptType(UChar32 codePoint)
+{
UErrorCode error = U_ZERO_ERROR;
UScriptCode script = uscript_getScript(codePoint, &error);
if (error != U_ZERO_ERROR) {
@@ -92,8 +105,7 @@
LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
return false;
}
-
- return script == USCRIPT_ARMENIAN;
+ return script == ScriptType;
}
template<typename CharacterType> inline bool isASCIIDigitOrValidHostCharacter(CharacterType charCode)
@@ -118,16 +130,17 @@
}
}
-static bool isArmenianLookalikeSequence(const Optional<UChar32>& previousCodePoint, UChar32 codePoint)
+template <UScriptCode ScriptType>
+bool isLookalikeSequence(const Optional<UChar32>& previousCodePoint, UChar32 codePoint)
{
if (!previousCodePoint || *previousCodePoint == '/')
return false;
- auto isArmenianLookalikePair = [] (UChar first, UChar second) {
- return isArmenianLookalikeCharacter(first) && !(isArmenianScriptCharacter(second) || isASCIIDigitOrValidHostCharacter(second));
+ auto isLookalikePair = [] (UChar first, UChar second) {
+ return isLookalikeCharacterOfScriptType<ScriptType>(first) && !(isOfScriptType<ScriptType>(second) || isASCIIDigitOrValidHostCharacter(second));
};
- return isArmenianLookalikePair(codePoint, *previousCodePoint)
- || isArmenianLookalikePair(*previousCodePoint, codePoint);
+ return isLookalikePair(codePoint, *previousCodePoint)
+ || isLookalikePair(*previousCodePoint, codePoint);
}
static bool isLookalikeCharacter(const Optional<UChar32>& previousCodePoint, UChar32 codePoint)
@@ -277,7 +290,8 @@
case '.':
return false;
default:
- return isArmenianLookalikeSequence(previousCodePoint, codePoint);
+ return isLookalikeSequence<USCRIPT_ARMENIAN>(previousCodePoint, codePoint)
+ || isLookalikeSequence<USCRIPT_TAMIL>(previousCodePoint, codePoint);
}
}
Modified: releases/WebKitGTK/webkit-2.32/Tools/ChangeLog (282534 => 282535)
--- releases/WebKitGTK/webkit-2.32/Tools/ChangeLog 2021-09-16 10:36:35 UTC (rev 282534)
+++ releases/WebKitGTK/webkit-2.32/Tools/ChangeLog 2021-09-16 10:36:42 UTC (rev 282535)
@@ -1,3 +1,14 @@
+2021-05-28 Alex Christensen <achristen...@webkit.org>
+
+ Punycode encode U+0BE6 when not in context of other Tamil characters
+ https://bugs.webkit.org/show_bug.cgi?id=226409
+ <rdar://78160926>
+
+ Reviewed by Tim Horton.
+
+ * TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm:
+ (TestWebKitAPI::TEST):
+
2021-05-17 Wenson Hsieh <wenson_hs...@apple.com>
[GPU Process] Object identifiers with the deleted value should cause MESSAGE_CHECKs
Modified: releases/WebKitGTK/webkit-2.32/Tools/TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm (282534 => 282535)
--- releases/WebKitGTK/webkit-2.32/Tools/TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm 2021-09-16 10:36:35 UTC (rev 282534)
+++ releases/WebKitGTK/webkit-2.32/Tools/TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm 2021-09-16 10:36:42 UTC (rev 282535)
@@ -119,6 +119,8 @@
"xn--o-qdc", // 'o' U+0585
"xn--g-hdc", // U+0581 'g'
"xn--g-idc", // 'g' U+0581
+ "xn--o-00e", // U+0BE6 'o'
+ "xn--o-10e", // 'o' U+0BE6
};
for (const String& host : punycodedSpoofHosts) {
auto url = makeString("http://", host, "/").utf8();
@@ -142,6 +144,9 @@
EXPECT_STREQ("https://\u0581%67/", userVisibleString(literalURL("https://\u0581g/")));
EXPECT_STREQ("https://o%D5%95%2F", userVisibleString(literalURL("https://o\u0555/")));
EXPECT_STREQ("https://o%D6%85%2F", userVisibleString(literalURL("https://o\u0585/")));
+
+ // Tamil
+ EXPECT_STREQ("https://\u0BE6\u0BE7\u0BE8\u0BE9count/", userVisibleString(literalURL("https://\u0BE6\u0BE7\u0BE8\u0BE9count/")));
}
TEST(WTF_URLExtras, URLExtras_DivisionSign)