Title: [282535] releases/WebKitGTK/webkit-2.32
Revision
282535
Author
ape...@igalia.com
Date
2021-09-16 03:36:42 -0700 (Thu, 16 Sep 2021)

Log Message

Merge r278236 - Punycode encode U+0BE6 when not in context of other Tamil characters
https://bugs.webkit.org/show_bug.cgi?id=226409
<rdar://78160926>

Reviewed by Tim Horton.

Source/WTF:

U+0BE6 (TAMIL DIGIT ZERO) has quite legitimate uses, so we don't want to always punycode encode it,
but when it is used in the context of non-Tamil characters we do want to punycode encode it.

* wtf/URLHelpers.cpp:
(WTF::URLHelpers::isLookalikeCharacterOfScriptType<USCRIPT_ARMENIAN>):
(WTF::URLHelpers::isLookalikeCharacterOfScriptType<USCRIPT_TAMIL>):
(WTF::URLHelpers::isOfScriptType):
(WTF::URLHelpers::isLookalikeSequence):
(WTF::URLHelpers::isLookalikeCharacter):
(WTF::URLHelpers::isArmenianLookalikeCharacter): Deleted.
(WTF::URLHelpers::isArmenianScriptCharacter): Deleted.
(WTF::URLHelpers::isArmenianLookalikeSequence): Deleted.

Tools:

* TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm:
(TestWebKitAPI::TEST):

Modified Paths

Diff

Modified: releases/WebKitGTK/webkit-2.32/Source/WTF/ChangeLog (282534 => 282535)


--- releases/WebKitGTK/webkit-2.32/Source/WTF/ChangeLog	2021-09-16 10:36:35 UTC (rev 282534)
+++ releases/WebKitGTK/webkit-2.32/Source/WTF/ChangeLog	2021-09-16 10:36:42 UTC (rev 282535)
@@ -1,3 +1,24 @@
+2021-05-28  Alex Christensen  <achristen...@webkit.org>
+
+        Punycode encode U+0BE6 when not in context of other Tamil characters
+        https://bugs.webkit.org/show_bug.cgi?id=226409
+        <rdar://78160926>
+
+        Reviewed by Tim Horton.
+
+        It has quite legitimate use, so we don't want to always punycode encode it,
+        but when used in the context of non-Tamil characters we want to punycode encode it.
+
+        * wtf/URLHelpers.cpp:
+        (WTF::URLHelpers::isLookalikeCharacterOfScriptType<USCRIPT_ARMENIAN>):
+        (WTF::URLHelpers::isLookalikeCharacterOfScriptType<USCRIPT_TAMIL>):
+        (WTF::URLHelpers::isOfScriptType):
+        (WTF::URLHelpers::isLookalikeSequence):
+        (WTF::URLHelpers::isLookalikeCharacter):
+        (WTF::URLHelpers::isArmenianLookalikeCharacter): Deleted.
+        (WTF::URLHelpers::isArmenianScriptCharacter): Deleted.
+        (WTF::URLHelpers::isArmenianLookalikeSequence): Deleted.
+
 2021-05-27  Mikhail R. Gadelha  <mikhail.rama...@gmail.com>
 
         Increase NumberToStringBuffer to account for negative number

Modified: releases/WebKitGTK/webkit-2.32/Source/WTF/wtf/URLHelpers.cpp (282534 => 282535)


--- releases/WebKitGTK/webkit-2.32/Source/WTF/wtf/URLHelpers.cpp	2021-09-16 10:36:35 UTC (rev 282534)
+++ releases/WebKitGTK/webkit-2.32/Source/WTF/wtf/URLHelpers.cpp	2021-09-16 10:36:42 UTC (rev 282535)
@@ -67,7 +67,9 @@
 
 #endif // !PLATFORM(COCOA)
 
-static bool isArmenianLookalikeCharacter(UChar32 codePoint)
+template<UScriptCode> bool isLookalikeCharacterOfScriptType(UChar32);
+
+template<> bool isLookalikeCharacterOfScriptType<USCRIPT_ARMENIAN>(UChar32 codePoint)
 {
     switch (codePoint) {
     case 0x0548: /* ARMENIAN CAPITAL LETTER VO */
@@ -84,8 +86,19 @@
     }
 }
 
-static bool isArmenianScriptCharacter(UChar32 codePoint)
+template<> bool isLookalikeCharacterOfScriptType<USCRIPT_TAMIL>(UChar32 codePoint)
 {
+    switch (codePoint) {
+    case 0x0BE6: /* TAMIL DIGIT ZERO */
+        return true;
+    default:
+        return false;
+    }
+}
+
+template <UScriptCode ScriptType>
+bool isOfScriptType(UChar32 codePoint)
+{
     UErrorCode error = U_ZERO_ERROR;
     UScriptCode script = uscript_getScript(codePoint, &error);
     if (error != U_ZERO_ERROR) {
@@ -92,8 +105,7 @@
         LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
         return false;
     }
-
-    return script == USCRIPT_ARMENIAN;
+    return script == ScriptType;
 }
 
 template<typename CharacterType> inline bool isASCIIDigitOrValidHostCharacter(CharacterType charCode)
@@ -118,16 +130,17 @@
     }
 }
 
-static bool isArmenianLookalikeSequence(const Optional<UChar32>& previousCodePoint, UChar32 codePoint)
+template <UScriptCode ScriptType>
+bool isLookalikeSequence(const Optional<UChar32>& previousCodePoint, UChar32 codePoint)
 {
     if (!previousCodePoint || *previousCodePoint == '/')
         return false;
 
-    auto isArmenianLookalikePair = [] (UChar first, UChar second) {
-        return isArmenianLookalikeCharacter(first) && !(isArmenianScriptCharacter(second) || isASCIIDigitOrValidHostCharacter(second));
+    auto isLookalikePair = [] (UChar first, UChar second) {
+        return isLookalikeCharacterOfScriptType<ScriptType>(first) && !(isOfScriptType<ScriptType>(second) || isASCIIDigitOrValidHostCharacter(second));
     };
-    return isArmenianLookalikePair(codePoint, *previousCodePoint)
-        || isArmenianLookalikePair(*previousCodePoint, codePoint);
+    return isLookalikePair(codePoint, *previousCodePoint)
+        || isLookalikePair(*previousCodePoint, codePoint);
 }
 
 static bool isLookalikeCharacter(const Optional<UChar32>& previousCodePoint, UChar32 codePoint)
@@ -277,7 +290,8 @@
     case '.':
         return false;
     default:
-        return isArmenianLookalikeSequence(previousCodePoint, codePoint);
+        return isLookalikeSequence<USCRIPT_ARMENIAN>(previousCodePoint, codePoint)
+            || isLookalikeSequence<USCRIPT_TAMIL>(previousCodePoint, codePoint);
     }
 }
 

Modified: releases/WebKitGTK/webkit-2.32/Tools/ChangeLog (282534 => 282535)


--- releases/WebKitGTK/webkit-2.32/Tools/ChangeLog	2021-09-16 10:36:35 UTC (rev 282534)
+++ releases/WebKitGTK/webkit-2.32/Tools/ChangeLog	2021-09-16 10:36:42 UTC (rev 282535)
@@ -1,3 +1,14 @@
+2021-05-28  Alex Christensen  <achristen...@webkit.org>
+
+        Punycode encode U+0BE6 when not in context of other Tamil characters
+        https://bugs.webkit.org/show_bug.cgi?id=226409
+        <rdar://78160926>
+
+        Reviewed by Tim Horton.
+
+        * TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm:
+        (TestWebKitAPI::TEST):
+
 2021-05-17  Wenson Hsieh  <wenson_hs...@apple.com>
 
         [GPU Process] Object identifiers with the deleted value should cause MESSAGE_CHECKs

Modified: releases/WebKitGTK/webkit-2.32/Tools/TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm (282534 => 282535)


--- releases/WebKitGTK/webkit-2.32/Tools/TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm	2021-09-16 10:36:35 UTC (rev 282534)
+++ releases/WebKitGTK/webkit-2.32/Tools/TestWebKitAPI/Tests/WTF/cocoa/URLExtras.mm	2021-09-16 10:36:42 UTC (rev 282535)
@@ -119,6 +119,8 @@
         "xn--o-qdc", // 'o' U+0585
         "xn--g-hdc", // U+0581 'g'
         "xn--g-idc", // 'g' U+0581
+        "xn--o-00e", // U+0BE6 'o'
+        "xn--o-10e", // 'o' U+0BE6
     };
     for (const String& host : punycodedSpoofHosts) {
         auto url = makeString("http://", host, "/").utf8();
@@ -142,6 +144,9 @@
     EXPECT_STREQ("https://\u0581%67/", userVisibleString(literalURL("https://\u0581g/")));
     EXPECT_STREQ("https://o%D5%95%2F", userVisibleString(literalURL("https://o\u0555/")));
     EXPECT_STREQ("https://o%D6%85%2F", userVisibleString(literalURL("https://o\u0585/")));
+
+    // Tamil
+    EXPECT_STREQ("https://\u0BE6\u0BE7\u0BE8\u0BE9count/", userVisibleString(literalURL("https://\u0BE6\u0BE7\u0BE8\u0BE9count/")));
 }
 
 TEST(WTF_URLExtras, URLExtras_DivisionSign)
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to