Title: [221964] branches/safari-604-branch

Diff

Modified: branches/safari-604-branch/Source/WTF/ChangeLog (221963 => 221964)


--- branches/safari-604-branch/Source/WTF/ChangeLog	2017-09-13 08:02:58 UTC (rev 221963)
+++ branches/safari-604-branch/Source/WTF/ChangeLog	2017-09-13 08:03:01 UTC (rev 221964)
@@ -1,3 +1,19 @@
+2017-09-12  Jason Marcell  <[email protected]>
+
+        Cherry-pick r221917. rdar://problem/34404461
+
+    2017-09-12  Brent Fulgham  <[email protected]>
+
+            Show punycode to user if a URL mixes Armenian Seh or Vo with other scripts
+            https://bugs.webkit.org/show_bug.cgi?id=176578
+            <rdar://problem/33906231>
+
+            Reviewed by Alex Christensen.
+
+            * wtf/ASCIICType.h:
+            (WTF::isASCIIDigitOrPunctuation): Added helper function to recognize ASCII digits
+            and punctuation characters.
+
 2017-08-02  Matthew Hanson  <[email protected]>
 
         Cherry-pick r219602. rdar://problem/33537767

Modified: branches/safari-604-branch/Source/WTF/wtf/ASCIICType.h (221963 => 221964)


--- branches/safari-604-branch/Source/WTF/wtf/ASCIICType.h	2017-09-13 08:02:58 UTC (rev 221963)
+++ branches/safari-604-branch/Source/WTF/wtf/ASCIICType.h	2017-09-13 08:03:01 UTC (rev 221964)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2007-2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2007-2017 Apple Inc. All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -247,8 +247,13 @@
     return LIKELY(toASCIILowerUnchecked(inputCharacter) == expectedASCIILowercaseLetter);
 }
 
+template<typename CharacterType> inline bool isASCIIDigitOrPunctuation(CharacterType charCode) // True for ASCII digits and punctuation: every character from '!' through '~' that is not a letter.
+{
+    return (charCode >= '!' && charCode <= '@') || (charCode >= '[' && charCode <= '`') || (charCode >= '{' && charCode <= '~'); // '!'..'@' includes the digits '0'..'9'; the two gaps between the ranges are 'A'..'Z' and 'a'..'z'.
 }
 
+}
+
 using WTF::isASCII;
 using WTF::isASCIIAlpha;
 using WTF::isASCIIAlphaCaselessEqual;
@@ -255,6 +260,7 @@
 using WTF::isASCIIAlphanumeric;
 using WTF::isASCIIBinaryDigit;
 using WTF::isASCIIDigit;
+using WTF::isASCIIDigitOrPunctuation;
 using WTF::isASCIIHexDigit;
 using WTF::isASCIILower;
 using WTF::isASCIIOctalDigit;

Modified: branches/safari-604-branch/Source/WebCore/ChangeLog (221963 => 221964)


--- branches/safari-604-branch/Source/WebCore/ChangeLog	2017-09-13 08:02:58 UTC (rev 221963)
+++ branches/safari-604-branch/Source/WebCore/ChangeLog	2017-09-13 08:03:01 UTC (rev 221964)
@@ -1,3 +1,26 @@
+2017-09-12  Jason Marcell  <[email protected]>
+
+        Cherry-pick r221917. rdar://problem/34404461
+
+    2017-09-12  Brent Fulgham  <[email protected]>
+
+            Show punycode to user if a URL mixes Armenian Seh or Vo with other scripts
+            https://bugs.webkit.org/show_bug.cgi?id=176578
+            <rdar://problem/33906231>
+
+            Reviewed by Alex Christensen.
+
+            Revise our "lookalike character" logic to include the Armenian Vo and Seh
+            characters, which can be mistaken for 'n' and 'v' when displayed in
+            certain fonts.
+
+            Tested by new API tests.
+
+            * platform/mac/WebCoreNSURLExtras.mm:
+            (WebCore::isArmenianLookalikeCharacter): Added utility function.
+            (WebCore::isArmenianScriptCharacter): Ditto.
+            (WebCore::isLookalikeCharacter): Handle Armenian-lookalike cases.
+
 2017-09-10  Jason Marcell  <[email protected]>
 
         Cherry-pick r221709. rdar://problem/34169683

Modified: branches/safari-604-branch/Source/WebCore/platform/mac/WebCoreNSURLExtras.mm (221963 => 221964)


--- branches/safari-604-branch/Source/WebCore/platform/mac/WebCoreNSURLExtras.mm	2017-09-13 08:02:58 UTC (rev 221963)
+++ branches/safari-604-branch/Source/WebCore/platform/mac/WebCoreNSURLExtras.mm	2017-09-13 08:03:01 UTC (rev 221964)
@@ -58,6 +58,48 @@
 
 namespace WebCore {
 
+static bool isArmenianLookalikeCharacter(UChar32 codePoint) // True only for the Armenian letters Vo and Seh, in either case.
+{
+    return codePoint == 0x0548 || codePoint == 0x054D || codePoint == 0x0578 || codePoint == 0x057D; // Capital Vo, capital Seh, small Vo, small Seh.
+}
+
+static bool isArmenianScriptCharacter(UChar32 codePoint) // True when ICU's uscript_getScript() reports USCRIPT_ARMENIAN for codePoint.
+{
+    UErrorCode error = U_ZERO_ERROR;
+    UScriptCode script = uscript_getScript(codePoint, &error);
+    if (error != U_ZERO_ERROR) { // NOTE(review): any non-zero status is treated as an error here; ICU's usual check is U_FAILURE() -- confirm whether warning codes should also bail out.
+        LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
+        return false; // A failed lookup is reported as "not Armenian".
+    }
+
+    return script == USCRIPT_ARMENIAN;
+}
+
+
+template<typename CharacterType> inline bool isASCIIDigitOrValidHostCharacter(CharacterType charCode) // True for an ASCII digit or punctuation character that is not one of the characters excluded by the switch below.
+{
+    if (!isASCIIDigitOrPunctuation(charCode))
+        return false; // Anything that is not an ASCII digit or punctuation character never qualifies.
+
+    // Things the URL Parser rejects:
+    switch (charCode) {
+    case '#':
+    case '%':
+    case '/':
+    case ':':
+    case '?':
+    case '@':
+    case '[':
+    case '\\':
+    case ']':
+        return false;
+    default:
+        return true; // Remaining digits and punctuation are accepted.
+    }
+}
+
+
+
 static BOOL isLookalikeCharacter(std::optional<UChar32> previousCodePoint, UChar32 charCode)
 {
     // This function treats the following as unsafe, lookalike characters:
@@ -186,8 +228,19 @@
         case 0x0307: /* COMBINING DOT ABOVE */
             return previousCodePoint == 0x0237 /* LATIN SMALL LETTER DOTLESS J */
                 || previousCodePoint == 0x0131; /* LATIN SMALL LETTER DOTLESS I */
+        case 0x0548: /* ARMENIAN CAPITAL LETTER VO */
+        case 0x054D: /* ARMENIAN CAPITAL LETTER SEH */
+        case 0x0578: /* ARMENIAN SMALL LETTER VO */
+        case 0x057D: /* ARMENIAN SMALL LETTER SEH */
+            return previousCodePoint
+                && !isASCIIDigitOrValidHostCharacter(previousCodePoint.value())
+                && !isArmenianScriptCharacter(previousCodePoint.value());
+        case '.':
+            return NO;
         default:
-            return NO;
+            return previousCodePoint
+                && isArmenianLookalikeCharacter(previousCodePoint.value())
+                && !(isArmenianScriptCharacter(charCode) || isASCIIDigitOrValidHostCharacter(charCode));
     }
 }
 

Modified: branches/safari-604-branch/Tools/ChangeLog (221963 => 221964)


--- branches/safari-604-branch/Tools/ChangeLog	2017-09-13 08:02:58 UTC (rev 221963)
+++ branches/safari-604-branch/Tools/ChangeLog	2017-09-13 08:03:01 UTC (rev 221964)
@@ -1,5 +1,20 @@
 2017-09-12  Jason Marcell  <[email protected]>
 
+        Cherry-pick r221917. rdar://problem/34404461
+
+    2017-09-12  Brent Fulgham  <[email protected]>
+
+            Show punycode to user if a URL mixes Armenian Seh or Vo with other scripts
+            https://bugs.webkit.org/show_bug.cgi?id=176578
+            <rdar://problem/33906231>
+
+            Reviewed by Alex Christensen.
+
+            * TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm:
+            (TestWebKitAPI::TEST):
+
+2017-09-12  Jason Marcell  <[email protected]>
+
         Cherry-pick r221906. rdar://problem/34404478
 
     2017-09-11  Tim Horton  <[email protected]>

Modified: branches/safari-604-branch/Tools/TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm (221963 => 221964)


--- branches/safari-604-branch/Tools/TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm	2017-09-13 08:02:58 UTC (rev 221963)
+++ branches/safari-604-branch/Tools/TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm	2017-09-13 08:03:01 UTC (rev 221964)
@@ -90,6 +90,10 @@
         "xn--o8f", // U+1D21
         "xn--p8f", // U+1D22
         "xn--0na", // U+0261
+        "xn--cn-ded", // U+054D
+        "xn--ews-nfe.org", // U+054D
+        "xn--yotube-qkh", // U+0578
+        "xn--cla-7fe.edu", // U+0578
     };
     for (const String& host : punycodedSpoofHosts) {
         auto url = makeString("http://", host, "/").utf8();
@@ -97,6 +101,20 @@
     }
 }
 
+TEST(WebCore, URLExtras_NotSpoofed) // Each URL below is expected to come back from userVisibleString() unchanged.
+{
+    // Valid mixtures of Armenian and other scripts
+    EXPECT_STREQ("https://en.wikipedia.org/wiki/.\u0570\u0561\u0575", userVisibleString(literalURL("https://en.wikipedia.org/wiki/.\u0570\u0561\u0575")));
+    EXPECT_STREQ("https://\u0573\u0574\u0578.\u0570\u0561\u0575", userVisibleString(literalURL("https://\u0573\u0574\u0578.\u0570\u0561\u0575")));
+    EXPECT_STREQ("https://\u0573-1-\u0574\u0578.\u0570\u0561\u0575", userVisibleString(literalURL("https://\u0573-1-\u0574\u0578.\u0570\u0561\u0575")));
+    EXPECT_STREQ("https://2\u0573_\u0574\u0578.\u0570\u0561\u0575", userVisibleString(literalURL("https://2\u0573_\u0574\u0578.\u0570\u0561\u0575")));
+    EXPECT_STREQ("https://\u0573_\u0574\u05783.\u0570\u0561\u0575", userVisibleString(literalURL("https://\u0573_\u0574\u05783.\u0570\u0561\u0575")));
+    EXPECT_STREQ("https://got\u0551\u0535\u0543.com", userVisibleString(literalURL("https://got\u0551\u0535\u0543.com")));
+    EXPECT_STREQ("https://\u0551\u0535\u0543fans.net", userVisibleString(literalURL("https://\u0551\u0535\u0543fans.net")));
+    EXPECT_STREQ("https://\u0551\u0535or\u0575\u0543.biz", userVisibleString(literalURL("https://\u0551\u0535or\u0575\u0543.biz")));
+    EXPECT_STREQ("https://\u0551\u0535and!$^&*()-~+={}or<>,.?\u0575\u0543.biz", userVisibleString(literalURL("https://\u0551\u0535and!$^&*()-~+={}or<>,.?\u0575\u0543.biz")));
+}
+
 TEST(WebCore, URLExtras_DivisionSign)
 {
     // Selected the division sign as an example of a non-ASCII character that is allowed in host names, since it's a lookalike character.
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to