Diff
Modified: branches/safari-604-branch/Source/WTF/ChangeLog (221963 => 221964)
--- branches/safari-604-branch/Source/WTF/ChangeLog 2017-09-13 08:02:58 UTC (rev 221963)
+++ branches/safari-604-branch/Source/WTF/ChangeLog 2017-09-13 08:03:01 UTC (rev 221964)
@@ -1,3 +1,19 @@
+2017-09-12 Jason Marcell <[email protected]>
+
+ Cherry-pick r221917. rdar://problem/34404461
+
+ 2017-09-12 Brent Fulgham <[email protected]>
+
+ Show punycode to user if a URL mixes Armenian Seh or Vo with other scripts
+ https://bugs.webkit.org/show_bug.cgi?id=176578
+ <rdar://problem/33906231>
+
+ Reviewed by Alex Christensen.
+
+ * wtf/ASCIICType.h:
+ (WTF::isASCIIDigitOrPunctuation): Added helper function to recognize ASCII digits
+ and punctuation characters.
+
2017-08-02 Matthew Hanson <[email protected]>
Cherry-pick r219602. rdar://problem/33537767
Modified: branches/safari-604-branch/Source/WTF/wtf/ASCIICType.h (221963 => 221964)
--- branches/safari-604-branch/Source/WTF/wtf/ASCIICType.h 2017-09-13 08:02:58 UTC (rev 221963)
+++ branches/safari-604-branch/Source/WTF/wtf/ASCIICType.h 2017-09-13 08:03:01 UTC (rev 221964)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007-2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2007-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -247,8 +247,13 @@
return LIKELY(toASCIILowerUnchecked(inputCharacter) == expectedASCIILowercaseLetter);
}
+// Returns true when charCode falls in one of the ASCII ranges '!'..'@', '['..'`' or '{'..'~'.
+// Together these ranges hold the ASCII digits and punctuation characters; letters and
+// everything outside '!'..'~' yield false.
+template<typename CharacterType> inline bool isASCIIDigitOrPunctuation(CharacterType charCode)
+{
+    if (charCode < '!' || charCode > '~')
+        return false;
+    if (charCode <= '@')
+        return true;
+    if (charCode < '[')
+        return false; // 'A'..'Z'
+    if (charCode <= '`')
+        return true;
+    return charCode >= '{'; // 'a'..'z' are rejected.
}
+}
+
using WTF::isASCII;
using WTF::isASCIIAlpha;
using WTF::isASCIIAlphaCaselessEqual;
@@ -255,6 +260,7 @@
using WTF::isASCIIAlphanumeric;
using WTF::isASCIIBinaryDigit;
using WTF::isASCIIDigit;
+using WTF::isASCIIDigitOrPunctuation;
using WTF::isASCIIHexDigit;
using WTF::isASCIILower;
using WTF::isASCIIOctalDigit;
Modified: branches/safari-604-branch/Source/WebCore/ChangeLog (221963 => 221964)
--- branches/safari-604-branch/Source/WebCore/ChangeLog 2017-09-13 08:02:58 UTC (rev 221963)
+++ branches/safari-604-branch/Source/WebCore/ChangeLog 2017-09-13 08:03:01 UTC (rev 221964)
@@ -1,3 +1,26 @@
+2017-09-12 Jason Marcell <[email protected]>
+
+ Cherry-pick r221917. rdar://problem/34404461
+
+ 2017-09-12 Brent Fulgham <[email protected]>
+
+ Show punycode to user if a URL mixes Armenian Seh or Vo with other scripts
+ https://bugs.webkit.org/show_bug.cgi?id=176578
+ <rdar://problem/33906231>
+
+ Reviewed by Alex Christensen.
+
+ Revise our "lookalike character" logic to include the Armenian Vo and Seh
+ characters, which can be mistaken for 'n' and 'v' when displayed in
+ certain fonts.
+
+ Tested by new API tests.
+
+ * platform/mac/WebCoreNSURLExtras.mm:
+ (WebCore::isArmenianLookalikeCharacter): Added utility function.
+ (WebCore::isArmenianScriptCharacter): Ditto.
+ (WebCore::isLookalikeCharacter): Handle Armenian-lookalike cases.
+
2017-09-10 Jason Marcell <[email protected]>
Cherry-pick r221709. rdar://problem/34169683
Modified: branches/safari-604-branch/Source/WebCore/platform/mac/WebCoreNSURLExtras.mm (221963 => 221964)
--- branches/safari-604-branch/Source/WebCore/platform/mac/WebCoreNSURLExtras.mm 2017-09-13 08:02:58 UTC (rev 221963)
+++ branches/safari-604-branch/Source/WebCore/platform/mac/WebCoreNSURLExtras.mm 2017-09-13 08:03:01 UTC (rev 221964)
@@ -58,6 +58,48 @@
namespace WebCore {
+// Returns true for the four Armenian code points treated as lookalikes:
+// capital and small Vo (U+0548, U+0578) and capital and small Seh (U+054D, U+057D).
+static bool isArmenianLookalikeCharacter(UChar32 codePoint)
+{
+    switch (codePoint) {
+    case 0x0548:
+    case 0x054D:
+    case 0x0578:
+    case 0x057D:
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Reports whether ICU assigns codePoint to the Armenian script.
+// If the ICU lookup fails, the failure is logged and false is returned.
+static bool isArmenianScriptCharacter(UChar32 codePoint)
+{
+    UErrorCode error = U_ZERO_ERROR;
+    UScriptCode script = uscript_getScript(codePoint, &error);
+    // U_FAILURE is true only for genuine errors. Warning statuses are also
+    // different from U_ZERO_ERROR, but the returned script is still usable then.
+    if (U_FAILURE(error)) {
+        LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
+        return false;
+    }
+
+    return script == USCRIPT_ARMENIAN;
+}
+
+
+// Returns true when charCode is an ASCII digit or punctuation character and is not
+// one of the characters listed below as rejected by the URL Parser.
+template<typename CharacterType> inline bool isASCIIDigitOrValidHostCharacter(CharacterType charCode)
+{
+    // Things the URL Parser rejects.
+    const bool rejectedByURLParser = charCode == '#'
+        || charCode == '%'
+        || charCode == '/'
+        || charCode == ':'
+        || charCode == '?'
+        || charCode == '@'
+        || charCode == '['
+        || charCode == '\\'
+        || charCode == ']';
+
+    return isASCIIDigitOrPunctuation(charCode) && !rejectedByURLParser;
+}
+
+
+
static BOOL isLookalikeCharacter(std::optional<UChar32> previousCodePoint, UChar32 charCode)
{
// This function treats the following as unsafe, lookalike characters:
@@ -186,8 +228,19 @@
case 0x0307: /* COMBINING DOT ABOVE */
return previousCodePoint == 0x0237 /* LATIN SMALL LETTER DOTLESS J */
|| previousCodePoint == 0x0131; /* LATIN SMALL LETTER DOTLESS I */
+ case 0x0548: /* ARMENIAN CAPITAL LETTER VO */
+ case 0x054D: /* ARMENIAN CAPITAL LETTER SEH */
+ case 0x0578: /* ARMENIAN SMALL LETTER VO */
+ case 0x057D: /* ARMENIAN SMALL LETTER SEH */
+ return previousCodePoint
+ && !isASCIIDigitOrValidHostCharacter(previousCodePoint.value())
+ && !isArmenianScriptCharacter(previousCodePoint.value());
+ case '.':
+ return NO;
default:
- return NO;
+ return previousCodePoint
+ && isArmenianLookalikeCharacter(previousCodePoint.value())
+ && !(isArmenianScriptCharacter(charCode) || isASCIIDigitOrValidHostCharacter(charCode));
}
}
Modified: branches/safari-604-branch/Tools/ChangeLog (221963 => 221964)
--- branches/safari-604-branch/Tools/ChangeLog 2017-09-13 08:02:58 UTC (rev 221963)
+++ branches/safari-604-branch/Tools/ChangeLog 2017-09-13 08:03:01 UTC (rev 221964)
@@ -1,5 +1,20 @@
2017-09-12 Jason Marcell <[email protected]>
+ Cherry-pick r221917. rdar://problem/34404461
+
+ 2017-09-12 Brent Fulgham <[email protected]>
+
+ Show punycode to user if a URL mixes Armenian Seh or Vo with other scripts
+ https://bugs.webkit.org/show_bug.cgi?id=176578
+ <rdar://problem/33906231>
+
+ Reviewed by Alex Christensen.
+
+ * TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm:
+ (TestWebKitAPI::TEST):
+
+2017-09-12 Jason Marcell <[email protected]>
+
Cherry-pick r221906. rdar://problem/34404478
2017-09-11 Tim Horton <[email protected]>
Modified: branches/safari-604-branch/Tools/TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm (221963 => 221964)
--- branches/safari-604-branch/Tools/TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm 2017-09-13 08:02:58 UTC (rev 221963)
+++ branches/safari-604-branch/Tools/TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm 2017-09-13 08:03:01 UTC (rev 221964)
@@ -90,6 +90,10 @@
"xn--o8f", // U+1D21
"xn--p8f", // U+1D22
"xn--0na", // U+0261
+ "xn--cn-ded", // U+054D
+ "xn--ews-nfe.org", // U+054D
+ "xn--yotube-qkh", // U+0578
+ "xn--cla-7fe.edu", // U+0578
};
for (const String& host : punycodedSpoofHosts) {
auto url = "" host, "/").utf8();
@@ -97,6 +101,20 @@
}
}
+// Each URL below is expected to come back from userVisibleString() exactly as it was written.
+// Going by the test name, that means none of these mixtures is treated as a spoof.
+TEST(WebCore, URLExtras_NotSpoofed)
+{
+ // Valid mixtures of Armenian and other scripts
+ // Non-ASCII characters appear only after the host here.
+ EXPECT_STREQ("https://en.wikipedia.org/wiki/.\u0570\u0561\u0575", userVisibleString(literalURL("https://en.wikipedia.org/wiki/.\u0570\u0561\u0575")));
+ // Hosts containing U+0578 (small Vo) next to other \u05xx characters, '-', '_' and digits.
+ EXPECT_STREQ("https://\u0573\u0574\u0578.\u0570\u0561\u0575", userVisibleString(literalURL("https://\u0573\u0574\u0578.\u0570\u0561\u0575")));
+ EXPECT_STREQ("https://\u0573-1-\u0574\u0578.\u0570\u0561\u0575", userVisibleString(literalURL("https://\u0573-1-\u0574\u0578.\u0570\u0561\u0575")));
+ EXPECT_STREQ("https://2\u0573_\u0574\u0578.\u0570\u0561\u0575", userVisibleString(literalURL("https://2\u0573_\u0574\u0578.\u0570\u0561\u0575")));
+ EXPECT_STREQ("https://\u0573_\u0574\u05783.\u0570\u0561\u0575", userVisibleString(literalURL("https://\u0573_\u0574\u05783.\u0570\u0561\u0575")));
+ // ASCII letters and punctuation mixed with \u05xx characters that are not among the four Vo/Seh code points.
+ EXPECT_STREQ("https://got\u0551\u0535\u0543.com", userVisibleString(literalURL("https://got\u0551\u0535\u0543.com")));
+ EXPECT_STREQ("https://\u0551\u0535\u0543fans.net", userVisibleString(literalURL("https://\u0551\u0535\u0543fans.net")));
+ EXPECT_STREQ("https://\u0551\u0535or\u0575\u0543.biz", userVisibleString(literalURL("https://\u0551\u0535or\u0575\u0543.biz")));
+ EXPECT_STREQ("https://\u0551\u0535and!$^&*()-~+={}or<>,.?\u0575\u0543.biz", userVisibleString(literalURL("https://\u0551\u0535and!$^&*()-~+={}or<>,.?\u0575\u0543.biz")));
+}
+
TEST(WebCore, URLExtras_DivisionSign)
{
// Selected the division sign as an example of a non-ASCII character that is allowed in host names, since it's a lookalike character.