- Revision
- 221917
- Author
- [email protected]
- Date
- 2017-09-12 08:35:10 -0700 (Tue, 12 Sep 2017)
Log Message
Show punycode to user if a URL mixes Armenian Seh or Vo with other scripts
https://bugs.webkit.org/show_bug.cgi?id=176578
<rdar://problem/33906231>
Reviewed by Alex Christensen.
Source/WebCore:
Revise our "lookalike character" logic to include the Armenian Vo and Seh
characters, which can be mistaken for 'n' and 'v' when displayed in
certain fonts.
Tested by new API tests.
* platform/mac/WebCoreNSURLExtras.mm:
(WebCore::isArmenianLookalikeCharacter): Added utility function.
(WebCore::isArmenianScriptCharacter): Ditto.
(WebCore::isLookalikeCharacter): Handle Armenian-lookalike cases.
Source/WTF:
* wtf/ASCIICType.h:
(WTF::isASCIIDigitOrPunctuation): Added helper function to recognize ASCII digits
and punctuation characters.
Tools:
* TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm:
(TestWebKitAPI::TEST):
Modified Paths
Diff
Modified: trunk/Source/WTF/ChangeLog (221916 => 221917)
--- trunk/Source/WTF/ChangeLog 2017-09-12 15:31:41 UTC (rev 221916)
+++ trunk/Source/WTF/ChangeLog 2017-09-12 15:35:10 UTC (rev 221917)
@@ -1,3 +1,15 @@
+2017-09-12 Brent Fulgham <[email protected]>
+
+ Show punycode to user if a URL mixes Armenian Seh or Vo with other scripts
+ https://bugs.webkit.org/show_bug.cgi?id=176578
+ <rdar://problem/33906231>
+
+ Reviewed by Alex Christensen.
+
+ * wtf/ASCIICType.h:
+ (WTF::isASCIIDigitOrPunctuation): Added helper function to recognize ASCII digits
+ and punctuation characters.
+
2017-09-12 Sam Weinig <[email protected]>
[Cleanup] Follow up cleanup for DOMFormData implementation
Modified: trunk/Source/WTF/wtf/ASCIICType.h (221916 => 221917)
--- trunk/Source/WTF/wtf/ASCIICType.h 2017-09-12 15:31:41 UTC (rev 221916)
+++ trunk/Source/WTF/wtf/ASCIICType.h 2017-09-12 15:35:10 UTC (rev 221917)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2007-2016 Apple Inc. All rights reserved.
+ * Copyright (C) 2007-2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -247,8 +247,13 @@
return LIKELY(toASCIILowerUnchecked(inputCharacter) == expectedASCIILowercaseLetter);
}
+template<typename CharacterType> inline bool isASCIIDigitOrPunctuation(CharacterType charCode)
+{
+ return (charCode >= '!' && charCode <= '@') || (charCode >= '[' && charCode <= '`') || (charCode >= '{' && charCode <= '~');
}
+}
+
using WTF::isASCII;
using WTF::isASCIIAlpha;
using WTF::isASCIIAlphaCaselessEqual;
@@ -255,6 +260,7 @@
using WTF::isASCIIAlphanumeric;
using WTF::isASCIIBinaryDigit;
using WTF::isASCIIDigit;
+using WTF::isASCIIDigitOrPunctuation;
using WTF::isASCIIHexDigit;
using WTF::isASCIILower;
using WTF::isASCIIOctalDigit;
Modified: trunk/Source/WebCore/ChangeLog (221916 => 221917)
--- trunk/Source/WebCore/ChangeLog 2017-09-12 15:31:41 UTC (rev 221916)
+++ trunk/Source/WebCore/ChangeLog 2017-09-12 15:35:10 UTC (rev 221917)
@@ -1,3 +1,22 @@
+2017-09-12 Brent Fulgham <[email protected]>
+
+ Show punycode to user if a URL mixes Armenian Seh or Vo with other scripts
+ https://bugs.webkit.org/show_bug.cgi?id=176578
+ <rdar://problem/33906231>
+
+ Reviewed by Alex Christensen.
+
+ Revise our "lookalike character" logic to include the Armenian Vo and Seh
+ characters, which can be mistaken for 'n' and 'v' when displayed in
+ certain fonts.
+
+ Tested by new API tests.
+
+ * platform/mac/WebCoreNSURLExtras.mm:
+ (WebCore::isArmenianLookalikeCharacter): Added utility function.
+ (WebCore::isArmenianScriptCharacter): Ditto.
+ (WebCore::isLookalikeCharacter): Handle Armenian-lookalike cases.
+
2017-09-12 Antti Koivisto <[email protected]>
Remove RenderElement::isCSSAnimating boolean
Modified: trunk/Source/WebCore/platform/mac/WebCoreNSURLExtras.mm (221916 => 221917)
--- trunk/Source/WebCore/platform/mac/WebCoreNSURLExtras.mm 2017-09-12 15:31:41 UTC (rev 221916)
+++ trunk/Source/WebCore/platform/mac/WebCoreNSURLExtras.mm 2017-09-12 15:35:10 UTC (rev 221917)
@@ -58,6 +58,48 @@
namespace WebCore {
+static bool isArmenianLookalikeCharacter(UChar32 codePoint)
+{
+ return codePoint == 0x0548 || codePoint == 0x054D || codePoint == 0x0578 || codePoint == 0x057D;
+}
+
+static bool isArmenianScriptCharacter(UChar32 codePoint)
+{
+ UErrorCode error = U_ZERO_ERROR;
+ UScriptCode script = uscript_getScript(codePoint, &error);
+ if (error != U_ZERO_ERROR) {
+ LOG_ERROR("got ICU error while trying to look at scripts: %d", error);
+ return false;
+ }
+
+ return script == USCRIPT_ARMENIAN;
+}
+
+
+template<typename CharacterType> inline bool isASCIIDigitOrValidHostCharacter(CharacterType charCode)
+{
+ if (!isASCIIDigitOrPunctuation(charCode))
+ return false;
+
+ // Things the URL Parser rejects:
+ switch (charCode) {
+ case '#':
+ case '%':
+ case '/':
+ case ':':
+ case '?':
+ case '@':
+ case '[':
+ case '\\':
+ case ']':
+ return false;
+ default:
+ return true;
+ }
+}
+
+
+
static BOOL isLookalikeCharacter(std::optional<UChar32> previousCodePoint, UChar32 charCode)
{
// This function treats the following as unsafe, lookalike characters:
@@ -186,8 +228,19 @@
case 0x0307: /* COMBINING DOT ABOVE */
return previousCodePoint == 0x0237 /* LATIN SMALL LETTER DOTLESS J */
|| previousCodePoint == 0x0131; /* LATIN SMALL LETTER DOTLESS I */
+ case 0x0548: /* ARMENIAN CAPITAL LETTER VO */
+ case 0x054D: /* ARMENIAN CAPITAL LETTER SEH */
+ case 0x0578: /* ARMENIAN SMALL LETTER VO */
+ case 0x057D: /* ARMENIAN SMALL LETTER SEH */
+ return previousCodePoint
+ && !isASCIIDigitOrValidHostCharacter(previousCodePoint.value())
+ && !isArmenianScriptCharacter(previousCodePoint.value());
+ case '.':
+ return NO;
default:
- return NO;
+ return previousCodePoint
+ && isArmenianLookalikeCharacter(previousCodePoint.value())
+ && !(isArmenianScriptCharacter(charCode) || isASCIIDigitOrValidHostCharacter(charCode));
}
}
Modified: trunk/Tools/ChangeLog (221916 => 221917)
--- trunk/Tools/ChangeLog 2017-09-12 15:31:41 UTC (rev 221916)
+++ trunk/Tools/ChangeLog 2017-09-12 15:35:10 UTC (rev 221917)
@@ -1,3 +1,14 @@
+2017-09-12 Brent Fulgham <[email protected]>
+
+ Show punycode to user if a URL mixes Armenian Seh or Vo with other scripts
+ https://bugs.webkit.org/show_bug.cgi?id=176578
+ <rdar://problem/33906231>
+
+ Reviewed by Alex Christensen.
+
+ * TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm:
+ (TestWebKitAPI::TEST):
+
2017-09-12 Carlos Garcia Campos <[email protected]>
[Freetype] Doesn't support coloured fonts
Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm (221916 => 221917)
--- trunk/Tools/TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm 2017-09-12 15:31:41 UTC (rev 221916)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/cocoa/URLExtras.mm 2017-09-12 15:35:10 UTC (rev 221917)
@@ -90,6 +90,10 @@
"xn--o8f", // U+1D21
"xn--p8f", // U+1D22
"xn--0na", // U+0261
+ "xn--cn-ded", // U+054D
+ "xn--ews-nfe.org", // U+054D
+ "xn--yotube-qkh", // U+0578
+ "xn--cla-7fe.edu", // U+0578
};
for (const String& host : punycodedSpoofHosts) {
auto url = makeString("http://", host, "/").utf8();
@@ -97,6 +101,20 @@
}
}
+TEST(WebCore, URLExtras_NotSpoofed)
+{
+ // Valid mixtures of Armenian and other scripts
+ EXPECT_STREQ("https://en.wikipedia.org/wiki/.\u0570\u0561\u0575", userVisibleString(literalURL("https://en.wikipedia.org/wiki/.\u0570\u0561\u0575")));
+ EXPECT_STREQ("https://\u0573\u0574\u0578.\u0570\u0561\u0575", userVisibleString(literalURL("https://\u0573\u0574\u0578.\u0570\u0561\u0575")));
+ EXPECT_STREQ("https://\u0573-1-\u0574\u0578.\u0570\u0561\u0575", userVisibleString(literalURL("https://\u0573-1-\u0574\u0578.\u0570\u0561\u0575")));
+ EXPECT_STREQ("https://2\u0573_\u0574\u0578.\u0570\u0561\u0575", userVisibleString(literalURL("https://2\u0573_\u0574\u0578.\u0570\u0561\u0575")));
+ EXPECT_STREQ("https://\u0573_\u0574\u05783.\u0570\u0561\u0575", userVisibleString(literalURL("https://\u0573_\u0574\u05783.\u0570\u0561\u0575")));
+ EXPECT_STREQ("https://got\u0551\u0535\u0543.com", userVisibleString(literalURL("https://got\u0551\u0535\u0543.com")));
+ EXPECT_STREQ("https://\u0551\u0535\u0543fans.net", userVisibleString(literalURL("https://\u0551\u0535\u0543fans.net")));
+ EXPECT_STREQ("https://\u0551\u0535or\u0575\u0543.biz", userVisibleString(literalURL("https://\u0551\u0535or\u0575\u0543.biz")));
+ EXPECT_STREQ("https://\u0551\u0535and!$^&*()-~+={}or<>,.?\u0575\u0543.biz", userVisibleString(literalURL("https://\u0551\u0535and!$^&*()-~+={}or<>,.?\u0575\u0543.biz")));
+}
+
TEST(WebCore, URLExtras_DivisionSign)
{
// Selected the division sign as an example of a non-ASCII character that is allowed in host names, since it's a lookalike character.