Title: [206887] trunk
Revision
206887
Author
[email protected]
Date
2016-10-06 16:05:17 -0700 (Thu, 06 Oct 2016)

Log Message

URLParser: Non-ASCII characters in Non-UTF-8 encoded queries of relative URLs with ws, wss, or nonspecial schemes should be UTF-8 encoded
https://bugs.webkit.org/show_bug.cgi?id=163089

Reviewed by Tim Horton.

Source/WebCore:

This is a change similar to r206818 but with relative URLs.
This matches the spec, URL::parse, and other browsers' behavior.
Covered by new API tests for URLParser.
This also fixes tests like http/tests/misc/url-in-utf32le.html when URLParser is enabled.

* platform/URL.cpp:
(WebCore::URL::URL):
Use the same encoding for the URL constructor whether or not the URLParser is enabled.
* platform/URLParser.cpp:
(WebCore::URLParser::copyURLPartsUntil):
(WebCore::URLParser::parse):
(WebCore::isSpecial): Deleted.
* platform/URLParser.h:
Use UTF-8 for non-special, ws, or wss schemes.

Tools:

* TestWebKitAPI/Tests/WebCore/URLParser.cpp:
(TestWebKitAPI::checkURL):
(TestWebKitAPI::TEST_F):

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (206886 => 206887)


--- trunk/Source/WebCore/ChangeLog	2016-10-06 22:47:47 UTC (rev 206886)
+++ trunk/Source/WebCore/ChangeLog	2016-10-06 23:05:17 UTC (rev 206887)
@@ -1,3 +1,25 @@
+2016-10-06  Alex Christensen  <[email protected]>
+
+        URLParser: Non-ASCII characters in Non-UTF-8 encoded queries of relative URLs with ws, wss, or nonspecial schemes should be UTF-8 encoded
+        https://bugs.webkit.org/show_bug.cgi?id=163089
+
+        Reviewed by Tim Horton.
+
+        This is a change similar to r206818 but with relative URLs.
+        This matches the spec, URL::parse, and other browsers' behavior.
+        Covered by new API tests for URLParser.
+        This also fixes tests like http/tests/misc/url-in-utf32le.html when URLParser is enabled.
+
+        * platform/URL.cpp:
+        (WebCore::URL::URL):
+        Use the same encoding for the URL constructor whether or not the URLParser is enabled.
+        * platform/URLParser.cpp:
+        (WebCore::URLParser::copyURLPartsUntil):
+        (WebCore::URLParser::parse):
+        (WebCore::isSpecial): Deleted.
+        * platform/URLParser.h:
+        Use UTF-8 for non-special, ws, or wss schemes.
+
 2016-10-06  Zalan Bujtas  <[email protected]>
 
         Add back ASSERT(!needsLayout) to RenderTableSection which is now valid

Modified: trunk/Source/WebCore/platform/URL.cpp (206886 => 206887)


--- trunk/Source/WebCore/platform/URL.cpp	2016-10-06 22:47:47 UTC (rev 206886)
+++ trunk/Source/WebCore/platform/URL.cpp	2016-10-06 23:05:17 UTC (rev 206887)
@@ -464,14 +464,14 @@
 
 URL::URL(const URL& base, const String& relative, const TextEncoding& encoding)
 {
+    // For UTF-{7,16,32}, we want to use UTF-8 for the query part as
+    // we do when submitting a form. A form with GET method
+    // has its contents added to a URL as query params and it makes sense
+    // to be consistent.
     if (URLParser::enabled()) {
-        URLParser parser(relative, base, encoding);
+        URLParser parser(relative, base, encoding.encodingForFormSubmission());
         *this = parser.result();
     } else {
-        // For UTF-{7,16,32}, we want to use UTF-8 for the query part as
-        // we do when submitting a form. A form with GET method
-        // has its contents added to a URL as query params and it makes sense
-        // to be consistent.
         init(base, relative, encoding.encodingForFormSubmission());
     }
 }

Modified: trunk/Source/WebCore/platform/URLParser.cpp (206886 => 206887)


--- trunk/Source/WebCore/platform/URLParser.cpp	2016-10-06 22:47:47 UTC (rev 206886)
+++ trunk/Source/WebCore/platform/URLParser.cpp	2016-10-06 23:05:17 UTC (rev 206887)
@@ -688,24 +688,6 @@
     NonSpecial
 };
 
-ALWAYS_INLINE bool isSpecial(Scheme scheme)
-{
-    switch (scheme) {
-    case Scheme::WS:
-    case Scheme::WSS:
-    case Scheme::File:
-    case Scheme::FTP:
-    case Scheme::Gopher:
-    case Scheme::HTTP:
-    case Scheme::HTTPS:
-        return true;
-    case Scheme::NonSpecial:
-        return false;
-    }
-    ASSERT_NOT_REACHED();
-    return false;
-}
-
 ALWAYS_INLINE static Scheme scheme(StringView scheme)
 {
     auto length = scheme.length();
@@ -834,7 +816,7 @@
 }
 
 template<typename CharacterType>
-void URLParser::copyURLPartsUntil(const URL& base, URLPart part, const CodePointIterator<CharacterType>& iterator)
+void URLParser::copyURLPartsUntil(const URL& base, URLPart part, const CodePointIterator<CharacterType>& iterator, bool& isUTF8Encoding)
 {
     syntaxViolation(iterator);
 
@@ -873,7 +855,24 @@
         m_url.m_protocolIsInHTTPFamily = base.m_protocolIsInHTTPFamily;
         m_url.m_schemeEnd = base.m_schemeEnd;
     }
-    m_urlIsSpecial = isSpecial(scheme(StringView(m_asciiBuffer.data(), m_url.m_schemeEnd)));
+    switch (scheme(StringView(m_asciiBuffer.data(), m_url.m_schemeEnd))) {
+    case Scheme::WS:
+    case Scheme::WSS:
+        isUTF8Encoding = true;
+        FALLTHROUGH;
+    case Scheme::File:
+    case Scheme::FTP:
+    case Scheme::Gopher:
+    case Scheme::HTTP:
+    case Scheme::HTTPS:
+        m_urlIsSpecial = true;
+        return;
+    case Scheme::NonSpecial:
+        m_urlIsSpecial = false;
+        isUTF8Encoding = true;
+        return;
+    }
+    ASSERT_NOT_REACHED();
 }
 
 static const char* dotASCIICode = "2e";
@@ -1308,7 +1307,7 @@
                 return;
             }
             if (base.m_cannotBeABaseURL && *c == '#') {
-                copyURLPartsUntil(base, URLPart::QueryEnd, c);
+                copyURLPartsUntil(base, URLPart::QueryEnd, c, isUTF8Encoding);
                 state = State::Fragment;
                 appendToASCIIBuffer('#');
                 ++c;
@@ -1318,7 +1317,7 @@
                 state = State::Relative;
                 break;
             }
-            copyURLPartsUntil(base, URLPart::SchemeEnd, c);
+            copyURLPartsUntil(base, URLPart::SchemeEnd, c, isUTF8Encoding);
             appendToASCIIBuffer(':');
             state = State::File;
             break;
@@ -1368,7 +1367,7 @@
                 ++c;
                 break;
             case '?':
-                copyURLPartsUntil(base, URLPart::PathEnd, c);
+                copyURLPartsUntil(base, URLPart::PathEnd, c, isUTF8Encoding);
                 appendToASCIIBuffer('?');
                 ++c;
                 if (isUTF8Encoding)
@@ -1379,13 +1378,13 @@
                 }
                 break;
             case '#':
-                copyURLPartsUntil(base, URLPart::QueryEnd, c);
+                copyURLPartsUntil(base, URLPart::QueryEnd, c, isUTF8Encoding);
                 appendToASCIIBuffer('#');
                 state = State::Fragment;
                 ++c;
                 break;
             default:
-                copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c);
+                copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c, isUTF8Encoding);
                 state = State::Path;
                 break;
             }
@@ -1394,11 +1393,11 @@
             LOG_STATE("RelativeSlash");
             if (*c == '/' || *c == '\\') {
                 ++c;
-                copyURLPartsUntil(base, URLPart::SchemeEnd, c);
+                copyURLPartsUntil(base, URLPart::SchemeEnd, c, isUTF8Encoding);
                 appendToASCIIBuffer("://", 3);
                 state = State::SpecialAuthorityIgnoreSlashes;
             } else {
-                copyURLPartsUntil(base, URLPart::PortEnd, c);
+                copyURLPartsUntil(base, URLPart::PortEnd, c, isUTF8Encoding);
                 appendToASCIIBuffer('/');
                 m_url.m_pathAfterLastSlash = base.m_portEnd + 1;
                 state = State::Path;
@@ -1531,7 +1530,7 @@
             case '?':
                 syntaxViolation(c);
                 if (base.isValid() && base.protocolIs("file")) {
-                    copyURLPartsUntil(base, URLPart::PathEnd, c);
+                    copyURLPartsUntil(base, URLPart::PathEnd, c, isUTF8Encoding);
                     appendToASCIIBuffer('?');
                     ++c;
                 } else {
@@ -1555,7 +1554,7 @@
             case '#':
                 syntaxViolation(c);
                 if (base.isValid() && base.protocolIs("file")) {
-                    copyURLPartsUntil(base, URLPart::QueryEnd, c);
+                    copyURLPartsUntil(base, URLPart::QueryEnd, c, isUTF8Encoding);
                     appendToASCIIBuffer('#');
                 } else {
                     appendToASCIIBuffer("///#", 4);
@@ -1574,7 +1573,7 @@
             default:
                 syntaxViolation(c);
                 if (base.isValid() && base.protocolIs("file") && shouldCopyFileURL(c))
-                    copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c);
+                    copyURLPartsUntil(base, URLPart::PathAfterLastSlash, c, isUTF8Encoding);
                 else {
                     appendToASCIIBuffer("///", 3);
                     m_url.m_userStart = currentPosition(c) - 1;
@@ -1847,7 +1846,7 @@
         RELEASE_ASSERT_NOT_REACHED();
     case State::SpecialRelativeOrAuthority:
         LOG_FINAL_STATE("SpecialRelativeOrAuthority");
-        copyURLPartsUntil(base, URLPart::QueryEnd, c);
+        copyURLPartsUntil(base, URLPart::QueryEnd, c, isUTF8Encoding);
         m_url.m_fragmentEnd = m_url.m_queryEnd;
         break;
     case State::PathOrAuthority:
@@ -1870,7 +1869,7 @@
         RELEASE_ASSERT_NOT_REACHED();
     case State::RelativeSlash:
         LOG_FINAL_STATE("RelativeSlash");
-        copyURLPartsUntil(base, URLPart::PortEnd, c);
+        copyURLPartsUntil(base, URLPart::PortEnd, c, isUTF8Encoding);
         appendToASCIIBuffer('/');
         m_url.m_pathAfterLastSlash = base.m_portEnd + 1;
         m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
@@ -1935,7 +1934,7 @@
     case State::File:
         LOG_FINAL_STATE("File");
         if (base.isValid() && base.protocolIs("file")) {
-            copyURLPartsUntil(base, URLPart::QueryEnd, c);
+            copyURLPartsUntil(base, URLPart::QueryEnd, c, isUTF8Encoding);
             appendToASCIIBuffer(':');
         }
         syntaxViolation(c);

Modified: trunk/Source/WebCore/platform/URLParser.h (206886 => 206887)


--- trunk/Source/WebCore/platform/URLParser.h	2016-10-06 22:47:47 UTC (rev 206886)
+++ trunk/Source/WebCore/platform/URLParser.h	2016-10-06 23:05:17 UTC (rev 206887)
@@ -109,7 +109,7 @@
     void serializeIPv6(IPv6Address);
 
     enum class URLPart;
-    template<typename CharacterType> void copyURLPartsUntil(const URL& base, URLPart, const CodePointIterator<CharacterType>&);
+    template<typename CharacterType> void copyURLPartsUntil(const URL& base, URLPart, const CodePointIterator<CharacterType>&, bool& isUTF8Encoding);
     static size_t urlLengthUntilPart(const URL&, URLPart);
     void popPath();
 };

Modified: trunk/Tools/ChangeLog (206886 => 206887)


--- trunk/Tools/ChangeLog	2016-10-06 22:47:47 UTC (rev 206886)
+++ trunk/Tools/ChangeLog	2016-10-06 23:05:17 UTC (rev 206887)
@@ -1,5 +1,16 @@
 2016-10-06  Alex Christensen  <[email protected]>
 
+        URLParser: Non-ASCII characters in Non-UTF-8 encoded queries of relative URLs with ws, wss, or nonspecial schemes should be UTF-8 encoded
+        https://bugs.webkit.org/show_bug.cgi?id=163089
+
+        Reviewed by Tim Horton.
+
+        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+        (TestWebKitAPI::checkURL):
+        (TestWebKitAPI::TEST_F):
+
+2016-10-06  Alex Christensen  <[email protected]>
+
         Skip tabs and newlines between end of query and beginning of fragment in non-UTF-8-encoded URLs
         https://bugs.webkit.org/show_bug.cgi?id=163071
 

Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (206886 => 206887)


--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	2016-10-06 22:47:47 UTC (rev 206886)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	2016-10-06 23:05:17 UTC (rev 206887)
@@ -118,13 +118,14 @@
     EXPECT_TRUE(URLParser::internalValuesConsistent(url));
     EXPECT_TRUE(URLParser::internalValuesConsistent(oldURL));
 
-    if (testTabs == TestTabs::Yes) {
-        for (size_t i = 0; i < urlString.length(); ++i) {
-            String urlStringWithTab = insertTabAtLocation(urlString, i);
-            checkURL(urlStringWithTab,
-                parts.isInvalid() ? invalidParts(urlStringWithTab) : parts,
-                TestTabs::No);
-        }
+    if (testTabs == TestTabs::No)
+        return;
+
+    for (size_t i = 0; i < urlString.length(); ++i) {
+        String urlStringWithTab = insertTabAtLocation(urlString, i);
+        checkURL(urlStringWithTab,
+            parts.isInvalid() ? invalidParts(urlStringWithTab) : parts,
+            TestTabs::No);
     }
 }
 
@@ -367,14 +368,15 @@
     EXPECT_TRUE(URLParser::internalValuesConsistent(url));
     EXPECT_TRUE(URLParser::internalValuesConsistent(oldURL));
     
-    if (testTabs == TestTabs::Yes) {
-        for (size_t i = 0; i < urlString.length(); ++i) {
-            String urlStringWithTab = insertTabAtLocation(urlString, i);
-            checkRelativeURL(urlStringWithTab,
-                baseURLString,
-                parts.isInvalid() ? invalidParts(urlStringWithTab) : parts,
-                TestTabs::No);
-        }
+    if (testTabs == TestTabs::No)
+        return;
+
+    for (size_t i = 0; i < urlString.length(); ++i) {
+        String urlStringWithTab = insertTabAtLocation(urlString, i);
+        checkRelativeURL(urlStringWithTab,
+            baseURLString,
+            parts.isInvalid() ? invalidParts(urlStringWithTab) : parts,
+            TestTabs::No);
     }
 }
 
@@ -491,14 +493,15 @@
     EXPECT_TRUE(URLParser::internalValuesConsistent(url));
     EXPECT_TRUE(URLParser::internalValuesConsistent(oldURL));
     
-    if (testTabs == TestTabs::Yes) {
-        for (size_t i = 0; i < urlString.length(); ++i) {
-            String urlStringWithTab = insertTabAtLocation(urlString, i);
-            checkURLDifferences(urlStringWithTab,
-                partsNew.isInvalid() ? invalidParts(urlStringWithTab) : partsNew,
-                partsOld.isInvalid() ? invalidParts(urlStringWithTab) : partsOld,
-                TestTabs::No);
-        }
+    if (testTabs == TestTabs::No)
+        return;
+
+    for (size_t i = 0; i < urlString.length(); ++i) {
+        String urlStringWithTab = insertTabAtLocation(urlString, i);
+        checkURLDifferences(urlStringWithTab,
+            partsNew.isInvalid() ? invalidParts(urlStringWithTab) : partsNew,
+            partsOld.isInvalid() ? invalidParts(urlStringWithTab) : partsOld,
+            TestTabs::No);
     }
 }
 
@@ -535,14 +538,15 @@
     EXPECT_TRUE(URLParser::internalValuesConsistent(url));
     EXPECT_TRUE(URLParser::internalValuesConsistent(oldURL));
 
-    if (testTabs == TestTabs::Yes) {
-        for (size_t i = 0; i < urlString.length(); ++i) {
-            String urlStringWithTab = insertTabAtLocation(urlString, i);
-            checkRelativeURLDifferences(urlStringWithTab, baseURLString,
-                partsNew.isInvalid() ? invalidParts(urlStringWithTab) : partsNew,
-                partsOld.isInvalid() ? invalidParts(urlStringWithTab) : partsOld,
-                TestTabs::No);
-        }
+    if (testTabs == TestTabs::No)
+        return;
+
+    for (size_t i = 0; i < urlString.length(); ++i) {
+        String urlStringWithTab = insertTabAtLocation(urlString, i);
+        checkRelativeURLDifferences(urlStringWithTab, baseURLString,
+            partsNew.isInvalid() ? invalidParts(urlStringWithTab) : partsNew,
+            partsOld.isInvalid() ? invalidParts(urlStringWithTab) : partsOld,
+            TestTabs::No);
     }
 }
 
@@ -1173,16 +1177,43 @@
     EXPECT_TRUE(eq(parts.fragment, url.fragmentIdentifier()));
     EXPECT_TRUE(eq(parts.string, url.string()));
 
-    if (testTabs == TestTabs::Yes) {
-        for (size_t i = 0; i < urlString.length(); ++i) {
-            String urlStringWithTab = insertTabAtLocation(urlString, i);
-            checkURL(urlStringWithTab, encoding,
-                parts.isInvalid() ? invalidParts(urlStringWithTab) : parts,
-                TestTabs::No);
-        }
+    if (testTabs == TestTabs::No)
+        return;
+
+    for (size_t i = 0; i < urlString.length(); ++i) {
+        String urlStringWithTab = insertTabAtLocation(urlString, i);
+        checkURL(urlStringWithTab, encoding,
+            parts.isInvalid() ? invalidParts(urlStringWithTab) : parts,
+            TestTabs::No);
     }
 }
 
+static void checkURL(const String& urlString, const String& baseURLString, const TextEncoding& encoding, const ExpectedParts& parts, TestTabs testTabs = TestTabs::Yes)
+{
+    URLParser baseParser(baseURLString, { }, encoding);
+    URLParser parser(urlString, baseParser.result(), encoding);
+    auto url = ""
+    EXPECT_TRUE(eq(parts.protocol, url.protocol()));
+    EXPECT_TRUE(eq(parts.user, url.user()));
+    EXPECT_TRUE(eq(parts.password, url.pass()));
+    EXPECT_TRUE(eq(parts.host, url.host()));
+    EXPECT_EQ(parts.port, url.port());
+    EXPECT_TRUE(eq(parts.path, url.path()));
+    EXPECT_TRUE(eq(parts.query, url.query()));
+    EXPECT_TRUE(eq(parts.fragment, url.fragmentIdentifier()));
+    EXPECT_TRUE(eq(parts.string, url.string()));
+    
+    if (testTabs == TestTabs::No)
+        return;
+
+    for (size_t i = 0; i < urlString.length(); ++i) {
+        String urlStringWithTab = insertTabAtLocation(urlString, i);
+        checkURL(urlStringWithTab, baseURLString, encoding,
+            parts.isInvalid() ? invalidParts(urlStringWithTab) : parts,
+            TestTabs::No);
+    }
+}
+
 TEST_F(URLParserTest, QueryEncoding)
 {
     checkURL(utf16String(u"http://host?ß😍#ß😍"), UTF8Encoding(), {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", utf16String(u"ß😍"), utf16String(u"http://host/?%C3%9F%F0%9F%98%8D#ß😍")}, testTabsValueForSurrogatePairs);
@@ -1207,7 +1238,14 @@
     checkURL(makeString("asdf://host/path?", withUmlauts), iso88591, {"asdf", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "", "asdf://host/path?%C3%9C%D0%B0%D1%91"});
     checkURL(makeString("https://host/path?", withUmlauts), iso88591, {"https", "", "", "host", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "", "https://host/path?%DC%26%231072%3B%26%231105%3B"});
     checkURL(makeString("gopher://host/path?", withUmlauts), iso88591, {"gopher", "", "", "host", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "", "gopher://host/path?%DC%26%231072%3B%26%231105%3B"});
-    
+    checkURL(makeString("/path?", withUmlauts, "#fragment"), "ws://example.com/", iso88591, {"ws", "", "", "example.com", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "ws://example.com/path?%C3%9C%D0%B0%D1%91#fragment"});
+    checkURL(makeString("/path?", withUmlauts, "#fragment"), "wss://example.com/", iso88591, {"wss", "", "", "example.com", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "wss://example.com/path?%C3%9C%D0%B0%D1%91#fragment"});
+    checkURL(makeString("/path?", withUmlauts, "#fragment"), "asdf://example.com/", iso88591, {"asdf", "", "", "example.com", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "asdf://example.com/path?%C3%9C%D0%B0%D1%91#fragment"});
+    checkURL(makeString("/path?", withUmlauts, "#fragment"), "https://example.com/", iso88591, {"https", "", "", "example.com", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "fragment", "https://example.com/path?%DC%26%231072%3B%26%231105%3B#fragment"});
+    checkURL(makeString("/path?", withUmlauts, "#fragment"), "gopher://example.com/", iso88591, {"gopher", "", "", "example.com", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "fragment", "gopher://example.com/path?%DC%26%231072%3B%26%231105%3B#fragment"});
+    checkURL(makeString("gopher://host/path?", withUmlauts, "#fragment"), "asdf://example.com/?doesntmatter", iso88591, {"gopher", "", "", "host", 0, "/path", "%DC%26%231072%3B%26%231105%3B", "fragment", "gopher://host/path?%DC%26%231072%3B%26%231105%3B#fragment"});
+    checkURL(makeString("asdf://host/path?", withUmlauts, "#fragment"), "http://example.com/?doesntmatter", iso88591, {"asdf", "", "", "host", 0, "/path", "%C3%9C%D0%B0%D1%91", "fragment", "asdf://host/path?%C3%9C%D0%B0%D1%91#fragment"});
+
     // FIXME: Add more tests with other encodings and things like non-ascii characters, emoji and unmatched surrogate pairs.
 }
 
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to