Title: [206162] trunk
Revision
206162
Author
achristen...@apple.com
Date
2016-09-20 11:46:26 -0700 (Tue, 20 Sep 2016)

Log Message

Align URLParser with web platform tests when parsing non-special relative URLs ending in AuthorityOrHost state
https://bugs.webkit.org/show_bug.cgi?id=162251

Reviewed by Tim Horton.

Source/WebCore:

Covered by new and updated API tests.

* platform/URLParser.cpp:
(WebCore::URLParser::parse):
Fix parsing of non-special URLs that end after scheme:// with no authority.
We used to assume that parsing non-special schemes would never end with just scheme:// but a string can indeed end right there.
When a non-special relative URL contains just scheme:// we need the resulting URL to be valid to conform with the web platform tests.
(WebCore::URLParser::parseHostAndPort):
Renamed to reflect what the function actually does.
(WebCore::URLParser::internalValuesConsistent):
Add utility function for testing.
(WebCore::URLParser::parseHost): Deleted.
* platform/URLParser.h:

Tools:

* TestWebKitAPI/Tests/WebCore/URLParser.cpp:
(TestWebKitAPI::checkURL):
(TestWebKitAPI::TEST_F):
(TestWebKitAPI::checkRelativeURL):
(TestWebKitAPI::checkURLDifferences):
(TestWebKitAPI::checkRelativeURLDifferences):

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (206161 => 206162)


--- trunk/Source/WebCore/ChangeLog	2016-09-20 18:41:43 UTC (rev 206161)
+++ trunk/Source/WebCore/ChangeLog	2016-09-20 18:46:26 UTC (rev 206162)
@@ -1,3 +1,24 @@
+2016-09-20  Alex Christensen  <achristen...@webkit.org>
+
+        Align URLParser with web platform tests when parsing non-special relative URLs ending in AuthorityOrHost state
+        https://bugs.webkit.org/show_bug.cgi?id=162251
+
+        Reviewed by Tim Horton.
+
+        Covered by new and updated API tests.
+
+        * platform/URLParser.cpp:
+        (WebCore::URLParser::parse):
+        Fix parsing of non-special URLs that end after scheme:// with no authority.
+        We used to assume that parsing non-special schemes would never end with just scheme:// but a string can indeed end right there.
+        When a non-special relative URL contains just scheme:// we need the resulting URL to be valid to conform with the web platform tests.
+        (WebCore::URLParser::parseHostAndPort):
+        Renamed to reflect what the function actually does.
+        (WebCore::URLParser::internalValuesConsistent):
+        Add utility function for testing.
+        (WebCore::URLParser::parseHost): Deleted.
+        * platform/URLParser.h:
+
 2016-09-20  Javier Fernandez  <jfernan...@igalia.com>
 
         [css-grid] The 'grid' shorthand has a new syntax.

Modified: trunk/Source/WebCore/platform/URLParser.cpp (206161 => 206162)


--- trunk/Source/WebCore/platform/URLParser.cpp	2016-09-20 18:41:43 UTC (rev 206161)
+++ trunk/Source/WebCore/platform/URLParser.cpp	2016-09-20 18:46:26 UTC (rev 206162)
@@ -999,20 +999,20 @@
                     else
                         state = State::SpecialAuthoritySlashes;
                 } else {
-                    m_url.m_userStart = m_asciiBuffer.size();
-                    m_url.m_userEnd = m_url.m_userStart;
-                    m_url.m_passwordEnd = m_url.m_userStart;
-                    m_url.m_hostEnd = m_url.m_userStart;
-                    m_url.m_portEnd = m_url.m_userStart;
                     auto maybeSlash = c;
                     incrementIteratorSkippingTabAndNewLine<serialized>(maybeSlash);
                     if (!maybeSlash.atEnd() && *maybeSlash == '/') {
                         m_asciiBuffer.append('/');
-                        m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
+                        m_url.m_userStart = m_asciiBuffer.size();
                         state = State::PathOrAuthority;
                         c = maybeSlash;
                         ASSERT(*c == '/');
                     } else {
+                        m_url.m_userStart = m_asciiBuffer.size();
+                        m_url.m_userEnd = m_url.m_userStart;
+                        m_url.m_passwordEnd = m_url.m_userStart;
+                        m_url.m_hostEnd = m_url.m_userStart;
+                        m_url.m_portEnd = m_url.m_userStart;
                         m_url.m_pathAfterLastSlash = m_url.m_userStart;
                         m_url.m_cannotBeABaseURL = true;
                         state = State::CannotBeABaseURLPath;
@@ -1076,8 +1076,16 @@
                 state = State::AuthorityOrHost;
                 ++c;
                 authorityOrHostBegin = c;
-            } else
+            } else {
+                ASSERT(m_asciiBuffer.last() == '/');
+                m_url.m_userStart = m_asciiBuffer.size() - 1;
+                m_url.m_userEnd = m_url.m_userStart;
+                m_url.m_passwordEnd = m_url.m_userStart;
+                m_url.m_hostEnd = m_url.m_userStart;
+                m_url.m_portEnd = m_url.m_userStart;
+                m_url.m_pathAfterLastSlash = m_url.m_userStart + 1;
                 state = State::Path;
+            }
             break;
         case State::Relative:
             LOG_STATE("Relative");
@@ -1162,7 +1170,7 @@
                 if (isSlash || *c == '?' || *c == '#') {
                     m_url.m_userEnd = m_asciiBuffer.size();
                     m_url.m_passwordEnd = m_url.m_userEnd;
-                    if (!parseHost<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c)))
+                    if (!parseHostAndPort<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c)))
                         return failure(input, length);
                     if (!isSlash) {
                         m_asciiBuffer.append('/');
@@ -1179,7 +1187,7 @@
         case State::Host:
             LOG_STATE("Host");
             if (*c == '/' || *c == '?' || *c == '#') {
-                if (!parseHost<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c)))
+                if (!parseHostAndPort<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c)))
                     return failure(input, length);
                 state = State::Path;
                 break;
@@ -1309,7 +1317,7 @@
                     state = State::Path;
                     break;
                 }
-                if (!parseHost<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c)))
+                if (!parseHostAndPort<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c)))
                     return failure(input, length);
                 
                 if (StringView(m_asciiBuffer.data() + m_url.m_passwordEnd, m_asciiBuffer.size() - m_url.m_passwordEnd) == "localhost")  {
@@ -1433,6 +1441,11 @@
         break;
     case State::PathOrAuthority:
         LOG_FINAL_STATE("PathOrAuthority");
+        m_url.m_userEnd = m_asciiBuffer.size();
+        m_url.m_passwordEnd = m_url.m_userEnd;
+        m_url.m_hostEnd = m_url.m_userEnd;
+        m_url.m_portEnd = m_url.m_userEnd;
+        m_url.m_pathAfterLastSlash = m_url.m_userEnd;
         m_url.m_pathEnd = m_url.m_pathAfterLastSlash;
         m_url.m_queryEnd = m_url.m_pathAfterLastSlash;
         m_url.m_fragmentEnd = m_url.m_pathAfterLastSlash;
@@ -1470,11 +1483,20 @@
         LOG_FINAL_STATE("AuthorityOrHost");
         m_url.m_userEnd = m_asciiBuffer.size();
         m_url.m_passwordEnd = m_url.m_userEnd;
-        FALLTHROUGH;
+        if (authorityOrHostBegin.atEnd()) {
+            m_url.m_hostEnd = m_url.m_userEnd;
+            m_url.m_portEnd = m_url.m_userEnd;
+        } else if (!parseHostAndPort<serialized>(authorityOrHostBegin))
+            return failure(input, length);
+        m_asciiBuffer.append('/');
+        m_url.m_pathEnd = m_url.m_portEnd + 1;
+        m_url.m_pathAfterLastSlash = m_url.m_pathEnd;
+        m_url.m_queryEnd = m_url.m_pathEnd;
+        m_url.m_fragmentEnd = m_url.m_pathEnd;
+        break;
     case State::Host:
-        if (state == State::Host)
-            LOG_FINAL_STATE("Host");
-        if (!parseHost<serialized>(authorityOrHostBegin))
+        LOG_FINAL_STATE("Host");
+        if (!parseHostAndPort<serialized>(authorityOrHostBegin))
             return failure(input, length);
         m_asciiBuffer.append('/');
         m_url.m_pathEnd = m_url.m_portEnd + 1;
@@ -1528,7 +1550,7 @@
             break;
         }
 
-        if (!parseHost<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c)))
+        if (!parseHostAndPort<serialized>(CodePointIterator<CharacterType>(authorityOrHostBegin, c)))
             return failure(input, length);
 
         if (StringView(m_asciiBuffer.data() + m_url.m_passwordEnd, m_asciiBuffer.size() - m_url.m_passwordEnd) == "localhost")  {
@@ -1583,6 +1605,7 @@
     }
     m_url.m_isValid = true;
     LOG(URLParser, "Parsed URL <%s>", m_url.m_string.utf8().data());
+    ASSERT(internalValuesConsistent(m_url));
     return m_url;
 }
 
@@ -2006,7 +2029,7 @@
 }
 
 template<bool serialized, typename CharacterType>
-bool URLParser::parseHost(CodePointIterator<CharacterType> iterator)
+bool URLParser::parseHostAndPort(CodePointIterator<CharacterType> iterator)
 {
     if (iterator.atEnd())
         return false;
@@ -2224,6 +2247,22 @@
         && a.m_fragmentEnd == b.m_fragmentEnd;
 }
 
+bool URLParser::internalValuesConsistent(const URL& url)
+{    
+    return url.m_schemeEnd <= url.m_userStart
+        && url.m_userStart <= url.m_userEnd
+        && url.m_userEnd <= url.m_passwordEnd
+        && url.m_passwordEnd <= url.m_hostEnd
+        && url.m_hostEnd <= url.m_hostEnd
+        && url.m_portEnd <= url.m_pathAfterLastSlash
+        && url.m_pathAfterLastSlash <= url.m_pathEnd
+        && url.m_pathEnd <= url.m_queryEnd
+        && url.m_queryEnd <= url.m_fragmentEnd
+        && (url.m_isValid ? url.m_fragmentEnd == url.m_string.length() : !url.m_fragmentEnd);
+    // FIXME: Why do we even store m_fragmentEnd?
+    // It should be able to be deduced from m_isValid and m_string.length() to save memory.
+}
+
 static bool urlParserEnabled = false;
 
 void URLParser::setEnabled(bool enabled)

Modified: trunk/Source/WebCore/platform/URLParser.h (206161 => 206162)


--- trunk/Source/WebCore/platform/URLParser.h	2016-09-20 18:41:43 UTC (rev 206161)
+++ trunk/Source/WebCore/platform/URLParser.h	2016-09-20 18:46:26 UTC (rev 206162)
@@ -38,7 +38,9 @@
 public:
     WEBCORE_EXPORT URL parse(const String&, const URL& = { }, const TextEncoding& = UTF8Encoding());
     WEBCORE_EXPORT URL parseSerializedURL(const String&);
+
     WEBCORE_EXPORT static bool allValuesEqual(const URL&, const URL&);
+    WEBCORE_EXPORT static bool internalValuesConsistent(const URL&);
 
     WEBCORE_EXPORT static bool enabled();
     WEBCORE_EXPORT static void setEnabled(bool);
@@ -56,7 +58,7 @@
 
     template<bool serialized, typename CharacterType> URL parse(const CharacterType*, const unsigned length, const URL&, const TextEncoding&);
     template<bool serialized, typename CharacterType> void parseAuthority(CodePointIterator<CharacterType>);
-    template<bool serialized, typename CharacterType> bool parseHost(CodePointIterator<CharacterType>);
+    template<bool serialized, typename CharacterType> bool parseHostAndPort(CodePointIterator<CharacterType>);
     template<bool serialized, typename CharacterType> bool parsePort(CodePointIterator<CharacterType>&);
     template<typename CharacterType> URL failure(const CharacterType*, unsigned length);
 

Modified: trunk/Tools/ChangeLog (206161 => 206162)


--- trunk/Tools/ChangeLog	2016-09-20 18:41:43 UTC (rev 206161)
+++ trunk/Tools/ChangeLog	2016-09-20 18:46:26 UTC (rev 206162)
@@ -1,5 +1,19 @@
 2016-09-20  Alex Christensen  <achristen...@webkit.org>
 
+        Align URLParser with web platform tests when parsing non-special relative URLs ending in AuthorityOrHost state
+        https://bugs.webkit.org/show_bug.cgi?id=162251
+
+        Reviewed by Tim Horton.
+
+        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+        (TestWebKitAPI::checkURL):
+        (TestWebKitAPI::TEST_F):
+        (TestWebKitAPI::checkRelativeURL):
+        (TestWebKitAPI::checkURLDifferences):
+        (TestWebKitAPI::checkRelativeURLDifferences):
+
+2016-09-20  Alex Christensen  <achristen...@webkit.org>
+
         URLParser should allow '@' in user
         https://bugs.webkit.org/show_bug.cgi?id=162272
 

Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (206161 => 206162)


--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	2016-09-20 18:41:43 UTC (rev 206161)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	2016-09-20 18:46:26 UTC (rev 206162)
@@ -82,6 +82,8 @@
     EXPECT_TRUE(eq(parts.string, oldURL.string()));
     
     EXPECT_TRUE(URLParser::allValuesEqual(url, oldURL));
+    EXPECT_TRUE(URLParser::internalValuesConsistent(url));
+    EXPECT_TRUE(URLParser::internalValuesConsistent(oldURL));
 }
 
 template<size_t length>
@@ -201,9 +203,9 @@
     checkURL("sc:/pa/pa", {"sc", "", "", "", 0, "/pa/pa", "", "", "sc:/pa/pa"});
     checkURL("sc:/pa", {"sc", "", "", "", 0, "/pa", "", "", "sc:/pa"});
     checkURL("sc:/pa/", {"sc", "", "", "", 0, "/pa/", "", "", "sc:/pa/"});
+    checkURL("notspecial:/notuser:notpassword@nothost", {"notspecial", "", "", "", 0, "/notuser:notpassword@nothost", "", "", "notspecial:/notuser:notpassword@nothost"});
     checkURL("sc://pa/", {"sc", "", "", "pa", 0, "/", "", "", "sc://pa/"});
     checkURL("http://host   \a   ", {"http", "", "", "host", 0, "/", "", "", "http://host/"});
-    checkURL("notspecial:/", {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});
     checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});
     checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
     checkURL("http:/a", {"http", "", "", "a", 0, "/", "", "", "http://a/"});
@@ -210,6 +212,8 @@
     checkURL("http://256/", {"http", "", "", "256", 0, "/", "", "", "http://256/"});
     checkURL("http://256./", {"http", "", "", "256.", 0, "/", "", "", "http://256./"});
     checkURL("http://123.256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});
+    checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});
+    checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
     // FIXME: Fix and add a test with an invalid surrogate pair at the end with a space as the second code unit.
 
     // This disagrees with the web platform test for http://:@www.example.com but agrees with Chrome and URL::parse,
@@ -246,6 +250,8 @@
     EXPECT_TRUE(eq(parts.string, oldURL.string()));
 
     EXPECT_TRUE(URLParser::allValuesEqual(url, oldURL));
+    EXPECT_TRUE(URLParser::internalValuesConsistent(url));
+    EXPECT_TRUE(URLParser::internalValuesConsistent(oldURL));
 }
 
 TEST_F(URLParserTest, ParseRelative)
@@ -287,9 +293,7 @@
     checkRelativeURL("  foo.com  ", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/foo.com", "", "", "http://example.org/foo/foo.com"});
     checkRelativeURL(" \a baz", "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/baz", "", "", "http://example.org/foo/baz"});
     checkRelativeURL("~", "http://example.org", {"http", "", "", "example.org", 0, "/~", "", "", "http://example.org/~"});
-    checkRelativeURL("notspecial:/", "about:blank", {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});
     checkRelativeURL("notspecial:", "about:blank", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
-    checkRelativeURL("notspecial:/", "http://host", {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});
     checkRelativeURL("notspecial:", "http://host", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
     checkRelativeURL("http:", "http://host", {"http", "", "", "host", 0, "/", "", "", "http://host/"});
     
@@ -324,6 +328,8 @@
     EXPECT_TRUE(eq(partsOld.string, oldURL.string()));
     
     EXPECT_FALSE(URLParser::allValuesEqual(url, oldURL));
+    EXPECT_TRUE(URLParser::internalValuesConsistent(url));
+    EXPECT_TRUE(URLParser::internalValuesConsistent(oldURL));
 }
 
 static void checkRelativeURLDifferences(const String& urlString, const String& baseURLString, const ExpectedParts& partsNew, const ExpectedParts& partsOld)
@@ -355,6 +361,8 @@
     EXPECT_TRUE(eq(partsOld.string, oldURL.string()));
     
     EXPECT_FALSE(URLParser::allValuesEqual(url, oldURL));
+    EXPECT_TRUE(URLParser::internalValuesConsistent(url));
+    EXPECT_TRUE(URLParser::internalValuesConsistent(oldURL));
 }
 
 // These are differences between the new URLParser and the old URL::parse which make URLParser more standards compliant.
@@ -547,6 +555,20 @@
     checkRelativeURLDifferences("https://@test@test@example:800/", "http://doesnotmatter/",
         {"https", "@test@test", "", "example", 800, "/", "", "", "https://%40test%40test@example:800/"},
         {"", "", "", "", 0, "", "", "", "https://@test@test@example:800/"});
+    checkRelativeURLDifferences("foo://", "http://example.org/foo/bar",
+        {"foo", "", "", "", 0, "/", "", "", "foo:///"},
+        {"foo", "", "", "", 0, "//", "", "", "foo://"});
+
+    // This matches the spec and web platform tests, but not Chrome, Firefox, or URL::parse.
+    checkRelativeURLDifferences("notspecial:/", "about:blank",
+        {"notspecial", "", "", "", 0, "", "", "", "notspecial:/"},
+        {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});
+    checkRelativeURLDifferences("notspecial:/", "http://host",
+        {"notspecial", "", "", "", 0, "", "", "", "notspecial:/"},
+        {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});
+    checkURLDifferences("notspecial:/",
+        {"notspecial", "", "", "", 0, "", "", "", "notspecial:/"},
+        {"notspecial", "", "", "", 0, "/", "", "", "notspecial:/"});
 }
 
 TEST_F(URLParserTest, DefaultPort)
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to