Title: [205922] trunk/Source/WebCore
Revision
205922
Author
[email protected]
Date
2016-09-14 12:34:53 -0700 (Wed, 14 Sep 2016)

Log Message

URLParser: Add fast path for hosts containing no non-ASCII or percent characters
https://bugs.webkit.org/show_bug.cgi?id=161970

Reviewed by Daniel Bates.

Covered by existing tests.

* platform/URLParser.cpp:
(WebCore::URLParser::parse):
(WebCore::URLParser::parseHost):
* platform/URLParser.h:
When parsing the host of a URL, a host that contains non-ASCII characters or percent-encoded
values needs additional encoding.  Many URLs, including all already-parsed URLs, have no such
characters in their host and therefore do not need the additional encoding.  Skipping it for
those hosts significantly speeds up my URL parsing benchmark.

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (205921 => 205922)


--- trunk/Source/WebCore/ChangeLog	2016-09-14 18:51:26 UTC (rev 205921)
+++ trunk/Source/WebCore/ChangeLog	2016-09-14 19:34:53 UTC (rev 205922)
@@ -1,3 +1,21 @@
+2016-09-14  Alex Christensen  <[email protected]>
+
+        URLParser: Add fast path for hosts containing no non-ASCII or percent characters
+        https://bugs.webkit.org/show_bug.cgi?id=161970
+
+        Reviewed by Daniel Bates.
+
+        Covered by existing tests.
+
+        * platform/URLParser.cpp:
+        (WebCore::URLParser::parse):
+        (WebCore::URLParser::parseHost):
+        * platform/URLParser.h:
+        When parsing the host of a URL, if it contains non-ASCII characters or percent-encoded values,
+        we need to do additional encoding.  Many URLs, including all already-parsed URLs, do not have
+        such characters in their host, and therefore do not need the additional encoding.  Skipping
+        the additional encoding significantly speeds up my URL parsing benchmark.
+
 2016-09-14  Zalan Bujtas  <[email protected]>
 
         ShowRenderTree should take position offset into account when printing inflow positioned renderers.

Modified: trunk/Source/WebCore/platform/URLParser.cpp (205921 => 205922)


--- trunk/Source/WebCore/platform/URLParser.cpp	2016-09-14 18:51:26 UTC (rev 205921)
+++ trunk/Source/WebCore/platform/URLParser.cpp	2016-09-14 19:34:53 UTC (rev 205922)
@@ -43,6 +43,7 @@
 template<typename CharacterType> static bool isInDefaultEncodeSet(CharacterType character) { return isInSimpleEncodeSet(character) || character == 0x0020 || character == '"' || character == '#' || character == '<' || character == '>' || character == '?' || character == '`' || character == '{' || character == '}'; }
 template<typename CharacterType> static bool isInUserInfoEncodeSet(CharacterType character) { return isInDefaultEncodeSet(character) || character == '/' || character == ':' || character == ';' || character == '=' || character == '@' || character == '[' || character == '\\' || character == ']' || character == '^' || character == '|'; }
 template<typename CharacterType> static bool isInvalidDomainCharacter(CharacterType character) { return character == 0x0000 || character == 0x0009 || character == 0x000A || character == 0x000D || character == 0x0020 || character == '#' || character == '%' || character == '/' || character == ':' || character == '?' || character == '@' || character == '[' || character == '\\' || character == ']'; }
+template<typename CharacterType> static bool isPercentOrNonASCII(CharacterType character) { return !isASCII(character) || character == '%'; }
     
 static bool isWindowsDriveLetter(StringView::CodePoints::Iterator iterator, const StringView::CodePoints::Iterator& end)
 {
@@ -733,6 +734,7 @@
                         ++c;
                     authorityOrHostBegin = c;
                     state = State::Host;
+                    m_hostHasPercentOrNonASCII = false;
                     break;
                 }
                 bool isSlash = *c == '/' || (m_urlIsSpecial && *c == '\\');
@@ -748,6 +750,8 @@
                     state = State::Path;
                     break;
                 }
+                if (isPercentOrNonASCII(*c))
+                    m_hostHasPercentOrNonASCII = true;
                 ++c;
             }
             break;
@@ -759,6 +763,8 @@
                 state = State::Path;
                 break;
             }
+            if (isPercentOrNonASCII(*c))
+                m_hostHasPercentOrNonASCII = true;
             ++c;
             break;
         case State::File:
@@ -889,6 +895,8 @@
                 state = State::PathStart;
                 break;
             }
+            if (isPercentOrNonASCII(*c))
+                m_hostHasPercentOrNonASCII = true;
             ++c;
             break;
         case State::PathStart:
@@ -1558,6 +1566,40 @@
             return true;
         }
     }
+    
+    if (!m_hostHasPercentOrNonASCII) {
+        auto hostIterator = iterator;
+        for (; iterator != end; ++iterator) {
+            if (isTabOrNewline(*iterator))
+                continue;
+            if (*iterator == ':')
+                break;
+        }
+        if (auto address = parseIPv4Host(hostIterator, iterator)) {
+            serializeIPv4(address.value(), m_buffer);
+            m_url.m_hostEnd = m_buffer.length();
+            if (iterator == end) {
+                m_url.m_portEnd = m_buffer.length();
+                return true;
+            }
+            ++iterator;
+            return parsePort(iterator, end);
+        }
+        for (; hostIterator != iterator; ++hostIterator) {
+            if (!isTabOrNewline(*hostIterator))
+                m_buffer.append(toASCIILower(*hostIterator));
+        }
+        m_url.m_hostEnd = m_buffer.length();
+        if (hostIterator != end) {
+            ASSERT(*hostIterator == ':');
+            ++hostIterator;
+            while (hostIterator != end && isTabOrNewline(*hostIterator))
+                ++hostIterator;
+            return parsePort(hostIterator, end);
+        }
+        m_url.m_portEnd = m_buffer.length();
+        return true;
+    }
 
     // FIXME: We probably don't need to make so many buffers and String copies.
     StringBuilder utf8Encoded;

Modified: trunk/Source/WebCore/platform/URLParser.h (205921 => 205922)


--- trunk/Source/WebCore/platform/URLParser.h	2016-09-14 18:51:26 UTC (rev 205921)
+++ trunk/Source/WebCore/platform/URLParser.h	2016-09-14 19:34:53 UTC (rev 205922)
@@ -48,6 +48,7 @@
     URL m_url;
     StringBuilder m_buffer;
     bool m_urlIsSpecial { false };
+    bool m_hostHasPercentOrNonASCII { false };
     void parseAuthority(StringView::CodePoints::Iterator&, const StringView::CodePoints::Iterator& end);
     bool parseHost(StringView::CodePoints::Iterator&, const StringView::CodePoints::Iterator& end);
     bool parsePort(StringView::CodePoints::Iterator&, const StringView::CodePoints::Iterator& end);
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to