Title: [205918] trunk/Source/WebCore
Revision
205918
Author
[email protected]
Date
2016-09-14 11:16:29 -0700 (Wed, 14 Sep 2016)

Log Message

URLParser: Add fast path for utf8 encoding queries
https://bugs.webkit.org/show_bug.cgi?id=161968

Reviewed by Daniel Bates.

No change in behavior.  Covered by existing tests.

* platform/URLParser.cpp:
(WebCore::utf8PercentEncodeQuery):
(WebCore::URLParser::parse):
If the text encoding is UTF-8 (which is quite common), then we can encode the query
as we iterate its code points. This reduces memory allocation and significantly speeds
up my URL parsing benchmark.

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (205917 => 205918)


--- trunk/Source/WebCore/ChangeLog	2016-09-14 17:55:15 UTC (rev 205917)
+++ trunk/Source/WebCore/ChangeLog	2016-09-14 18:16:29 UTC (rev 205918)
@@ -1,3 +1,19 @@
+2016-09-14  Alex Christensen  <[email protected]>
+
+        URLParser: Add fast path for utf8 encoding queries
+        https://bugs.webkit.org/show_bug.cgi?id=161968
+
+        Reviewed by Daniel Bates.
+
+        No change in behavior.  Covered by existing tests.
+
+        * platform/URLParser.cpp:
+        (WebCore::utf8PercentEncodeQuery):
+        (WebCore::URLParser::parse):
+        If the text encoding is UTF-8 (which is quite common), then we can encode the query
+        as we iterate its code points. This reduces memory allocation and significantly speeds
+        up my URL parsing benchmark.
+
 2016-09-14  Chris Dumez  <[email protected]>
 
         Regression(r152725): generate-bindings.pl --write-dependencies does not work

Modified: trunk/Source/WebCore/platform/URLParser.cpp (205917 => 205918)


--- trunk/Source/WebCore/platform/URLParser.cpp	2016-09-14 17:55:15 UTC (rev 205917)
+++ trunk/Source/WebCore/platform/URLParser.cpp	2016-09-14 18:16:29 UTC (rev 205918)
@@ -112,6 +112,23 @@
     return byte == 0x3E;
 }
 
+static void utf8PercentEncodeQuery(UChar32 codePoint, StringBuilder& builder)
+{
+    uint8_t buffer[U8_MAX_LENGTH]; // Scratch space that U8_APPEND fills with the UTF-8 bytes of codePoint.
+    int32_t offset = 0;
+    UBool error = false;
+    U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error);
+    ASSERT_WITH_SECURITY_IMPLICATION(offset <= static_cast<int32_t>(sizeof(buffer)));
+    // FIXME: Check error. U8_APPEND sets it when codePoint is not a valid code point or its bytes do not fit; it is currently ignored.
+    for (int32_t i = 0; i < offset; ++i) {
+        auto byte = buffer[i];
+        if (shouldPercentEncodeQueryByte(byte))
+            percentEncode(byte, builder);
+        else
+            builder.append(byte);
+    }
+}
+    
 static void encodeQuery(const StringBuilder& source, StringBuilder& destination, const TextEncoding& encoding)
 {
     // FIXME: It is unclear in the spec what to do when encoding fails. The behavior should be specified and tested.
@@ -478,7 +495,7 @@
     m_buffer.clear();
     m_buffer.reserveCapacity(input.length());
     
-    // FIXME: We shouldn't need to allocate another buffer for this.
+    bool isUTF8Encoding = encoding == UTF8Encoding();
     StringBuilder queryBuffer;
 
     unsigned endIndex = input.length();
@@ -940,12 +957,16 @@
         case State::Query:
             LOG_STATE("Query");
             if (*c == '#') {
-                encodeQuery(queryBuffer, m_buffer, encoding);
+                if (!isUTF8Encoding)
+                    encodeQuery(queryBuffer, m_buffer, encoding);
                 m_url.m_queryEnd = m_buffer.length();
                 state = State::Fragment;
                 break;
             }
-            queryBuffer.append(*c);
+            if (isUTF8Encoding)
+                utf8PercentEncodeQuery(*c, m_buffer);
+            else
+                queryBuffer.append(*c);
             ++c;
             break;
         case State::Fragment:
@@ -1097,7 +1118,8 @@
         break;
     case State::Query:
         LOG_FINAL_STATE("Query");
-        encodeQuery(queryBuffer, m_buffer, encoding);
+        if (!isUTF8Encoding)
+            encodeQuery(queryBuffer, m_buffer, encoding);
         m_url.m_queryEnd = m_buffer.length();
         m_url.m_fragmentEnd = m_url.m_queryEnd;
         break;
@@ -1548,6 +1570,7 @@
         int32_t offset = 0;
         UBool error = false;
         U8_APPEND(buffer, offset, U8_MAX_LENGTH, *iterator, error);
+        ASSERT_WITH_SECURITY_IMPLICATION(offset <= static_cast<int32_t>(sizeof(buffer)));
         // FIXME: Check error.
         utf8Encoded.append(buffer, offset);
     }
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to