Title: [206198] trunk
Revision
206198
Author
achristen...@apple.com
Date
2016-09-20 23:34:13 -0700 (Tue, 20 Sep 2016)

Log Message

Optimize URLParser
https://bugs.webkit.org/show_bug.cgi?id=162105

Reviewed by Geoffrey Garen.

Source/WebCore:

Covered by new API tests.
This is about a 5% speedup on my URLParser benchmark.

* platform/URLParser.cpp:
(WebCore::percentEncodeByte):
(WebCore::utf8PercentEncode):
(WebCore::utf8QueryEncode):
(WebCore::encodeQuery):
(WebCore::URLParser::parse):
(WebCore::serializeURLEncodedForm):
(WebCore::percentEncode): Deleted.
(WebCore::utf8PercentEncodeQuery): Deleted.

Tools:

* TestWebKitAPI/Tests/WebCore/URLParser.cpp:
(TestWebKitAPI::TEST_F):

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (206197 => 206198)


--- trunk/Source/WebCore/ChangeLog	2016-09-21 06:17:13 UTC (rev 206197)
+++ trunk/Source/WebCore/ChangeLog	2016-09-21 06:34:13 UTC (rev 206198)
@@ -1,3 +1,23 @@
+2016-09-20  Alex Christensen  <achristen...@webkit.org>
+
+        Optimize URLParser
+        https://bugs.webkit.org/show_bug.cgi?id=162105
+
+        Reviewed by Geoffrey Garen.
+
+        Covered by new API tests.
+        This is about a 5% speedup on my URLParser benchmark.
+
+        * platform/URLParser.cpp:
+        (WebCore::percentEncodeByte):
+        (WebCore::utf8PercentEncode):
+        (WebCore::utf8QueryEncode):
+        (WebCore::encodeQuery):
+        (WebCore::URLParser::parse):
+        (WebCore::serializeURLEncodedForm):
+        (WebCore::percentEncode): Deleted.
+        (WebCore::utf8PercentEncodeQuery): Deleted.
+
 2016-09-20  Carlos Garcia Campos  <cgar...@igalia.com>
 
         [GTK] Clean up DataObjectGtk handling

Modified: trunk/Source/WebCore/platform/URLParser.cpp (206197 => 206198)


--- trunk/Source/WebCore/platform/URLParser.cpp	2016-09-21 06:17:13 UTC (rev 206197)
+++ trunk/Source/WebCore/platform/URLParser.cpp	2016-09-21 06:34:13 UTC (rev 206198)
@@ -457,7 +457,7 @@
     return !isSlashQuestionOrHash(*iterator);
 }
 
-inline static void percentEncode(uint8_t byte, Vector<LChar>& buffer)
+inline static void percentEncodeByte(uint8_t byte, Vector<LChar>& buffer)
 {
     buffer.append('%');
     buffer.append(upperNibbleToASCIIHexDigit(byte));
@@ -464,6 +464,9 @@
     buffer.append(lowerNibbleToASCIIHexDigit(byte));
 }
 
+const char* replacementCharacterUTF8PercentEncoded = "%EF%BF%BD";
+const size_t replacementCharacterUTF8PercentEncodedLength = 9;
+
 template<bool serialized>
 inline static void utf8PercentEncode(UChar32 codePoint, Vector<LChar>& destination, bool(*isInCodeSet)(UChar32))
 {
@@ -472,23 +475,30 @@
         ASSERT_WITH_SECURITY_IMPLICATION(!isInCodeSet(codePoint));
         destination.append(codePoint);
     } else {
-        if (isInCodeSet(codePoint)) {
-            uint8_t buffer[U8_MAX_LENGTH];
-            int32_t offset = 0;
-            UBool error = false;
-            U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error);
-            // FIXME: Check error.
-            for (int32_t i = 0; i < offset; ++i)
-                percentEncode(buffer[i], destination);
-        } else {
-            ASSERT_WITH_MESSAGE(isASCII(codePoint), "isInCodeSet should always return true for non-ASCII characters");
-            destination.append(codePoint);
+        if (isASCII(codePoint)) {
+            if (isInCodeSet(codePoint))
+                percentEncodeByte(codePoint, destination);
+            else
+                destination.append(codePoint);
+            return;
         }
+        ASSERT_WITH_MESSAGE(isInCodeSet(codePoint), "isInCodeSet should always return true for non-ASCII characters");
+        
+        if (!U_IS_UNICODE_CHAR(codePoint)) {
+            destination.append(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
+            return;
+        }
+        
+        uint8_t buffer[U8_MAX_LENGTH];
+        int32_t offset = 0;
+        U8_APPEND_UNSAFE(buffer, offset, codePoint);
+        for (int32_t i = 0; i < offset; ++i)
+            percentEncodeByte(buffer[i], destination);
     }
 }
 
 template<bool serialized>
-inline static void utf8PercentEncodeQuery(UChar32 codePoint, Vector<LChar>& destination)
+inline static void utf8QueryEncode(UChar32 codePoint, Vector<LChar>& destination)
 {
     if (serialized) {
         ASSERT_WITH_SECURITY_IMPLICATION(isASCII(codePoint));
@@ -495,16 +505,26 @@
         ASSERT_WITH_SECURITY_IMPLICATION(!shouldPercentEncodeQueryByte(codePoint));
         destination.append(codePoint);
     } else {
+        if (isASCII(codePoint)) {
+            if (shouldPercentEncodeQueryByte(codePoint))
+                percentEncodeByte(codePoint, destination);
+            else
+                destination.append(codePoint);
+            return;
+        }
+        
+        if (!U_IS_UNICODE_CHAR(codePoint)) {
+            destination.append(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
+            return;
+        }
+
         uint8_t buffer[U8_MAX_LENGTH];
         int32_t offset = 0;
-        UBool error = false;
-        U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error);
-        ASSERT_WITH_SECURITY_IMPLICATION(offset <= static_cast<int32_t>(sizeof(buffer)));
-        // FIXME: Check error.
+        U8_APPEND_UNSAFE(buffer, offset, codePoint);
         for (int32_t i = 0; i < offset; ++i) {
             auto byte = buffer[i];
             if (shouldPercentEncodeQueryByte(byte))
-                percentEncode(byte, destination);
+                percentEncodeByte(byte, destination);
             else
                 destination.append(byte);
         }
@@ -520,7 +540,7 @@
     for (size_t i = 0; i < length; ++i) {
         uint8_t byte = data[i];
         if (shouldPercentEncodeQueryByte(byte))
-            percentEncode(byte, destination);
+            percentEncodeByte(byte, destination);
         else
             destination.append(byte);
     }
@@ -1413,7 +1433,7 @@
                 break;
             }
             if (isUTF8Encoding)
-                utf8PercentEncodeQuery<serialized>(*c, m_asciiBuffer);
+                utf8QueryEncode<serialized>(*c, m_asciiBuffer);
             else
                 appendCodePoint(queryBuffer, *c);
             ++c;
@@ -2198,7 +2218,7 @@
             || (byte >= 0x61 && byte <= 0x7A))
             output.append(byte);
         else
-            percentEncode(byte, output);
+            percentEncodeByte(byte, output);
     }
 }
     

Modified: trunk/Tools/ChangeLog (206197 => 206198)


--- trunk/Tools/ChangeLog	2016-09-21 06:17:13 UTC (rev 206197)
+++ trunk/Tools/ChangeLog	2016-09-21 06:34:13 UTC (rev 206198)
@@ -1,3 +1,13 @@
+2016-09-20  Alex Christensen  <achristen...@webkit.org>
+
+        Optimize URLParser
+        https://bugs.webkit.org/show_bug.cgi?id=162105
+
+        Reviewed by Geoffrey Garen.
+
+        * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+        (TestWebKitAPI::TEST_F):
+
 2016-09-20  Aakash Jain  <aakash_j...@apple.com>
 
         enable remote_api (for debugging) in flakiness dashboard app

Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (206197 => 206198)


--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	2016-09-21 06:17:13 UTC (rev 206197)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp	2016-09-21 06:34:13 UTC (rev 206198)
@@ -215,7 +215,6 @@
     checkURL("http://123.256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});
     checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});
     checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
-    // FIXME: Fix and add a test with an invalid surrogate pair at the end with a space as the second code unit.
 
     // This disagrees with the web platform test for http://:@www.example.com but agrees with Chrome and URL::parse,
     // and Firefox fails the web platform test differently. Maybe the web platform test ought to be changed.
@@ -656,7 +655,9 @@
     checkURLDifferences("http://%48OsT",
         {"http", "", "", "host", 0, "/", "", "", "http://host/"},
         {"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
-
+    checkURLDifferences("http://host/`",
+        {"http", "", "", "host", 0, "/%60", "", "", "http://host/%60"},
+        {"http", "", "", "host", 0, "/`", "", "", "http://host/`"});
 }
     
 static void shouldFail(const String& urlString)
@@ -719,6 +720,29 @@
         {"ws", "", "", "", 0, "", "", "", "ws:"},
         {"ws", "", "", "", 0, "s:", "", "", "ws:s:"});
     checkRelativeURL("notspecial:", "http://example.org/foo/bar", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
+    
+    const wchar_t surrogateBegin = 0xD800;
+    const wchar_t validSurrogateEnd = 0xDD55;
+    const wchar_t invalidSurrogateEnd = 'A';
+    checkURL(wideString<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, validSurrogateEnd, '\0'}),
+        {"http", "", "", "w", 0, "/%F0%90%85%95", "", "", "http://w/%F0%90%85%95"});
+    
+    // URLParser matches Chrome and Firefox but not URL::parse.
+    checkURLDifferences(wideString<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, invalidSurrogateEnd}),
+        {"http", "", "", "w", 0, "/%EF%BF%BDA", "", "", "http://w/%EF%BF%BDA"},
+        {"http", "", "", "w", 0, "/%ED%A0%80A", "", "", "http://w/%ED%A0%80A"});
+    checkURLDifferences(wideString<13>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', '?', surrogateBegin, invalidSurrogateEnd, '\0'}),
+        {"http", "", "", "w", 0, "/", "%EF%BF%BDA", "", "http://w/?%EF%BF%BDA"},
+        {"http", "", "", "w", 0, "/", "%ED%A0%80A", "", "http://w/?%ED%A0%80A"});
+    checkURLDifferences(wideString<11>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, '\0'}),
+        {"http", "", "", "w", 0, "/%EF%BF%BD", "", "", "http://w/%EF%BF%BD"},
+        {"http", "", "", "w", 0, "/%ED%A0%80", "", "", "http://w/%ED%A0%80"});
+    checkURLDifferences(wideString<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', '?', surrogateBegin, '\0'}),
+        {"http", "", "", "w", 0, "/", "%EF%BF%BD", "", "http://w/?%EF%BF%BD"},
+        {"http", "", "", "w", 0, "/", "%ED%A0%80", "", "http://w/?%ED%A0%80"});
+    checkURLDifferences(wideString<13>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', '?', surrogateBegin, ' ', '\0'}),
+        {"http", "", "", "w", 0, "/", "%EF%BF%BD", "", "http://w/?%EF%BF%BD"},
+        {"http", "", "", "w", 0, "/", "%ED%A0%80", "", "http://w/?%ED%A0%80"});
 }
 
 static void checkURL(const String& urlString, const TextEncoding& encoding, const ExpectedParts& parts)
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to