Modified: trunk/Source/WebCore/ChangeLog (205917 => 205918)
--- trunk/Source/WebCore/ChangeLog 2016-09-14 17:55:15 UTC (rev 205917)
+++ trunk/Source/WebCore/ChangeLog 2016-09-14 18:16:29 UTC (rev 205918)
@@ -1,3 +1,19 @@
+2016-09-14 Alex Christensen <[email protected]>
+
+ URLParser: Add fast path for utf8 encoding queries
+ https://bugs.webkit.org/show_bug.cgi?id=161968
+
+ Reviewed by Daniel Bates.
+
+ No change in behavior. Covered by existing tests.
+
+ * platform/URLParser.cpp:
+ (WebCore::utf8PercentEncodeQuery):
+ (WebCore::URLParser::parse):
+ If the text encoding is UTF-8 (which is quite common), then we can encode the query
+ as we iterate its code points. This reduces memory allocation and significantly speeds
+ up my URL parsing benchmark.
+
2016-09-14 Chris Dumez <[email protected]>
Regression(r152725): generate-bindings.pl --write-dependencies does not work
Modified: trunk/Source/WebCore/platform/URLParser.cpp (205917 => 205918)
--- trunk/Source/WebCore/platform/URLParser.cpp 2016-09-14 17:55:15 UTC (rev 205917)
+++ trunk/Source/WebCore/platform/URLParser.cpp 2016-09-14 18:16:29 UTC (rev 205918)
@@ -112,6 +112,23 @@
return byte == 0x3E;
}
+// Appends the UTF-8 encoding of codePoint to builder, one byte at a time.
+// Each byte for which shouldPercentEncodeQueryByte() returns true is written
+// through percentEncode(); every other byte is appended unchanged.
+//
+// codePoint: the code point taken from the query being parsed.
+// builder:   the output buffer that receives the encoded bytes.
+static void utf8PercentEncodeQuery(UChar32 codePoint, StringBuilder& builder)
+{
+    uint8_t buffer[U8_MAX_LENGTH];
+    int32_t offset = 0;
+    UBool error = false;
+    U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error);
+    ASSERT_WITH_SECURITY_IMPLICATION(offset <= static_cast<int32_t>(sizeof(buffer)));
+    if (error) {
+        // U8_APPEND flags code points it cannot encode (for example an
+        // unpaired surrogate). Without this branch such a code point would
+        // contribute no bytes and silently disappear from the query.
+        // Substitute U+FFFD REPLACEMENT CHARACTER, whose UTF-8 form is
+        // EF BF BD, and let it go through the same per-byte path below.
+        static_assert(U8_MAX_LENGTH >= 3, "buffer must be able to hold U+FFFD encoded as UTF-8");
+        buffer[0] = 0xEF;
+        buffer[1] = 0xBF;
+        buffer[2] = 0xBD;
+        offset = 3;
+    }
+    for (int32_t i = 0; i < offset; ++i) {
+        auto byte = buffer[i];
+        if (shouldPercentEncodeQueryByte(byte))
+            percentEncode(byte, builder);
+        else
+            builder.append(byte);
+    }
+}
+
static void encodeQuery(const StringBuilder& source, StringBuilder& destination, const TextEncoding& encoding)
{
// FIXME: It is unclear in the spec what to do when encoding fails. The behavior should be specified and tested.
@@ -478,7 +495,7 @@
m_buffer.clear();
m_buffer.reserveCapacity(input.length());
- // FIXME: We shouldn't need to allocate another buffer for this.
+ bool isUTF8Encoding = encoding == UTF8Encoding();
StringBuilder queryBuffer;
unsigned endIndex = input.length();
@@ -940,12 +957,16 @@
case State::Query:
LOG_STATE("Query");
if (*c == '#') {
- encodeQuery(queryBuffer, m_buffer, encoding);
+ if (!isUTF8Encoding)
+ encodeQuery(queryBuffer, m_buffer, encoding);
m_url.m_queryEnd = m_buffer.length();
state = State::Fragment;
break;
}
- queryBuffer.append(*c);
+ if (isUTF8Encoding)
+ utf8PercentEncodeQuery(*c, m_buffer);
+ else
+ queryBuffer.append(*c);
++c;
break;
case State::Fragment:
@@ -1097,7 +1118,8 @@
break;
case State::Query:
LOG_FINAL_STATE("Query");
- encodeQuery(queryBuffer, m_buffer, encoding);
+ if (!isUTF8Encoding)
+ encodeQuery(queryBuffer, m_buffer, encoding);
m_url.m_queryEnd = m_buffer.length();
m_url.m_fragmentEnd = m_url.m_queryEnd;
break;
@@ -1548,6 +1570,7 @@
int32_t offset = 0;
UBool error = false;
U8_APPEND(buffer, offset, U8_MAX_LENGTH, *iterator, error);
+ ASSERT_WITH_SECURITY_IMPLICATION(offset <= static_cast<int32_t>(sizeof(buffer)));
// FIXME: Check error.
utf8Encoded.append(buffer, offset);
}