Modified: trunk/Source/WebCore/platform/URLParser.cpp (206197 => 206198)
--- trunk/Source/WebCore/platform/URLParser.cpp 2016-09-21 06:17:13 UTC (rev 206197)
+++ trunk/Source/WebCore/platform/URLParser.cpp 2016-09-21 06:34:13 UTC (rev 206198)
@@ -457,7 +457,7 @@
return !isSlashQuestionOrHash(*iterator);
}
-inline static void percentEncode(uint8_t byte, Vector<LChar>& buffer)
+inline static void percentEncodeByte(uint8_t byte, Vector<LChar>& buffer)
{
buffer.append('%');
buffer.append(upperNibbleToASCIIHexDigit(byte));
@@ -464,6 +464,9 @@
buffer.append(lowerNibbleToASCIIHexDigit(byte));
}
+static const char* const replacementCharacterUTF8PercentEncoded = "%EF%BF%BD"; // U+FFFD REPLACEMENT CHARACTER as UTF-8 (EF BF BD), percent-encoded; file-local and not reassignable.
+static const size_t replacementCharacterUTF8PercentEncodedLength = 9; // Number of characters in the string above, excluding the terminating NUL.
+
template<bool serialized>
inline static void utf8PercentEncode(UChar32 codePoint, Vector<LChar>& destination, bool(*isInCodeSet)(UChar32))
{
@@ -472,23 +475,30 @@
ASSERT_WITH_SECURITY_IMPLICATION(!isInCodeSet(codePoint));
destination.append(codePoint);
} else {
- if (isInCodeSet(codePoint)) {
- uint8_t buffer[U8_MAX_LENGTH];
- int32_t offset = 0;
- UBool error = false;
- U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error);
- // FIXME: Check error.
- for (int32_t i = 0; i < offset; ++i)
- percentEncode(buffer[i], destination);
- } else {
- ASSERT_WITH_MESSAGE(isASCII(codePoint), "isInCodeSet should always return true for non-ASCII characters");
- destination.append(codePoint);
+ if (isASCII(codePoint)) {
+ if (isInCodeSet(codePoint))
+ percentEncodeByte(codePoint, destination);
+ else
+ destination.append(codePoint);
+ return;
}
+ ASSERT_WITH_MESSAGE(isInCodeSet(codePoint), "isInCodeSet should always return true for non-ASCII characters");
+
+ if (!U_IS_UNICODE_CHAR(codePoint)) {
+ destination.append(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
+ return;
+ }
+
+ uint8_t buffer[U8_MAX_LENGTH];
+ int32_t offset = 0;
+ U8_APPEND_UNSAFE(buffer, offset, codePoint);
+ for (int32_t i = 0; i < offset; ++i)
+ percentEncodeByte(buffer[i], destination);
}
}
template<bool serialized>
-inline static void utf8PercentEncodeQuery(UChar32 codePoint, Vector<LChar>& destination)
+inline static void utf8QueryEncode(UChar32 codePoint, Vector<LChar>& destination)
{
if (serialized) {
ASSERT_WITH_SECURITY_IMPLICATION(isASCII(codePoint));
@@ -495,16 +505,26 @@
ASSERT_WITH_SECURITY_IMPLICATION(!shouldPercentEncodeQueryByte(codePoint));
destination.append(codePoint);
} else {
+ if (isASCII(codePoint)) {
+ if (shouldPercentEncodeQueryByte(codePoint))
+ percentEncodeByte(codePoint, destination);
+ else
+ destination.append(codePoint);
+ return;
+ }
+
+ if (!U_IS_UNICODE_CHAR(codePoint)) {
+ destination.append(replacementCharacterUTF8PercentEncoded, replacementCharacterUTF8PercentEncodedLength);
+ return;
+ }
+
uint8_t buffer[U8_MAX_LENGTH];
int32_t offset = 0;
- UBool error = false;
- U8_APPEND(buffer, offset, U8_MAX_LENGTH, codePoint, error);
- ASSERT_WITH_SECURITY_IMPLICATION(offset <= static_cast<int32_t>(sizeof(buffer)));
- // FIXME: Check error.
+ U8_APPEND_UNSAFE(buffer, offset, codePoint);
for (int32_t i = 0; i < offset; ++i) {
auto byte = buffer[i];
if (shouldPercentEncodeQueryByte(byte))
- percentEncode(byte, destination);
+ percentEncodeByte(byte, destination);
else
destination.append(byte);
}
@@ -520,7 +540,7 @@
for (size_t i = 0; i < length; ++i) {
uint8_t byte = data[i];
if (shouldPercentEncodeQueryByte(byte))
- percentEncode(byte, destination);
+ percentEncodeByte(byte, destination);
else
destination.append(byte);
}
@@ -1413,7 +1433,7 @@
break;
}
if (isUTF8Encoding)
- utf8PercentEncodeQuery<serialized>(*c, m_asciiBuffer);
+ utf8QueryEncode<serialized>(*c, m_asciiBuffer);
else
appendCodePoint(queryBuffer, *c);
++c;
@@ -2198,7 +2218,7 @@
|| (byte >= 0x61 && byte <= 0x7A))
output.append(byte);
else
- percentEncode(byte, output);
+ percentEncodeByte(byte, output);
}
}
Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (206197 => 206198)
--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-09-21 06:17:13 UTC (rev 206197)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-09-21 06:34:13 UTC (rev 206198)
@@ -215,7 +215,6 @@
checkURL("http://123.256/", {"http", "", "", "123.256", 0, "/", "", "", "http://123.256/"});
checkURL("notspecial:/a", {"notspecial", "", "", "", 0, "/a", "", "", "notspecial:/a"});
checkURL("notspecial:", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
- // FIXME: Fix and add a test with an invalid surrogate pair at the end with a space as the second code unit.
// This disagrees with the web platform test for http://:@www.example.com but agrees with Chrome and URL::parse,
// and Firefox fails the web platform test differently. Maybe the web platform test ought to be changed.
@@ -656,7 +655,9 @@
checkURLDifferences("http://%48OsT",
{"http", "", "", "host", 0, "/", "", "", "http://host/"},
{"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
-
+ checkURLDifferences("http://host/`",
+ {"http", "", "", "host", 0, "/%60", "", "", "http://host/%60"},
+ {"http", "", "", "host", 0, "/`", "", "", "http://host/`"});
}
static void shouldFail(const String& urlString)
@@ -719,6 +720,29 @@
{"ws", "", "", "", 0, "", "", "", "ws:"},
{"ws", "", "", "", 0, "s:", "", "", "ws:s:"});
checkRelativeURL("notspecial:", "http://example.org/foo/bar", {"notspecial", "", "", "", 0, "", "", "", "notspecial:"});
+
+ const wchar_t surrogateBegin = 0xD800;
+ const wchar_t validSurrogateEnd = 0xDD55;
+ const wchar_t invalidSurrogateEnd = 'A';
+ checkURL(wideString<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, validSurrogateEnd, '\0'}),
+ {"http", "", "", "w", 0, "/%F0%90%85%95", "", "", "http://w/%F0%90%85%95"});
+
+ // URLParser matches Chrome and Firefox but not URL::parse.
+ checkURLDifferences(wideString<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, invalidSurrogateEnd}),
+ {"http", "", "", "w", 0, "/%EF%BF%BDA", "", "", "http://w/%EF%BF%BDA"},
+ {"http", "", "", "w", 0, "/%ED%A0%80A", "", "", "http://w/%ED%A0%80A"});
+ checkURLDifferences(wideString<13>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', '?', surrogateBegin, invalidSurrogateEnd, '\0'}),
+ {"http", "", "", "w", 0, "/", "%EF%BF%BDA", "", "http://w/?%EF%BF%BDA"},
+ {"http", "", "", "w", 0, "/", "%ED%A0%80A", "", "http://w/?%ED%A0%80A"});
+ checkURLDifferences(wideString<11>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', surrogateBegin, '\0'}),
+ {"http", "", "", "w", 0, "/%EF%BF%BD", "", "", "http://w/%EF%BF%BD"},
+ {"http", "", "", "w", 0, "/%ED%A0%80", "", "", "http://w/%ED%A0%80"});
+ checkURLDifferences(wideString<12>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', '?', surrogateBegin, '\0'}),
+ {"http", "", "", "w", 0, "/", "%EF%BF%BD", "", "http://w/?%EF%BF%BD"},
+ {"http", "", "", "w", 0, "/", "%ED%A0%80", "", "http://w/?%ED%A0%80"});
+ checkURLDifferences(wideString<13>({'h', 't', 't', 'p', ':', '/', '/', 'w', '/', '?', surrogateBegin, ' ', '\0'}),
+ {"http", "", "", "w", 0, "/", "%EF%BF%BD", "", "http://w/?%EF%BF%BD"},
+ {"http", "", "", "w", 0, "/", "%ED%A0%80", "", "http://w/?%ED%A0%80"});
}
static void checkURL(const String& urlString, const TextEncoding& encoding, const ExpectedParts& parts)