Log Message
URLParser should percent-encode non-ASCII and non-printable characters in fragment https://bugs.webkit.org/show_bug.cgi?id=163287
Reviewed by Brady Eidson. Source/WebCore: Based on discussion in https://github.com/whatwg/url/issues/150 If that discussion decides to keep the spec as-is (which keeps non-ASCII characters in the fragment to match IE and Edge's behavior, which Chrome has followed for special schemes) then we can revert this change later after enabling the URL parser. Making this change keeps behavior matching Safari and Firefox, as well as Chrome's handling of non-special schemes, such as data URLs. Covered by updated API tests. * platform/URLParser.cpp: (WebCore::URLParser::appendToASCIIBuffer): (WebCore::URLParser::copyURLPartsUntil): (WebCore::URLParser::syntaxViolation): (WebCore::URLParser::currentPosition): (WebCore::URLParser::parse): (WebCore::URLParser::fragmentSyntaxViolation): Deleted. * platform/URLParser.h: No more non-ASCII characters in canonicalized URLs. Tools: * TestWebKitAPI/Tests/WebCore/URLParser.cpp: (TestWebKitAPI::TEST_F):
Modified Paths
Diff
Modified: trunk/Source/WebCore/ChangeLog (207151 => 207152)
--- trunk/Source/WebCore/ChangeLog 2016-10-11 18:20:23 UTC (rev 207151)
+++ trunk/Source/WebCore/ChangeLog 2016-10-11 18:28:50 UTC (rev 207152)
@@ -1,5 +1,30 @@
2016-10-11 Alex Christensen <[email protected]>
+ URLParser should percent-encode non-ASCII and non-printable characters in fragment
+ https://bugs.webkit.org/show_bug.cgi?id=163287
+
+ Reviewed by Brady Eidson.
+
+ Based on discussion in https://github.com/whatwg/url/issues/150
+ If that discussion decides to keep the spec as-is (which keeps non-ASCII characters in the fragment
+ to match IE and Edge's behavior, which Chrome has followed for special schemes) then we can revert
+ this change later after enabling the URL parser. Making this change keeps behavior matching Safari
+ and Firefox, as well as Chrome's handling of non-special schemes, such as data URLs.
+
+ Covered by updated API tests.
+
+ * platform/URLParser.cpp:
+ (WebCore::URLParser::appendToASCIIBuffer):
+ (WebCore::URLParser::copyURLPartsUntil):
+ (WebCore::URLParser::syntaxViolation):
+ (WebCore::URLParser::currentPosition):
+ (WebCore::URLParser::parse):
+ (WebCore::URLParser::fragmentSyntaxViolation): Deleted.
+ * platform/URLParser.h:
+ No more non-ASCII characters in canonicalized URLs.
+
+2016-10-11 Alex Christensen <[email protected]>
+
Remove dead networking code
https://bugs.webkit.org/show_bug.cgi?id=163263
Modified: trunk/Source/WebCore/platform/URLParser.cpp (207151 => 207152)
--- trunk/Source/WebCore/platform/URLParser.cpp 2016-10-11 18:20:23 UTC (rev 207151)
+++ trunk/Source/WebCore/platform/URLParser.cpp 2016-10-11 18:28:50 UTC (rev 207152)
@@ -462,7 +462,6 @@
ALWAYS_INLINE void URLParser::appendToASCIIBuffer(UChar32 codePoint)
{
- ASSERT(m_unicodeFragmentBuffer.isEmpty());
ASSERT(isASCII(codePoint));
if (UNLIKELY(m_didSeeSyntaxViolation))
m_asciiBuffer.append(codePoint);
@@ -470,7 +469,6 @@
ALWAYS_INLINE void URLParser::appendToASCIIBuffer(const char* characters, size_t length)
{
- ASSERT(m_unicodeFragmentBuffer.isEmpty());
if (UNLIKELY(m_didSeeSyntaxViolation))
m_asciiBuffer.append(characters, length);
}
@@ -829,7 +827,6 @@
syntaxViolation(iterator);
m_asciiBuffer.clear();
- m_unicodeFragmentBuffer.clear();
copyASCIIStringUntil(base.m_string, urlLengthUntilPart(base, part));
switch (part) {
case URLPart::FragmentEnd:
@@ -1018,7 +1015,6 @@
m_didSeeSyntaxViolation = true;
ASSERT(m_asciiBuffer.isEmpty());
- ASSERT(m_unicodeFragmentBuffer.isEmpty());
size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length());
m_asciiBuffer.reserveCapacity(m_inputString.length());
@@ -1028,30 +1024,6 @@
}
}
-template<typename CharacterType>
-void URLParser::fragmentSyntaxViolation(const CodePointIterator<CharacterType>& iterator)
-{
- ASSERT(m_didSeeUnicodeFragmentCodePoint);
- if (m_didSeeSyntaxViolation)
- return;
- m_didSeeSyntaxViolation = true;
-
- ASSERT(m_asciiBuffer.isEmpty());
- ASSERT(m_unicodeFragmentBuffer.isEmpty());
- size_t codeUnitsToCopy = iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
- size_t asciiCodeUnitsToCopy = m_url.m_queryEnd;
- size_t unicodeCodeUnitsToCopy = codeUnitsToCopy - asciiCodeUnitsToCopy;
- RELEASE_ASSERT(codeUnitsToCopy <= m_inputString.length());
- m_asciiBuffer.reserveCapacity(asciiCodeUnitsToCopy);
- for (size_t i = 0; i < asciiCodeUnitsToCopy; ++i) {
- ASSERT(isASCII(m_inputString[i]));
- m_asciiBuffer.uncheckedAppend(m_inputString[i]);
- }
- m_unicodeFragmentBuffer.reserveCapacity(m_inputString.length() - asciiCodeUnitsToCopy);
- for (size_t i = asciiCodeUnitsToCopy; i < asciiCodeUnitsToCopy + unicodeCodeUnitsToCopy; ++i)
- m_unicodeFragmentBuffer.uncheckedAppend(m_inputString[i]);
-}
-
void URLParser::failure()
{
m_url.invalidate();
@@ -1111,10 +1083,8 @@
template<typename CharacterType>
ALWAYS_INLINE size_t URLParser::currentPosition(const CodePointIterator<CharacterType>& iterator)
{
- if (UNLIKELY(m_didSeeSyntaxViolation)) {
- ASSERT(m_unicodeFragmentBuffer.isEmpty());
+ if (UNLIKELY(m_didSeeSyntaxViolation))
return m_asciiBuffer.size();
- }
return iterator.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
}
@@ -1160,7 +1130,6 @@
URL_PARSER_LOG("Parsing URL <%s> base <%s> encoding <%s>", String(input, length).utf8().data(), base.string().utf8().data(), encoding.name());
m_url = { };
ASSERT(m_asciiBuffer.isEmpty());
- ASSERT(m_unicodeFragmentBuffer.isEmpty());
bool isUTF8Encoding = encoding == UTF8Encoding();
Vector<UChar> queryBuffer;
@@ -1811,35 +1780,9 @@
} while (!c.atEnd());
break;
case State::Fragment:
- do {
- URL_PARSER_LOG("State Fragment");
- if (!c.atEnd() && isTabOrNewline(*c)) {
- if (m_didSeeUnicodeFragmentCodePoint)
- fragmentSyntaxViolation(c);
- else
- syntaxViolation(c);
- ++c;
- continue;
- }
- if (!m_didSeeUnicodeFragmentCodePoint && isASCII(*c)) {
- if (m_urlIsSpecial)
- appendToASCIIBuffer(*c);
- else
- utf8PercentEncode<isInSimpleEncodeSet>(c);
- } else {
- if (m_urlIsSpecial) {
- m_didSeeUnicodeFragmentCodePoint = true;
- if (UNLIKELY(m_didSeeSyntaxViolation))
- appendCodePoint(m_unicodeFragmentBuffer, *c);
- else {
- ASSERT(m_asciiBuffer.isEmpty());
- ASSERT(m_unicodeFragmentBuffer.isEmpty());
- }
- } else
- utf8PercentEncode<isInSimpleEncodeSet>(c);
- }
- ++c;
- } while (!c.atEnd());
+ URL_PARSER_LOG("State Fragment");
+ utf8PercentEncode<isInSimpleEncodeSet>(c);
+ ++c;
break;
}
}
@@ -2043,28 +1986,16 @@
m_url.m_fragmentEnd = m_url.m_queryEnd;
break;
case State::Fragment:
- {
- LOG_FINAL_STATE("Fragment");
- size_t length = m_didSeeSyntaxViolation ? m_asciiBuffer.size() + m_unicodeFragmentBuffer.size() : c.codeUnitsSince(reinterpret_cast<const CharacterType*>(m_inputBegin));
- m_url.m_fragmentEnd = length;
- break;
- }
+ LOG_FINAL_STATE("Fragment");
+ m_url.m_fragmentEnd = currentPosition(c);
+ break;
}
if (LIKELY(!m_didSeeSyntaxViolation)) {
m_url.m_string = m_inputString;
ASSERT(m_asciiBuffer.isEmpty());
- ASSERT(m_unicodeFragmentBuffer.isEmpty());
- } else if (!m_didSeeUnicodeFragmentCodePoint) {
- ASSERT(m_unicodeFragmentBuffer.isEmpty());
+ } else
m_url.m_string = String::adopt(WTFMove(m_asciiBuffer));
- } else {
- Vector<UChar> buffer;
- buffer.reserveInitialCapacity(m_asciiBuffer.size() + m_unicodeFragmentBuffer.size());
- buffer.appendVector(m_asciiBuffer);
- buffer.appendVector(m_unicodeFragmentBuffer);
- m_url.m_string = String::adopt(WTFMove(buffer));
- }
m_url.m_isValid = true;
URL_PARSER_LOG("Parsed URL <%s>", m_url.m_string.utf8().data());
}
Modified: trunk/Source/WebCore/platform/URLParser.h (207151 => 207152)
--- trunk/Source/WebCore/platform/URLParser.h 2016-10-11 18:20:23 UTC (rev 207151)
+++ trunk/Source/WebCore/platform/URLParser.h 2016-10-11 18:28:50 UTC (rev 207152)
@@ -51,8 +51,6 @@
private:
URL m_url;
Vector<LChar> m_asciiBuffer;
- Vector<UChar> m_unicodeFragmentBuffer;
- bool m_didSeeUnicodeFragmentCodePoint { false };
bool m_urlIsSpecial { false };
bool m_hostHasPercentOrNonASCII { false };
String m_inputString;
@@ -73,7 +71,6 @@
void advance(CodePointIterator<CharacterType>&, const CodePointIterator<CharacterType>& iteratorForSyntaxViolationPosition);
template<typename CharacterType> bool takesTwoAdvancesUntilEnd(CodePointIterator<CharacterType>);
template<typename CharacterType> void syntaxViolation(const CodePointIterator<CharacterType>&);
- template<typename CharacterType> void fragmentSyntaxViolation(const CodePointIterator<CharacterType>&);
template<typename CharacterType> bool isPercentEncodedDot(CodePointIterator<CharacterType>);
template<typename CharacterType> bool isWindowsDriveLetter(CodePointIterator<CharacterType>);
template<typename CharacterType> bool isSingleDotPathSegment(CodePointIterator<CharacterType>);
Modified: trunk/Tools/ChangeLog (207151 => 207152)
--- trunk/Tools/ChangeLog 2016-10-11 18:20:23 UTC (rev 207151)
+++ trunk/Tools/ChangeLog 2016-10-11 18:28:50 UTC (rev 207152)
@@ -1,5 +1,15 @@
2016-10-11 Alex Christensen <[email protected]>
+ URLParser should percent-encode non-ASCII and non-printable characters in fragment
+ https://bugs.webkit.org/show_bug.cgi?id=163287
+
+ Reviewed by Brady Eidson.
+
+ * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+ (TestWebKitAPI::TEST_F):
+
+2016-10-11 Alex Christensen <[email protected]>
+
Remove dead networking code
https://bugs.webkit.org/show_bug.cgi?id=163263
Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (207151 => 207152)
--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-10-11 18:20:23 UTC (rev 207151)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-10-11 18:28:50 UTC (rev 207152)
@@ -331,6 +331,10 @@
checkURL("aA://", {"aa", "", "", "", 0, "//", "", "", "aa://"});
checkURL(utf16String(u"foo://host/#ПП\u0007 a</"), {"foo", "", "", "host", 0, "/", "", "%D0%9F%D0%9F%07 a</", "foo://host/#%D0%9F%D0%9F%07 a</"});
checkURL(utf16String(u"foo://host/#\u0007 a</"), {"foo", "", "", "host", 0, "/", "", "%07 a</", "foo://host/#%07 a</"});
+ checkURL(utf16String(u"http://host?ß😍#ß😍"), {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", "%C3%9F%F0%9F%98%8D", "http://host/?%C3%9F%F0%9F%98%8D#%C3%9F%F0%9F%98%8D"}, testTabsValueForSurrogatePairs);
+ checkURL(utf16String(u"http://host/path#💩\t💩"), {"http", "", "", "host", 0, "/path", "", "%F0%9F%92%A9%F0%9F%92%A9", "http://host/path#%F0%9F%92%A9%F0%9F%92%A9"}, testTabsValueForSurrogatePairs);
+ checkURL(utf16String(u"http://host/#ПП\u0007 a</"), {"http", "", "", "host", 0, "/", "", "%D0%9F%D0%9F%07 a</", "http://host/#%D0%9F%D0%9F%07 a</"});
+ checkURL(utf16String(u"http://host/#\u0007 a</"), {"http", "", "", "host", 0, "/", "", "%07 a</", "http://host/#%07 a</"});
// This disagrees with the web platform test for http://:@www.example.com but agrees with Chrome and URL::parse,
// and Firefox fails the web platform test differently. Maybe the web platform test ought to be changed.
@@ -456,6 +460,7 @@
checkRelativeURL(" ", "http://host/path?query#fra#gment", {"http", "", "", "host", 0, "/path", "query", "", "http://host/path?query"});
checkRelativeURL(" \a ", "http://host/#fragment", {"http", "", "", "host", 0, "/", "", "", "http://host/"});
checkRelativeURL("foo://", "http://example.org/foo/bar", {"foo", "", "", "", 0, "//", "", "", "foo://"});
+ checkRelativeURL(utf16String(u"#β"), "http://example.org/foo/bar", {"http", "", "", "example.org", 0, "/foo/bar", "", "%CE%B2", "http://example.org/foo/bar#%CE%B2"});
// The checking of slashes in SpecialAuthoritySlashes needed to get this to pass contradicts what is in the spec,
// but it is included in the web platform tests.
@@ -630,9 +635,6 @@
checkURLDifferences("file://[0:a:0:0:b:c:0:0]/path",
{"file", "", "", "[0:a::b:c:0:0]", 0, "/path", "", "", "file://[0:a::b:c:0:0]/path"},
{"file", "", "", "[0:a:0:0:b:c:0:0]", 0, "/path", "", "", "file://[0:a:0:0:b:c:0:0]/path"});
- checkRelativeURLDifferences(utf16String(u"#β"), "http://example.org/foo/bar",
- {"http", "", "", "example.org", 0, "/foo/bar", "", utf16String(u"β"), utf16String(u"http://example.org/foo/bar#β")},
- {"http", "", "", "example.org", 0, "/foo/bar", "", "%CE%B2", "http://example.org/foo/bar#%CE%B2"});
checkURLDifferences("http://",
{"", "", "", "", 0, "", "", "", "http://"},
{"http", "", "", "", 0, "/", "", "", "http:/"});
@@ -769,12 +771,6 @@
checkURLDifferences("notspecial://@test@test@example:800\\path@end",
{"notspecial", "@test@test@example", "800\\path", "end", 0, "/", "", "", "notspecial://%40test%40test%40example:800%5Cpath@end/"},
{"", "", "", "", 0, "", "", "", "notspecial://@test@test@example:800\\path@end"});
- checkURLDifferences(utf16String(u"http://host?ß😍#ß😍"),
- {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", utf16String(u"ß😍"), utf16String(u"http://host/?%C3%9F%F0%9F%98%8D#ß😍")},
- {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", "%C3%9F%F0%9F%98%8D", "http://host/?%C3%9F%F0%9F%98%8D#%C3%9F%F0%9F%98%8D"}, testTabsValueForSurrogatePairs);
- checkURLDifferences(utf16String(u"http://host/path#💩\t💩"),
- {"http", "", "", "host", 0, "/path", "", utf16String(u"💩💩"), utf16String(u"http://host/path#💩💩")},
- {"http", "", "", "host", 0, "/path", "", "%F0%9F%92%A9%F0%9F%92%A9", "http://host/path#%F0%9F%92%A9%F0%9F%92%A9"});
checkURLDifferences("http://%48OsT",
{"http", "", "", "host", 0, "/", "", "", "http://host/"},
{"http", "", "", "%48ost", 0, "/", "", "", "http://%48ost/"});
@@ -1060,12 +1056,6 @@
checkURLDifferences("file://:0/path",
{"", "", "", "", 0, "", "", "", "file://:0/path"},
{"file", "", "", "", 0, "/path", "", "", "file://:0/path"});
- checkURLDifferences(utf16String(u"http://host/#ПП\u0007 a</"),
- {"http", "", "", "host", 0, "/", "", utf16String(u"ПП\u0007 a</"), utf16String(u"http://host/#ПП\u0007 a</")},
- {"http", "", "", "host", 0, "/", "", "%D0%9F%D0%9F%07 a</", "http://host/#%D0%9F%D0%9F%07 a</"});
- checkURLDifferences(utf16String(u"http://host/#\u0007 a</"),
- {"http", "", "", "host", 0, "/", "", "\a a</", "http://host/#\a a</"},
- {"http", "", "", "host", 0, "/", "", "%07 a</", "http://host/#%07 a</"});
}
static void shouldFail(const String& urlString)
@@ -1224,8 +1214,7 @@
TEST_F(URLParserTest, QueryEncoding)
{
- checkURL(utf16String(u"http://host?ß😍#ß😍"), UTF8Encoding(), {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", utf16String(u"ß😍"), utf16String(u"http://host/?%C3%9F%F0%9F%98%8D#ß😍")}, testTabsValueForSurrogatePairs);
- checkURL(utf16String(u"http://host?ß😍#ß😍"), UTF8Encoding(), {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", utf16String(u"ß😍"), utf16String(u"http://host/?%C3%9F%F0%9F%98%8D#ß😍")}, testTabsValueForSurrogatePairs);
+ checkURL(utf16String(u"http://host?ß😍#ß😍"), UTF8Encoding(), {"http", "", "", "host", 0, "/", "%C3%9F%F0%9F%98%8D", "%C3%9F%F0%9F%98%8D", utf16String(u"http://host/?%C3%9F%F0%9F%98%8D#%C3%9F%F0%9F%98%8D")}, testTabsValueForSurrogatePairs);
TextEncoding latin1(String("latin1"));
checkURL("http://host/?query with%20spaces", latin1, {"http", "", "", "host", 0, "/", "query%20with%20spaces", "", "http://host/?query%20with%20spaces"});
_______________________________________________ webkit-changes mailing list [email protected] https://lists.webkit.org/mailman/listinfo/webkit-changes
