- Revision
- 204544
- Author
- [email protected]
- Date
- 2016-08-16 17:41:30 -0700 (Tue, 16 Aug 2016)
Log Message
URLParser should parse URLs without credentials
https://bugs.webkit.org/show_bug.cgi?id=160913
Reviewed by Brady Eidson.
Source/WebCore:
When parsing a URL, after the scheme we do not know if we are parsing a username and password
or if we are parsing the host until we hit a '@' indicating the end of the credentials or a /, ?, or #
indicating the end of the host. Because there are significantly different rules for serializing usernames,
passwords, and hosts (all of which have yet to be implemented in URLParser) we put the code points after the
scheme in a special buffer that will be processed once we know what we are parsing.
In the future, this could be optimized by assuming that we are parsing the host and if we encounter a '@' character,
then do some extra work. This would save us the effort of copying the host twice because most URLs don't have credentials.
This is covered by a new URLParser API test.
* platform/Logging.h:
* platform/URLParser.cpp:
(WebCore::isC0Control):
(WebCore::isC0ControlOrSpace):
(WebCore::isTabOrNewline):
(WebCore::isSpecialScheme):
(WebCore::URLParser::parse):
(WebCore::URLParser::authorityEndReached):
(WebCore::URLParser::hostEndReached):
(WebCore::URLParser::allValuesEqual):
(WebCore::isASCIIDigit): Deleted.
(WebCore::isASCIIAlpha): Deleted.
(WebCore::isASCIIAlphanumeric): Deleted.
* platform/URLParser.h:
(WebCore::URLParser::parse):
Tools:
* TestWebKitAPI/Tests/WebCore/URLParser.cpp:
(TestWebKitAPI::s):
(TestWebKitAPI::checkURL):
(TestWebKitAPI::TEST_F):
Modified Paths
Diff
Modified: trunk/Source/WebCore/ChangeLog (204543 => 204544)
--- trunk/Source/WebCore/ChangeLog 2016-08-17 00:25:36 UTC (rev 204543)
+++ trunk/Source/WebCore/ChangeLog 2016-08-17 00:41:30 UTC (rev 204544)
@@ -1,3 +1,37 @@
+2016-08-16 Alex Christensen <[email protected]>
+
+ URLParser should parse URLs without credentials
+ https://bugs.webkit.org/show_bug.cgi?id=160913
+
+ Reviewed by Brady Eidson.
+
+ When parsing a URL, after the scheme we do not know if we are parsing a username and password
+ or if we are parsing the host until we hit a '@' indicating the end of the credentials or a /, ?, or #
+ indicating the end of the host. Because there are significantly different rules for serializing usernames,
+ passwords, and hosts (all of which have yet to be implemented in URLParser) we put the code points after the
+ scheme in a special buffer that will be processed once we know what we are parsing.
+
+ In the future, this could be optimized by assuming that we are parsing the host and if we encounter a '@' character,
+ then do some extra work. This would save us the effort of copying the host twice because most URLs don't have credentials.
+
+ This is covered by a new URLParser API test.
+
+ * platform/Logging.h:
+ * platform/URLParser.cpp:
+ (WebCore::isC0Control):
+ (WebCore::isC0ControlOrSpace):
+ (WebCore::isTabOrNewline):
+ (WebCore::isSpecialScheme):
+ (WebCore::URLParser::parse):
+ (WebCore::URLParser::authorityEndReached):
+ (WebCore::URLParser::hostEndReached):
+ (WebCore::URLParser::allValuesEqual):
+ (WebCore::isASCIIDigit): Deleted.
+ (WebCore::isASCIIAlpha): Deleted.
+ (WebCore::isASCIIAlphanumeric): Deleted.
+ * platform/URLParser.h:
+ (WebCore::URLParser::parse):
+
2016-08-16 Chris Dumez <[email protected]>
Add support for ShadowRoot.mode attribute
Modified: trunk/Source/WebCore/platform/Logging.h (204543 => 204544)
--- trunk/Source/WebCore/platform/Logging.h 2016-08-17 00:25:36 UTC (rev 204543)
+++ trunk/Source/WebCore/platform/Logging.h 2016-08-17 00:41:30 UTC (rev 204544)
@@ -78,6 +78,7 @@
M(StorageAPI) \
M(TextAutosizing) \
M(Threading) \
+ M(URLParser) \
M(WebAudio) \
M(WebGL) \
M(WebReplay) \
Modified: trunk/Source/WebCore/platform/URLParser.cpp (204543 => 204544)
--- trunk/Source/WebCore/platform/URLParser.cpp 2016-08-17 00:25:36 UTC (rev 204543)
+++ trunk/Source/WebCore/platform/URLParser.cpp 2016-08-17 00:41:30 UTC (rev 204544)
@@ -25,18 +25,16 @@
#include "config.h"
#include "URLParser.h"
+
+#include "Logging.h"
#include "NotImplemented.h"
-
#include <wtf/text/StringBuilder.h>
namespace WebCore {
-static bool isC0Control(const StringView::CodePoints::Iterator& c) { return *c <= 0x001F; }
-static bool isC0ControlOrSpace(const StringView::CodePoints::Iterator& c) { return isC0Control(c) || *c == 0x0020; }
-static bool isTabOrNewline(const StringView::CodePoints::Iterator& c) { return *c == 0x0009 || *c == 0x000A || *c == 0x000D; }
-static bool isASCIIDigit(const StringView::CodePoints::Iterator& c) { return *c >= 0x0030 && *c <= 0x0039; }
-static bool isASCIIAlpha(const StringView::CodePoints::Iterator& c) { return (*c >= 0x0041 && *c <= 0x005A) || (*c >= 0x0061 && *c <= 0x007A); }
-static bool isASCIIAlphanumeric(const StringView::CodePoints::Iterator& c) { return isASCIIDigit(c) || isASCIIAlpha(c); }
+template<typename CharacterType> static bool isC0Control(CharacterType character) { return character <= 0x0001F; }
+template<typename CharacterType> static bool isC0ControlOrSpace(CharacterType character) { return isC0Control(character) || character == 0x0020; }
+template<typename CharacterType> static bool isTabOrNewline(CharacterType character) { return character == 0x0009 || character == 0x000A || character == 0x000D; }
static bool isSpecialScheme(const String& scheme)
{
@@ -51,13 +49,14 @@
Optional<URL> URLParser::parse(const String& input, const URL& base, const TextEncoding&)
{
- URL url;
-
+ m_url = { };
+ m_buffer.clear();
+ m_authorityOrHostBuffer.clear();
+
auto codePoints = StringView(input).codePoints();
auto c = codePoints.begin();
auto end = codePoints.end();
- StringBuilder buffer;
- while (isC0ControlOrSpace(c))
+ while (c != end && isC0ControlOrSpace(*c))
++c;
enum class State : uint8_t {
@@ -71,10 +70,8 @@
RelativeSlash,
SpecialAuthoritySlashes,
SpecialAuthorityIgnoreSlashes,
- Authority,
+ AuthorityOrHost,
Host,
- Hostname,
- Port,
File,
FileSlash,
FileHost,
@@ -85,11 +82,12 @@
Fragment,
};
-#define LOG_STATE(x)
+#define LOG_STATE(x) LOG(URLParser, x)
+#define LOG_FINAL_STATE(x) LOG(URLParser, "Final State: %s", x)
State state = State::SchemeStart;
while (c != end) {
- if (isTabOrNewline(c)) {
+ if (isTabOrNewline(*c)) {
++c;
continue;
}
@@ -97,8 +95,8 @@
switch (state) {
case State::SchemeStart:
LOG_STATE("SchemeStart");
- if (isASCIIAlpha(c)) {
- buffer.append(toASCIILower(*c));
+ if (isASCIIAlpha(*c)) {
+ m_buffer.append(toASCIILower(*c));
state = State::Scheme;
} else
state = State::NoScheme;
@@ -106,12 +104,12 @@
break;
case State::Scheme:
LOG_STATE("Scheme");
- if (isASCIIAlphanumeric(c) || *c == '+' || *c == '-' || *c == '.')
- buffer.append(toASCIILower(*c));
+ if (isASCIIAlphanumeric(*c) || *c == '+' || *c == '-' || *c == '.')
+ m_buffer.append(toASCIILower(*c));
else if (*c == ':') {
- url.m_schemeEnd = buffer.length();
- String urlScheme = buffer.toString(); // FIXME: Find a way to do this without shrinking the buffer.
- url.m_protocolIsInHTTPFamily = urlScheme == "http" || urlScheme == "https";
+ m_url.m_schemeEnd = m_buffer.length();
+ String urlScheme = m_buffer.toString(); // FIXME: Find a way to do this without shrinking the m_buffer.
+ m_url.m_protocolIsInHTTPFamily = urlScheme == "http" || urlScheme == "https";
if (urlScheme == "file")
state = State::File;
else if (isSpecialScheme(urlScheme)) {
@@ -121,9 +119,9 @@
state = State::SpecialAuthoritySlashes;
} else
state = State::SchemeEndCheckForSlashes;
- buffer.append(':');
+ m_buffer.append(':');
} else {
- buffer.clear();
+ m_buffer.clear();
state = State::NoScheme;
// FIXME: Find a way to start over here.
notImplemented();
@@ -179,9 +177,9 @@
++c;
if (c == end)
return Nullopt;
- buffer.append('/');
+ m_buffer.append('/');
if (*c == '/') {
- buffer.append('/');
+ m_buffer.append('/');
state = State::SpecialAuthorityIgnoreSlashes;
++c;
break;
@@ -193,59 +191,32 @@
break;
case State::SpecialAuthorityIgnoreSlashes:
LOG_STATE("SpecialAuthorityIgnoreSlashes");
- if (*c != '/' && *c != '\\') {
- state = State::Authority;
- break;
- }
- notImplemented();
- ++c;
+ if (*c == '/' || *c == '\\')
+ ++c;
+ m_url.m_userStart = m_buffer.length();
+ state = State::AuthorityOrHost;
break;
- case State::Authority:
- LOG_STATE("Authority");
- if (!url.m_userStart)
- url.m_userStart = buffer.length();
+ case State::AuthorityOrHost:
+ LOG_STATE("AuthorityOrHost");
if (*c == '@') {
- url.m_passwordEnd = buffer.length();
- buffer.append('@');
+ authorityEndReached();
state = State::Host;
- notImplemented();
- } else if (*c == ':') {
- url.m_userEnd = buffer.length();
- buffer.append(*c);
- } else {
- if (*c == '/' || *c == '?' || *c == '#') {
- url.m_passwordEnd = buffer.length();
- state = State::Host;
- }
- buffer.append(*c);
- }
- ++c;
- break;
- case State::Host:
- case State::Hostname:
- LOG_STATE("Host/Hostname");
- if (*c == ':') {
- url.m_hostEnd = buffer.length();
- buffer.append(':');
- state = State::Port;
} else if (*c == '/' || *c == '?' || *c == '#') {
- url.m_hostEnd = buffer.length();
+ hostEndReached();
state = State::Path;
- continue;
+ break;
} else
- buffer.append(*c);
+ m_authorityOrHostBuffer.append(*c);
++c;
break;
- case State::Port:
- LOG_STATE("Port");
- if (isASCIIDigit(c)) {
- buffer.append(*c);
- } else if (*c == '/' || *c == '?' || *c == '#') {
- url.m_portEnd = buffer.length();
- state = State::PathStart;
+ case State::Host:
+ LOG_STATE("Host");
+ if (*c == '/' || *c == '?' || *c == '#') {
+ hostEndReached();
+ state = State::Path;
continue;
- } else
- return Nullopt;
+ }
+ m_authorityOrHostBuffer.append(*c);
++c;
break;
case State::File:
@@ -270,8 +241,8 @@
case State::Path:
LOG_STATE("Path");
if (*c == '/') {
- buffer.append('/');
- url.m_pathAfterLastSlash = buffer.length();
+ m_buffer.append('/');
+ m_url.m_pathAfterLastSlash = m_buffer.length();
++c;
if (c == end)
break;
@@ -284,16 +255,16 @@
notImplemented();
}
} else if (*c == '?') {
- url.m_pathEnd = buffer.length();
+ m_url.m_pathEnd = m_buffer.length();
state = State::Query;
continue;
} else if (*c == '#') {
- url.m_pathEnd = buffer.length();
+ m_url.m_pathEnd = m_buffer.length();
state = State::Fragment;
continue;
}
// FIXME: Percent encode c
- buffer.append(*c);
+ m_buffer.append(*c);
++c;
break;
case State::CannotBeABaseURLPath:
@@ -304,79 +275,177 @@
case State::Query:
LOG_STATE("Query");
if (*c == '#') {
- url.m_queryEnd = buffer.length();
+ m_url.m_queryEnd = m_buffer.length();
state = State::Fragment;
continue;
}
- buffer.append(*c);
+ m_buffer.append(*c);
++c;
break;
case State::Fragment:
LOG_STATE("Fragment");
- buffer.append(*c);
+ m_buffer.append(*c);
++c;
break;
}
}
-
+
switch (state) {
case State::SchemeStart:
+ LOG_FINAL_STATE("SchemeStart");
+ return Nullopt;
+ break;
case State::Scheme:
+ LOG_FINAL_STATE("Scheme");
+ break;
case State::SchemeEndCheckForSlashes:
+ LOG_FINAL_STATE("SchemeEndCheckForSlashes");
+ break;
case State::NoScheme:
+ LOG_FINAL_STATE("NoScheme");
+ break;
case State::SpecialRelativeOrAuthority:
+ LOG_FINAL_STATE("SpecialRelativeOrAuthority");
+ break;
case State::PathOrAuthority:
+ LOG_FINAL_STATE("PathOrAuthority");
+ break;
case State::Relative:
+ LOG_FINAL_STATE("Relative");
+ break;
case State::RelativeSlash:
+ LOG_FINAL_STATE("RelativeSlash");
+ break;
case State::SpecialAuthoritySlashes:
+ LOG_FINAL_STATE("SpecialAuthoritySlashes");
+ break;
case State::SpecialAuthorityIgnoreSlashes:
- case State::Authority:
+ LOG_FINAL_STATE("SpecialAuthorityIgnoreSlashes");
break;
+ case State::AuthorityOrHost:
+ LOG_FINAL_STATE("AuthorityOrHost");
+ m_url.m_userEnd = m_buffer.length();
+ m_url.m_passwordEnd = m_url.m_userEnd;
+ FALLTHROUGH;
case State::Host:
- case State::Hostname:
- url.m_hostEnd = buffer.length();
- url.m_portEnd = url.m_hostEnd;
- buffer.append('/');
- url.m_pathEnd = url.m_hostEnd + 1;
- url.m_pathAfterLastSlash = url.m_pathEnd;
- url.m_queryEnd = url.m_pathEnd;
- url.m_fragmentEnd = url.m_pathEnd;
+ if (state == State::Host)
+ LOG_FINAL_STATE("Host");
+ hostEndReached();
+ m_buffer.append('/');
+ m_url.m_pathEnd = m_url.m_portEnd + 1;
+ m_url.m_pathAfterLastSlash = m_url.m_pathEnd;
+ m_url.m_queryEnd = m_url.m_pathEnd;
+ m_url.m_fragmentEnd = m_url.m_pathEnd;
break;
- case State::Port:
- url.m_portEnd = buffer.length();
- buffer.append('/');
- url.m_pathEnd = url.m_portEnd + 1;
- url.m_pathAfterLastSlash = url.m_pathEnd;
- url.m_queryEnd = url.m_pathEnd;
- url.m_fragmentEnd = url.m_pathEnd;
+ case State::File:
+ LOG_FINAL_STATE("File");
break;
- case State::File:
case State::FileSlash:
+ LOG_FINAL_STATE("FileSlash");
+ break;
case State::FileHost:
+ LOG_FINAL_STATE("FileHost");
+ break;
case State::PathStart:
+ LOG_FINAL_STATE("PathStart");
+ break;
case State::Path:
- url.m_pathEnd = buffer.length();
- url.m_queryEnd = url.m_pathEnd;
- url.m_fragmentEnd = url.m_pathEnd;
+ LOG_FINAL_STATE("Path");
+ m_url.m_pathEnd = m_buffer.length();
+ m_url.m_queryEnd = m_url.m_pathEnd;
+ m_url.m_fragmentEnd = m_url.m_pathEnd;
break;
case State::CannotBeABaseURLPath:
+ LOG_FINAL_STATE("CannotBeABaseURLPath");
break;
case State::Query:
- url.m_queryEnd = buffer.length();
- url.m_fragmentEnd = url.m_queryEnd;
+ LOG_FINAL_STATE("Query");
+ m_url.m_queryEnd = m_buffer.length();
+ m_url.m_fragmentEnd = m_url.m_queryEnd;
break;
case State::Fragment:
- url.m_fragmentEnd = buffer.length();
+ LOG_FINAL_STATE("Fragment");
+ m_url.m_fragmentEnd = m_buffer.length();
break;
}
- url.m_string = buffer.toString();
- url.m_isValid = true;
- return url;
+ m_url.m_string = m_buffer.toString();
+ m_url.m_isValid = true;
+ return m_url;
}
+void URLParser::authorityEndReached()
+{
+ auto codePoints = StringView(m_authorityOrHostBuffer.toString()).codePoints();
+ auto iterator = codePoints.begin();
+ auto end = codePoints.end();
+ for (; iterator != end; ++iterator) {
+ m_buffer.append(*iterator);
+ if (*iterator == ':') {
+ ++iterator;
+ m_url.m_userEnd = m_buffer.length() - 1;
+ break;
+ }
+ }
+ for (; iterator != end; ++iterator)
+ m_buffer.append(*iterator);
+ m_url.m_passwordEnd = m_buffer.length();
+ m_buffer.append('@');
+ m_authorityOrHostBuffer.clear();
+}
+
+void URLParser::hostEndReached()
+{
+ auto codePoints = StringView(m_authorityOrHostBuffer.toString()).codePoints();
+ auto iterator = codePoints.begin();
+ auto end = codePoints.end();
+ for (; iterator != end; ++iterator) {
+ if (*iterator == ':') {
+ ++iterator;
+ m_url.m_hostEnd = m_buffer.length();
+ m_buffer.append(':');
+ for (; iterator != end; ++iterator)
+ m_buffer.append(*iterator);
+ m_url.m_portEnd = m_buffer.length();
+ return;
+ }
+ m_buffer.append(*iterator);
+ }
+ m_url.m_hostEnd = m_buffer.length();
+ m_url.m_portEnd = m_url.m_hostEnd;
+ m_authorityOrHostBuffer.clear();
+}
+
bool URLParser::allValuesEqual(const URL& a, const URL& b)
{
+ LOG(URLParser, "%d %d %d %d %d %d %d %d %d %d %d %d %s\n%d %d %d %d %d %d %d %d %d %d %d %d %s",
+ a.m_isValid,
+ a.m_protocolIsInHTTPFamily,
+ a.m_schemeEnd,
+ a.m_userStart,
+ a.m_userEnd,
+ a.m_passwordEnd,
+ a.m_hostEnd,
+ a.m_portEnd,
+ a.m_pathAfterLastSlash,
+ a.m_pathEnd,
+ a.m_queryEnd,
+ a.m_fragmentEnd,
+ a.m_string.utf8().data(),
+ b.m_isValid,
+ b.m_protocolIsInHTTPFamily,
+ b.m_schemeEnd,
+ b.m_userStart,
+ b.m_userEnd,
+ b.m_passwordEnd,
+ b.m_hostEnd,
+ b.m_portEnd,
+ b.m_pathAfterLastSlash,
+ b.m_pathEnd,
+ b.m_queryEnd,
+ b.m_fragmentEnd,
+ b.m_string.utf8().data());
+
return a.m_string == b.m_string
&& a.m_isValid == b.m_isValid
&& a.m_protocolIsInHTTPFamily == b.m_protocolIsInHTTPFamily
Modified: trunk/Source/WebCore/platform/URLParser.h (204543 => 204544)
--- trunk/Source/WebCore/platform/URLParser.h 2016-08-17 00:25:36 UTC (rev 204543)
+++ trunk/Source/WebCore/platform/URLParser.h 2016-08-17 00:41:30 UTC (rev 204544)
@@ -28,13 +28,20 @@
#include "TextEncoding.h"
#include "URL.h"
#include <wtf/Forward.h>
+#include <wtf/text/StringBuilder.h>
namespace WebCore {
class URLParser {
public:
- WEBCORE_EXPORT static Optional<URL> parse(const String&, const URL& = { }, const TextEncoding& = UTF8Encoding());
+ WEBCORE_EXPORT Optional<URL> parse(const String&, const URL& = { }, const TextEncoding& = UTF8Encoding());
WEBCORE_EXPORT static bool allValuesEqual(const URL&, const URL&);
+private:
+ URL m_url;
+ StringBuilder m_buffer;
+ StringBuilder m_authorityOrHostBuffer;
+ void authorityEndReached();
+ void hostEndReached();
};
}
Modified: trunk/Tools/ChangeLog (204543 => 204544)
--- trunk/Tools/ChangeLog 2016-08-17 00:25:36 UTC (rev 204543)
+++ trunk/Tools/ChangeLog 2016-08-17 00:41:30 UTC (rev 204544)
@@ -1,3 +1,15 @@
+2016-08-16 Alex Christensen <[email protected]>
+
+ URLParser should parse URLs without credentials
+ https://bugs.webkit.org/show_bug.cgi?id=160913
+
+ Reviewed by Brady Eidson.
+
+ * TestWebKitAPI/Tests/WebCore/URLParser.cpp:
+ (TestWebKitAPI::s):
+ (TestWebKitAPI::checkURL):
+ (TestWebKitAPI::TEST_F):
+
2016-08-16 Anders Carlsson <[email protected]>
Add WTF::ScopeExit
Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp (204543 => 204544)
--- trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-08-17 00:25:36 UTC (rev 204543)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/URLParser.cpp 2016-08-17 00:41:30 UTC (rev 204544)
@@ -53,7 +53,8 @@
static const char* s(const String& s) { return s.utf8().data(); }
static void checkURL(const String& urlString, const ExpectedParts& parts)
{
- auto url = ""
+ URLParser parser;
+ auto url = ""
EXPECT_STREQ(s(parts.protocol), s(url->protocol()));
EXPECT_STREQ(s(parts.user), s(url->user()));
EXPECT_STREQ(s(parts.password), s(url->pass()));
@@ -86,6 +87,19 @@
checkURL("http://user:[email protected]:123/", {"http", "user", "pass", "webkit.org", 123, "/", "", "", "http://user:[email protected]:123/"});
checkURL("http://user:[email protected]:123", {"http", "user", "pass", "webkit.org", 123, "/", "", "", "http://user:[email protected]:123/"});
checkURL("http://user:[email protected]", {"http", "user", "pass", "webkit.org", 0, "/", "", "", "http://user:[email protected]/"});
+ checkURL("http://webkit.org", {"http", "", "", "webkit.org", 0, "/", "", "", "http://webkit.org/"});
}
+static void shouldFail(const String& urlString)
+{
+ URLParser parser;
+ auto invalidURL = parser.parse(urlString);
+ EXPECT_TRUE(invalidURL == Nullopt);
+}
+
+TEST_F(URLParserTest, ParserFailures)
+{
+ shouldFail(" ");
+}
+
} // namespace TestWebKitAPI