Diff
Modified: trunk/LayoutTests/imported/w3c/ChangeLog (259772 => 259773)
--- trunk/LayoutTests/imported/w3c/ChangeLog 2020-04-09 00:42:25 UTC (rev 259772)
+++ trunk/LayoutTests/imported/w3c/ChangeLog 2020-04-09 00:43:49 UTC (rev 259773)
@@ -1,3 +1,15 @@
+2020-04-08 Chris Dumez <[email protected]>
+
+ querySelector("#\u0000") should match an element with ID U+FFFD
+ https://bugs.webkit.org/show_bug.cgi?id=210119
+
+ Reviewed by Darin Adler.
+
+ Import test coverage from upstream WPT.
+
+ * web-platform-tests/dom/nodes/ParentNode-querySelector-escapes-expected.txt: Added.
+ * web-platform-tests/dom/nodes/ParentNode-querySelector-escapes.html: Added.
+
2020-04-08 Rob Buis <[email protected]>
Import fetch/origin/assorted.window.js
Added: trunk/LayoutTests/imported/w3c/web-platform-tests/dom/nodes/ParentNode-querySelector-escapes-expected.txt (0 => 259773)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/dom/nodes/ParentNode-querySelector-escapes-expected.txt (rev 0)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/dom/nodes/ParentNode-querySelector-escapes-expected.txt 2020-04-09 00:43:49 UTC (rev 259773)
@@ -0,0 +1,70 @@
+
+PASS "nonescaped" should match with "#nonescaped"
+PASS "0nextIsWhiteSpace" should match with "#\\30 nextIsWhiteSpace"
+PASS "0nextIsNotHexLetters" should match with "#\\30nextIsNotHexLetters"
+PASS "0connectHexMoreThan6Hex" should match with "#\\000030connectHexMoreThan6Hex"
+PASS "0spaceMoreThan6Hex" should match with "#\\000030 spaceMoreThan6Hex"
+PASS "zero�" should match with "#zero\\0"
+PASS "zero\u0000" should never match with "#zero\\0"
+PASS "zero�" should match with "#zero\\000000"
+PASS "zero\u0000" should never match with "#zero\\000000"
+PASS "�surrogateFirst" should match with "#\\d83d surrogateFirst"
+PASS "\ud83dsurrogateFirst" should never match with "#\\d83d surrogateFirst"
+PASS "surrogateSecond�" should match with "#surrogateSecond\\dd11"
+PASS "surrogateSecond\udd11" should never match with "#surrogateSecond\\dd11"
+PASS "surrogatePair��" should match with "#surrogatePair\\d83d\\dd11"
+PASS "surrogatePair🔑" should never match with "#surrogatePair\\d83d\\dd11"
+PASS "outOfRange�" should match with "#outOfRange\\110000"
+PASS "outOfRange�" should match with "#outOfRange\\110030"
+PASS "outOfRange0" should never match with "#outOfRange\\110030"
+PASS "outOfRange�" should match with "#outOfRange\\555555"
+PASS "outOfRange�" should match with "#outOfRange\\ffffff"
+PASS "eof\\" should never match with "#eof\\"
+PASS ".comma" should match with "#\\.comma"
+PASS "-minus" should match with "#\\-minus"
+PASS "g" should match with "#\\g"
+PASS "aBMPRegular" should match with "#\\61 BMPRegular"
+PASS "🔑nonBMP" should match with "#\\1f511 nonBMP"
+PASS "00continueEscapes" should match with "#\\30\\30 continueEscapes"
+PASS "00continueEscapes" should match with "#\\30 \\30 continueEscapes"
+PASS "continueEscapes00" should match with "#continueEscapes\\30 \\30 "
+PASS "continueEscapes00" should match with "#continueEscapes\\30 \\30"
+PASS "continueEscapes00" should match with "#continueEscapes\\30\\30 "
+PASS "continueEscapes00" should match with "#continueEscapes\\30\\30"
+PASS "hello" should match with "#hel\\6Co"
+PASS "&B" should match with "#\\26 B"
+PASS "hello" should match with "#hel\\6C o"
+PASS "spaces" should match with "#spac\\65\r\ns"
+PASS "spaces" should match with "#sp\\61\tc\\65\fs"
+PASS "test힙" should match with "#test\\D799"
+PASS "" should match with "#\\E000"
+PASS "test" should match with "#te\\s\\t"
+PASS "spaces in\tident" should match with "#spaces\\ in\\\tident"
+PASS ".,:!" should match with "#\\.\\,\\:\\!"
+PASS "null�" should match with "#null\\0"
+PASS "null�" should match with "#null\\0000"
+PASS "large�" should match with "#large\\110000"
+PASS "large�" should match with "#large\\23456a"
+PASS "surrogate�" should match with "#surrogate\\D800"
+PASS "surrogate�" should match with "#surrogate\\0DBAC"
+PASS "�surrogate" should match with "#\\00DFFFsurrogate"
+PASS "\x{10FFFF}" should match with "#\\10fFfF"
+PASS "\x{10FFFF}0" should match with "#\\10fFfF0"
+PASS "00" should match with "#\\10000000"
+PASS "eof�" should match with "#eof\\"
+PASS "simple-ident" should match with "#simple-ident"
+PASS "testing123" should match with "#testing123"
+PASS "_underscore" should match with "#_underscore"
+PASS "-text" should match with "#-text"
+PASS "-m" should match with "#-\\6d"
+PASS "--abc" should match with "#--abc"
+PASS "--" should match with "#--"
+PASS "--11" should match with "#--11"
+PASS "---" should match with "#---"
+PASS " " should match with "# "
+PASS " " should match with "# "
+PASS "ሴ" should match with "#ሴ"
+PASS "𒍅" should match with "#𒍅"
+PASS "�" should match with "#\u0000"
+PASS "ab�c" should match with "#ab\u0000c"
+
Added: trunk/LayoutTests/imported/w3c/web-platform-tests/dom/nodes/ParentNode-querySelector-escapes.html (0 => 259773)
--- trunk/LayoutTests/imported/w3c/web-platform-tests/dom/nodes/ParentNode-querySelector-escapes.html (rev 0)
+++ trunk/LayoutTests/imported/w3c/web-platform-tests/dom/nodes/ParentNode-querySelector-escapes.html 2020-04-09 00:43:49 UTC (rev 259773)
@@ -0,0 +1,123 @@
+<!DOCTYPE html>
+<meta charset=utf-8>
+<title>querySelector() with CSS escapes</title>
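+<link rel="help" href="https://dom.spec.whatwg.org/#dom-parentnode-queryselector">
+<link rel="help" href="https://drafts.csswg.org/css-syntax/#consume-escaped-code-point">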
+<link rel="author" title="Domenic Denicola" href=""
+<link rel="author" title="bellbind" href=""
+<script src="/resources/testharness.js"></script>
+<script src="/resources/testharnessreport.js"></script>
+
+<script>
+"use strict";
+
+function testMatched(id, selector) {
+ test(() => {
+ const container = document.createElement("div");
+ const child = document.createElement("span");
+ child.id = id;
+
+ container.appendChild(child);
+
+ assert_equals(container.querySelector(selector), child);
+ }, `${JSON.stringify(id)} should match with ${JSON.stringify(selector)}`);
+}
+
+function testNeverMatched(id, selector) {
+ test(() => {
+ const container = document.createElement("div");
+ const child = document.createElement("span");
+ child.id = id;
+
+ container.appendChild(child);
+
+ assert_equals(container.querySelector(selector), null);
+ }, `${JSON.stringify(id)} should never match with ${JSON.stringify(selector)}`);
+}
+
+// 4.3.7 from https://drafts.csswg.org/css-syntax/#consume-escaped-code-point
+testMatched("nonescaped", "#nonescaped");
+
+// - escape hex digit
+testMatched("0nextIsWhiteSpace", "#\\30 nextIsWhiteSpace");
+testMatched("0nextIsNotHexLetters", "#\\30nextIsNotHexLetters");
+testMatched("0connectHexMoreThan6Hex", "#\\000030connectHexMoreThan6Hex");
+testMatched("0spaceMoreThan6Hex", "#\\000030 spaceMoreThan6Hex");
+
+// - hex digit special replacement
+// 1. zero points
+testMatched("zero\u{fffd}", "#zero\\0");
+testNeverMatched("zero\u{0}", "#zero\\0");
+testMatched("zero\u{fffd}", "#zero\\000000");
+testNeverMatched("zero\u{0}", "#zero\\000000");
+// 2. surrogate points
+testMatched("\u{fffd}surrogateFirst", "#\\d83d surrogateFirst");
+testNeverMatched("\ud83dsurrogateFirst", "#\\d83d surrogateFirst");
+testMatched("surrogateSecond\u{fffd}", "#surrogateSecond\\dd11");
+testNeverMatched("surrogateSecond\udd11", "#surrogateSecond\\dd11");
+testMatched("surrogatePair\u{fffd}\u{fffd}", "#surrogatePair\\d83d\\dd11");
+testNeverMatched("surrogatePair\u{1f511}", "#surrogatePair\\d83d\\dd11");
+// 3. out of range points
+testMatched("outOfRange\u{fffd}", "#outOfRange\\110000");
+testMatched("outOfRange\u{fffd}", "#outOfRange\\110030");
+testNeverMatched("outOfRange\u{30}", "#outOfRange\\110030");
+testMatched("outOfRange\u{fffd}", "#outOfRange\\555555");
+testMatched("outOfRange\u{fffd}", "#outOfRange\\ffffff");
+
+// - escape EOF
+testNeverMatched("eof\\", "#eof\\");
+
+// - escape anything else
+testMatched(".comma", "#\\.comma");
+testMatched("-minus", "#\\-minus");
+testMatched("g", "#\\g");
+
+// non edge cases
+testMatched("aBMPRegular", "#\\61 BMPRegular");
+testMatched("\u{1f511}nonBMP", "#\\1f511 nonBMP");
+testMatched("00continueEscapes", "#\\30\\30 continueEscapes");
+testMatched("00continueEscapes", "#\\30 \\30 continueEscapes");
+testMatched("continueEscapes00", "#continueEscapes\\30 \\30 ");
+testMatched("continueEscapes00", "#continueEscapes\\30 \\30");
+testMatched("continueEscapes00", "#continueEscapes\\30\\30 ");
+testMatched("continueEscapes00", "#continueEscapes\\30\\30");
+
+// ident tests case from CSS tests of chromium source: https://goo.gl/3Cxdov
+testMatched("hello", "#hel\\6Co");
+testMatched("&B", "#\\26 B");
+testMatched("hello", "#hel\\6C o");
+testMatched("spaces", "#spac\\65\r\ns");
+testMatched("spaces", "#sp\\61\tc\\65\fs");
+testMatched("test\u{D799}", "#test\\D799");
+testMatched("\u{E000}", "#\\E000");
+testMatched("test", "#te\\s\\t");
+testMatched("spaces in\tident", "#spaces\\ in\\\tident");
+testMatched(".,:!", "#\\.\\,\\:\\!");
+testMatched("null\u{fffd}", "#null\\0");
+testMatched("null\u{fffd}", "#null\\0000");
+testMatched("large\u{fffd}", "#large\\110000");
+testMatched("large\u{fffd}", "#large\\23456a");
+testMatched("surrogate\u{fffd}", "#surrogate\\D800");
+testMatched("surrogate\u{fffd}", "#surrogate\\0DBAC");
+testMatched("\u{fffd}surrogate", "#\\00DFFFsurrogate");
+testMatched("\u{10ffff}", "#\\10fFfF");
+testMatched("\u{10ffff}0", "#\\10fFfF0");
+testMatched("\u{100000}00", "#\\10000000");
+testMatched("eof\u{fffd}", "#eof\\");
+
+testMatched("simple-ident", "#simple-ident");
+testMatched("testing123", "#testing123");
+testMatched("_underscore", "#_underscore");
+testMatched("-text", "#-text");
+testMatched("-m", "#-\\6d");
+testMatched("--abc", "#--abc");
+testMatched("--", "#--");
+testMatched("--11", "#--11");
+testMatched("---", "#---");
+testMatched("\u{2003}", "#\u{2003}");
+testMatched("\u{A0}", "#\u{A0}");
+testMatched("\u{1234}", "#\u{1234}");
+testMatched("\u{12345}", "#\u{12345}");
+testMatched("\u{fffd}", "#\u{0}");
+testMatched("ab\u{fffd}c", "#ab\u{0}c");
+</script>
Modified: trunk/Source/WTF/ChangeLog (259772 => 259773)
--- trunk/Source/WTF/ChangeLog 2020-04-09 00:42:25 UTC (rev 259772)
+++ trunk/Source/WTF/ChangeLog 2020-04-09 00:43:49 UTC (rev 259773)
@@ -1,3 +1,16 @@
+2020-04-08 Chris Dumez <[email protected]>
+
+ querySelector("#\u0000") should match an element with ID U+FFFD
+ https://bugs.webkit.org/show_bug.cgi?id=210119
+
+ Reviewed by Darin Adler.
+
+ * wtf/text/StringImpl.cpp:
+ (WTF::StringImpl::replace):
+ Slightly optimize the 16-bit code path of StringImpl::replace(). Since we know
+ there is no character match in the index range [0, i), we can simply use memcpy for
+ this range.
+
2020-04-08 Ross Kirsling <[email protected]>
Remove ENABLE_INTL define
Modified: trunk/Source/WTF/wtf/text/StringImpl.cpp (259772 => 259773)
--- trunk/Source/WTF/wtf/text/StringImpl.cpp 2020-04-09 00:42:25 UTC (rev 259772)
+++ trunk/Source/WTF/wtf/text/StringImpl.cpp 2020-04-09 00:43:49 UTC (rev 259773)
@@ -1302,11 +1302,12 @@
UChar* data;
auto newImpl = createUninitializedInternalNonEmpty(m_length, data);
- for (i = 0; i != m_length; ++i) {
- UChar character = m_data16[i];
+ memcpy(data, m_data16, i * sizeof(UChar));
+ for (unsigned j = i; j != m_length; ++j) {
+ UChar character = m_data16[j];
if (character == target)
character = replacement;
- data[i] = character;
+ data[j] = character;
}
return newImpl;
}
Modified: trunk/Source/WebCore/ChangeLog (259772 => 259773)
--- trunk/Source/WebCore/ChangeLog 2020-04-09 00:42:25 UTC (rev 259772)
+++ trunk/Source/WebCore/ChangeLog 2020-04-09 00:43:49 UTC (rev 259773)
@@ -1,3 +1,55 @@
+2020-04-08 Chris Dumez <[email protected]>
+
+ querySelector("#\u0000") should match an element with ID U+FFFD
+ https://bugs.webkit.org/show_bug.cgi?id=210119
+
+ Reviewed by Darin Adler.
+
+ As per the specification [1][2], we should preprocess the input string before performing
+ CSS tokenization. The preprocessing step replaces certain characters in the input string.
+
+ However, our code did not have this preprocessing step and instead was trying to deal
+ with those characters during tokenization. This is however not working as expected for
+ the '\0' character (which is supposed to be replaced with U+FFFD REPLACEMENT CHARACTER)
+ because our code deals with StringViews of the input String and just converts part of
+ the input stream to Strings / AtomStrings.
+
+ To address the issue, this patch adds a preprocessing step that replaces the '\0'
+ character with the U+FFFD REPLACEMENT CHARACTER. I opted not to replace '\r' or '\f'
+ characters since our tokenizer seems to be dealing fine with those.
+
+ [1] https://drafts.csswg.org/css-syntax/#input-preprocessing
+ [2] https://drafts.csswg.org/css-syntax/#parser-entry-points
+
+ Test: imported/w3c/web-platform-tests/dom/nodes/ParentNode-querySelector-escapes.html
+
+ * css/parser/CSSTokenizer.cpp:
+ (WebCore::preprocessString):
+ (WebCore::CSSTokenizer::CSSTokenizer):
+ (WebCore::CSSTokenizer::lessThan):
+ (WebCore::CSSTokenizer::hyphenMinus):
+ (WebCore::CSSTokenizer::hash):
+ (WebCore::CSSTokenizer::reverseSolidus):
+ (WebCore::CSSTokenizer::letterU):
+ (WebCore::CSSTokenizer::consumeNumber):
+ (WebCore::CSSTokenizer::consumeIdentLikeToken):
+ (WebCore::CSSTokenizer::consumeStringTokenUntil):
+ (WebCore::CSSTokenizer::consumeUnicodeRange):
+ (WebCore::CSSTokenizer::consumeUrlToken):
+ (WebCore::CSSTokenizer::consumeBadUrlRemnants):
+ (WebCore::CSSTokenizer::consumeSingleWhitespaceIfNext):
+ (WebCore::CSSTokenizer::consumeIfNext):
+ (WebCore::CSSTokenizer::consumeName):
+ (WebCore::CSSTokenizer::consumeEscape):
+ (WebCore::CSSTokenizer::nextTwoCharsAreValidEscape):
+ (WebCore::CSSTokenizer::nextCharsAreNumber):
+ (WebCore::CSSTokenizer::nextCharsAreIdentifier):
+ * css/parser/CSSTokenizer.h:
+ * css/parser/CSSTokenizerInputStream.h:
+ (WebCore::CSSTokenizerInputStream::nextInputChar const):
+ (WebCore::CSSTokenizerInputStream::peek const):
+ (WebCore::CSSTokenizerInputStream::peekWithoutReplacement const): Deleted.
+
2020-04-08 Alex Christensen <[email protected]>
_corsDisablingPatterns should allow security policy access to those patterns
Modified: trunk/Source/WebCore/css/parser/CSSTokenizer.cpp (259772 => 259773)
--- trunk/Source/WebCore/css/parser/CSSTokenizer.cpp 2020-04-09 00:42:25 UTC (rev 259772)
+++ trunk/Source/WebCore/css/parser/CSSTokenizer.cpp 2020-04-09 00:43:49 UTC (rev 259773)
@@ -40,17 +40,27 @@
namespace WebCore {
+// See: https://drafts.csswg.org/css-syntax/#input-preprocessing
+static String preprocessString(String string)
+{
+ // According to the specification, we should replace '\r' and '\f' with '\n' but we do not need to
+ // because our CSSTokenizer treats all of them as new lines.
+ return string.replace('\0', replacementCharacter);
+}
+
CSSTokenizer::CSSTokenizer(const String& string)
+ : CSSTokenizer(preprocessString(string), nullptr)
+{
+}
+
+CSSTokenizer::CSSTokenizer(const String& string, CSSParserObserverWrapper& wrapper)
+ : CSSTokenizer(preprocessString(string), &wrapper)
+{
+}
+
+inline CSSTokenizer::CSSTokenizer(String&& string, CSSParserObserverWrapper* wrapper)
: m_input(string)
{
- // According to the spec, we should perform preprocessing here.
- // See: http://dev.w3.org/csswg/css-syntax/#input-preprocessing
- //
- // However, we can skip this step since:
- // * We're using HTML spaces (which accept \r and \f as a valid white space)
- // * Do not count white spaces
- // * CSSTokenizerInputStream::nextInputChar() replaces NULLs for replacement characters
-
if (string.isEmpty())
return;
@@ -58,38 +68,26 @@
// Most strings we tokenize have about 3.5 to 5 characters per token.
m_tokens.reserveInitialCapacity(string.length() / 3);
- while (true) {
- CSSParserToken token = nextToken();
- if (token.type() == CommentToken)
- continue;
- if (token.type() == EOFToken)
- return;
- m_tokens.append(token);
- }
-}
-
-CSSTokenizer::CSSTokenizer(const String& string, CSSParserObserverWrapper& wrapper)
- : m_input(string)
-{
- if (string.isEmpty())
- return;
-
unsigned offset = 0;
while (true) {
CSSParserToken token = nextToken();
if (token.type() == EOFToken)
break;
- if (token.type() == CommentToken)
- wrapper.addComment(offset, m_input.offset(), m_tokens.size());
- else {
+ if (token.type() == CommentToken) {
+ if (wrapper)
+ wrapper->addComment(offset, m_input.offset(), m_tokens.size());
+ } else {
m_tokens.append(token);
- wrapper.addToken(offset);
+ if (wrapper)
+ wrapper->addToken(offset);
}
offset = m_input.offset();
}
- wrapper.addToken(offset);
- wrapper.finalizeConstruction(m_tokens.begin());
+ if (wrapper) {
+ wrapper->addToken(offset);
+ wrapper->finalizeConstruction(m_tokens.begin());
+ }
}
CSSParserTokenRange CSSTokenizer::tokenRange() const
@@ -203,9 +201,7 @@
CSSParserToken CSSTokenizer::lessThan(UChar cc)
{
ASSERT_UNUSED(cc, cc == '<');
- if (m_input.peekWithoutReplacement(0) == '!'
- && m_input.peekWithoutReplacement(1) == '-'
- && m_input.peekWithoutReplacement(2) == '-') {
+ if (m_input.peek(0) == '!' && m_input.peek(1) == '-' && m_input.peek(2) == '-') {
m_input.advance(3);
return CSSParserToken(CDOToken);
}
@@ -223,8 +219,7 @@
reconsume(cc);
return consumeNumericToken();
}
- if (m_input.peekWithoutReplacement(0) == '-'
- && m_input.peekWithoutReplacement(1) == '>') {
+ if (m_input.peek(0) == '-' && m_input.peek(1) == '>') {
m_input.advance(2);
return CSSParserToken(CDCToken);
}
@@ -258,8 +253,8 @@
CSSParserToken CSSTokenizer::hash(UChar cc)
{
- UChar nextChar = m_input.peekWithoutReplacement(0);
- if (isNameCodePoint(nextChar) || twoCharsAreValidEscape(nextChar, m_input.peekWithoutReplacement(1))) {
+ UChar nextChar = m_input.peek(0);
+ if (isNameCodePoint(nextChar) || twoCharsAreValidEscape(nextChar, m_input.peek(1))) {
HashTokenType type = nextCharsAreIdentifier() ? HashTokenId : HashTokenUnrestricted;
return CSSParserToken(type, consumeName());
}
@@ -311,7 +306,7 @@
CSSParserToken CSSTokenizer::reverseSolidus(UChar cc)
{
- if (twoCharsAreValidEscape(cc, m_input.peekWithoutReplacement(0))) {
+ if (twoCharsAreValidEscape(cc, m_input.peek(0))) {
reconsume(cc);
return consumeIdentLikeToken();
}
@@ -326,8 +321,7 @@
CSSParserToken CSSTokenizer::letterU(UChar cc)
{
- if (m_input.peekWithoutReplacement(0) == '+'
- && (isASCIIHexDigit(m_input.peekWithoutReplacement(1)) || m_input.peekWithoutReplacement(1) == '?')) {
+ if (m_input.peek(0) == '+' && (isASCIIHexDigit(m_input.peek(1)) || m_input.peek(1) == '?')) {
m_input.advance();
return consumeUnicodeRange();
}
@@ -519,7 +513,7 @@
NumericSign sign = NoSign;
unsigned numberLength = 0;
- UChar next = m_input.peekWithoutReplacement(0);
+ UChar next = m_input.peek(0);
if (next == '+') {
++numberLength;
sign = PlusSign;
@@ -529,19 +523,19 @@
}
numberLength = m_input.skipWhilePredicate<isASCIIDigit>(numberLength);
- next = m_input.peekWithoutReplacement(numberLength);
- if (next == '.' && isASCIIDigit(m_input.peekWithoutReplacement(numberLength + 1))) {
+ next = m_input.peek(numberLength);
+ if (next == '.' && isASCIIDigit(m_input.peek(numberLength + 1))) {
type = NumberValueType;
numberLength = m_input.skipWhilePredicate<isASCIIDigit>(numberLength + 2);
- next = m_input.peekWithoutReplacement(numberLength);
+ next = m_input.peek(numberLength);
}
if (next == 'E' || next == 'e') {
- next = m_input.peekWithoutReplacement(numberLength + 1);
+ next = m_input.peek(numberLength + 1);
if (isASCIIDigit(next)) {
type = NumberValueType;
numberLength = m_input.skipWhilePredicate<isASCIIDigit>(numberLength + 1);
- } else if ((next == '+' || next == '-') && isASCIIDigit(m_input.peekWithoutReplacement(numberLength + 2))) {
+ } else if ((next == '+' || next == '-') && isASCIIDigit(m_input.peek(numberLength + 2))) {
type = NumberValueType;
numberLength = m_input.skipWhilePredicate<isASCIIDigit>(numberLength + 3);
}
@@ -573,7 +567,7 @@
// The spec is slightly different so as to avoid dropping whitespace
// tokens, but they wouldn't be used and this is easier.
m_input.advanceUntilNonWhitespace();
- UChar next = m_input.peekWithoutReplacement(0);
+ UChar next = m_input.peek(0);
if (next != '"' && next != '\'')
return consumeUrlToken();
}
@@ -587,7 +581,7 @@
{
// Strings without escapes get handled without allocations
for (unsigned size = 0; ; size++) {
- UChar cc = m_input.peekWithoutReplacement(size);
+ UChar cc = m_input.peek(size);
if (cc == endingCodePoint) {
unsigned startOffset = m_input.offset();
m_input.advance(size + 1);
@@ -597,7 +591,7 @@
m_input.advance(size);
return CSSParserToken(BadStringToken);
}
- if (cc == '\0' || cc == '\\')
+ if (cc == kEndOfFileMarker || cc == '\\')
break;
}
@@ -613,7 +607,7 @@
if (cc == '\\') {
if (m_input.nextInputChar() == kEndOfFileMarker)
continue;
- if (isNewLine(m_input.peekWithoutReplacement(0)))
+ if (isNewLine(m_input.peek(0)))
consumeSingleWhitespaceIfNext(); // This handles \r\n for us
else
output.appendCharacter(consumeEscape());
@@ -624,11 +618,11 @@
CSSParserToken CSSTokenizer::consumeUnicodeRange()
{
- ASSERT(isASCIIHexDigit(m_input.peekWithoutReplacement(0)) || m_input.peekWithoutReplacement(0) == '?');
+ ASSERT(isASCIIHexDigit(m_input.peek(0)) || m_input.peek(0) == '?');
int lengthRemaining = 6;
UChar32 start = 0;
- while (lengthRemaining && isASCIIHexDigit(m_input.peekWithoutReplacement(0))) {
+ while (lengthRemaining && isASCIIHexDigit(m_input.peek(0))) {
start = start * 16 + toASCIIHexValue(consume());
--lengthRemaining;
}
@@ -640,7 +634,7 @@
end = end * 16 + 0xF;
--lengthRemaining;
} while (lengthRemaining && consumeIfNext('?'));
- } else if (m_input.peekWithoutReplacement(0) == '-' && isASCIIHexDigit(m_input.peekWithoutReplacement(1))) {
+ } else if (m_input.peek(0) == '-' && isASCIIHexDigit(m_input.peek(1))) {
m_input.advance();
lengthRemaining = 6;
end = 0;
@@ -647,7 +641,7 @@
do {
end = end * 16 + toASCIIHexValue(consume());
--lengthRemaining;
- } while (lengthRemaining && isASCIIHexDigit(m_input.peekWithoutReplacement(0)));
+ } while (lengthRemaining && isASCIIHexDigit(m_input.peek(0)));
}
return CSSParserToken(UnicodeRangeToken, start, end);
@@ -666,7 +660,7 @@
// URL tokens without escapes get handled without allocations
for (unsigned size = 0; ; size++) {
- UChar cc = m_input.peekWithoutReplacement(size);
+ UChar cc = m_input.peek(size);
if (cc == ')') {
unsigned startOffset = m_input.offset();
m_input.advance(size + 1);
@@ -693,7 +687,7 @@
break;
if (cc == '\\') {
- if (twoCharsAreValidEscape(cc, m_input.peekWithoutReplacement(0))) {
+ if (twoCharsAreValidEscape(cc, m_input.peek(0))) {
result.appendCharacter(consumeEscape());
continue;
}
@@ -714,7 +708,7 @@
UChar cc = consume();
if (cc == ')' || cc == kEndOfFileMarker)
return;
- if (twoCharsAreValidEscape(cc, m_input.peekWithoutReplacement(0)))
+ if (twoCharsAreValidEscape(cc, m_input.peek(0)))
consumeEscape();
}
}
@@ -722,8 +716,8 @@
void CSSTokenizer::consumeSingleWhitespaceIfNext()
{
// We check for \r\n and HTML spaces since we don't do preprocessing
- UChar next = m_input.peekWithoutReplacement(0);
- if (next == '\r' && m_input.peekWithoutReplacement(1) == '\n')
+ UChar next = m_input.peek(0);
+ if (next == '\r' && m_input.peek(1) == '\n')
m_input.advance(2);
else if (isHTMLSpace(next))
m_input.advance();
@@ -751,7 +745,7 @@
// a NUL in the middle and the kEndOfFileMarker, so character must not be
// NUL.
ASSERT(character);
- if (m_input.peekWithoutReplacement(0) == character) {
+ if (m_input.peek(0) == character) {
m_input.advance();
return true;
}
@@ -763,13 +757,13 @@
{
// Names without escapes get handled without allocations
for (unsigned size = 0; ; ++size) {
- UChar cc = m_input.peekWithoutReplacement(size);
+ UChar cc = m_input.peek(size);
if (isNameCodePoint(cc))
continue;
- // peekWithoutReplacement will return NUL when we hit the end of the
+ // peek will return NUL when we hit the end of the
// input. In that case we want to still use the rangeAt() fast path
// below.
- if (cc == '\0' && m_input.offset() + size < m_input.length())
+ if (cc == kEndOfFileMarker && m_input.offset() + size < m_input.length())
break;
if (cc == '\\')
break;
@@ -785,7 +779,7 @@
result.append(cc);
continue;
}
- if (twoCharsAreValidEscape(cc, m_input.peekWithoutReplacement(0))) {
+ if (twoCharsAreValidEscape(cc, m_input.peek(0))) {
result.appendCharacter(consumeEscape());
continue;
}
@@ -803,7 +797,7 @@
unsigned consumedHexDigits = 1;
StringBuilder hexChars;
hexChars.append(cc);
- while (consumedHexDigits < 6 && isASCIIHexDigit(m_input.peekWithoutReplacement(0))) {
+ while (consumedHexDigits < 6 && isASCIIHexDigit(m_input.peek(0))) {
cc = consume();
hexChars.append(cc);
consumedHexDigits++;
@@ -824,17 +818,17 @@
bool CSSTokenizer::nextTwoCharsAreValidEscape()
{
- return twoCharsAreValidEscape(m_input.peekWithoutReplacement(0), m_input.peekWithoutReplacement(1));
+ return twoCharsAreValidEscape(m_input.peek(0), m_input.peek(1));
}
// http://www.w3.org/TR/css3-syntax/#starts-with-a-number
bool CSSTokenizer::nextCharsAreNumber(UChar first)
{
- UChar second = m_input.peekWithoutReplacement(0);
+ UChar second = m_input.peek(0);
if (isASCIIDigit(first))
return true;
if (first == '+' || first == '-')
- return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input.peekWithoutReplacement(1))));
+ return ((isASCIIDigit(second)) || (second == '.' && isASCIIDigit(m_input.peek(1))));
if (first =='.')
return (isASCIIDigit(second));
return false;
@@ -851,7 +845,7 @@
// http://dev.w3.org/csswg/css-syntax/#would-start-an-identifier
bool CSSTokenizer::nextCharsAreIdentifier(UChar first)
{
- UChar second = m_input.peekWithoutReplacement(0);
+ UChar second = m_input.peek(0);
if (isNameStartCodePoint(first) || twoCharsAreValidEscape(first, second))
return true;
Modified: trunk/Source/WebCore/css/parser/CSSTokenizer.h (259772 => 259773)
--- trunk/Source/WebCore/css/parser/CSSTokenizer.h 2020-04-09 00:42:25 UTC (rev 259772)
+++ trunk/Source/WebCore/css/parser/CSSTokenizer.h 2020-04-09 00:43:49 UTC (rev 259773)
@@ -54,6 +54,8 @@
Vector<String>&& escapedStringsForAdoption() { return WTFMove(m_stringPool); }
private:
+ CSSTokenizer(String&&, CSSParserObserverWrapper*);
+
CSSParserToken nextToken();
UChar consume();
Modified: trunk/Source/WebCore/css/parser/CSSTokenizerInputStream.h (259772 => 259773)
--- trunk/Source/WebCore/css/parser/CSSTokenizerInputStream.h 2020-04-09 00:42:25 UTC (rev 259772)
+++ trunk/Source/WebCore/css/parser/CSSTokenizerInputStream.h 2020-04-09 00:43:49 UTC (rev 259773)
@@ -47,19 +47,16 @@
UChar nextInputChar() const
{
if (m_offset >= m_stringLength)
- return '\0';
- UChar result = (*m_string)[m_offset];
- return result ? result : 0xFFFD;
+ return kEndOfFileMarker;
+ return (*m_string)[m_offset];
}
// Gets the char at lookaheadOffset from the current stream position. Will
// return NUL (kEndOfFileMarker) if the stream position is at the end.
- // NOTE: This may *also* return NUL if there's one in the input! Never
- // compare the return value to '\0'.
- UChar peekWithoutReplacement(unsigned lookaheadOffset) const
+ UChar peek(unsigned lookaheadOffset) const
{
if ((m_offset + lookaheadOffset) >= m_stringLength)
- return '\0';
+ return kEndOfFileMarker;
return (*m_string)[m_offset + lookaheadOffset];
}