Reviewers: Dmitry Lomov (chromium), marja
Message:
PTAL
I'm tempted to use a template<bool> so that the overhead for ordinary strings
would be 0. WDYT?
Description:
Make template-scan-related functions take a capture_raw parameter
This is for performance. Having to test capturing_raw_literal_ in every
Advance() call was too expensive.
BUG=438991, v8:3230
LOG=N
[email protected], marja
Please review this at https://codereview.chromium.org/766193003/
Base URL: https://chromium.googlesource.com/v8/v8.git@master
Affected files (+43, -43 lines):
M src/scanner.h
M src/scanner.cc
Index: src/scanner.cc
diff --git a/src/scanner.cc b/src/scanner.cc
index
561c30b58a12e0cb2c5e54d16bfc2e1572c9cd3e..0286755c96757a47484d649cea4c93a448b7f23a
100644
--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -34,7 +34,6 @@ Handle<String> LiteralBuffer::Internalize(Isolate*
isolate) const {
Scanner::Scanner(UnicodeCache* unicode_cache)
: unicode_cache_(unicode_cache),
- capturing_raw_literal_(false),
octal_pos_(Location::invalid()),
harmony_scoping_(false),
harmony_modules_(false),
@@ -57,7 +56,7 @@ void Scanner::Initialize(Utf16CharacterStream* source) {
}
-uc32 Scanner::ScanHexNumber(int expected_length) {
+uc32 Scanner::ScanHexNumber(int expected_length, bool capture_raw) {
DCHECK(expected_length <= 4); // prevent overflow
uc32 x = 0;
@@ -67,14 +66,14 @@ uc32 Scanner::ScanHexNumber(int expected_length) {
return -1;
}
x = x * 16 + d;
- Advance();
+ AdvanceMaybeRaw(capture_raw);
}
return x;
}
-uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {
+uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, bool
capture_raw) {
uc32 x = 0;
int d = HexValue(c0_);
if (d < 0) {
@@ -83,7 +82,7 @@ uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value)
{
while (d >= 0) {
x = x * 16 + d;
if (x > max_value) return -1;
- Advance();
+ AdvanceMaybeRaw(capture_raw);
d = HexValue(c0_);
}
return x;
@@ -696,16 +695,16 @@ void Scanner::SeekForward(int pos) {
}
-bool Scanner::ScanEscape() {
+bool Scanner::ScanEscape(bool capture_raw) {
uc32 c = c0_;
- Advance();
+ AdvanceMaybeRaw(capture_raw);
// Skip escaped newlines.
if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {
// Allow CR+LF newlines in multiline string literals.
- if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
+ if (IsCarriageReturn(c) && IsLineFeed(c0_))
AdvanceMaybeRaw(capture_raw);
// Allow LF+CR newlines in multiline string literals.
- if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
+ if (IsLineFeed(c) && IsCarriageReturn(c0_))
AdvanceMaybeRaw(capture_raw);
return true;
}
@@ -719,13 +718,13 @@ bool Scanner::ScanEscape() {
case 'r' : c = '\r'; break;
case 't' : c = '\t'; break;
case 'u' : {
- c = ScanUnicodeEscape();
+ c = ScanUnicodeEscape(capture_raw);
if (c < 0) return false;
break;
}
case 'v' : c = '\v'; break;
case 'x' : {
- c = ScanHexNumber(2);
+ c = ScanHexNumber(2, capture_raw);
if (c < 0) return false;
break;
}
@@ -782,7 +781,7 @@ Token::Value Scanner::ScanString() {
uc32 c = c0_;
Advance();
if (c == '\\') {
- if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL;
+ if (c0_ < 0 || !ScanEscape(false)) return Token::ILLEGAL;
} else {
AddLiteralChar(c);
}
@@ -814,13 +813,13 @@ Token::Value Scanner::ScanTemplateSpan() {
while (true) {
uc32 c = c0_;
- Advance();
+ AdvanceRaw();
if (c == '`') {
result = Token::TEMPLATE_TAIL;
ReduceRawLiteralLength(1);
break;
} else if (c == '$' && c0_ == '{') {
- Advance(); // Consume '{'
+ AdvanceRaw(); // Consume '{'
ReduceRawLiteralLength(2);
break;
} else if (c == '\\') {
@@ -828,20 +827,20 @@ Token::Value Scanner::ScanTemplateSpan() {
// The TV of LineContinuation :: \ LineTerminatorSequence is the empty
// code unit sequence.
uc32 lastChar = c0_;
- Advance();
+ AdvanceRaw();
if (lastChar == '\r') {
ReduceRawLiteralLength(1); // Remove \r
if (c0_ == '\n') {
- Advance(); // Adds \n
+ AdvanceRaw(); // Adds \n
} else {
AddRawLiteralChar('\n');
}
}
} else if (c0_ == '0') {
- Advance();
+ AdvanceRaw();
AddLiteralChar('0');
} else {
- ScanEscape();
+ ScanEscape(true);
}
} else if (c < 0) {
// Unterminated template literal
@@ -854,7 +853,7 @@ Token::Value Scanner::ScanTemplateSpan() {
if (c == '\r') {
ReduceRawLiteralLength(1); // Remove \r
if (c0_ == '\n') {
- Advance(); // Adds \n
+ AdvanceRaw(); // Adds \n
} else {
AddRawLiteralChar('\n');
}
@@ -1002,27 +1001,27 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
Advance();
if (c0_ != 'u') return -1;
Advance();
- return ScanUnicodeEscape();
+ return ScanUnicodeEscape(false);
}
-uc32 Scanner::ScanUnicodeEscape() {
+uc32 Scanner::ScanUnicodeEscape(bool capture_raw) {
// Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are
// allowed). In the latter case, the number of hex digits between { } is
// arbitrary. \ and u have already been read.
if (c0_ == '{' && HarmonyUnicode()) {
- Advance();
- uc32 cp = ScanUnlimitedLengthHexNumber(0x10ffff);
+ AdvanceMaybeRaw(capture_raw);
+ uc32 cp = ScanUnlimitedLengthHexNumber(0x10ffff, capture_raw);
if (cp < 0) {
return -1;
}
if (c0_ != '}') {
return -1;
}
- Advance();
+ AdvanceMaybeRaw(capture_raw);
return cp;
}
- return ScanHexNumber(4);
+ return ScanHexNumber(4, capture_raw);
}
Index: src/scanner.h
diff --git a/src/scanner.h b/src/scanner.h
index
87ff20b753a0f4314dc0553d2ea3ac7847789811..c476831b3213d4b5f44a33e259986fb127a6fc8e
100644
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -529,7 +529,6 @@ class Scanner {
inline void StartRawLiteral() {
raw_literal_buffer_.Reset();
next_.raw_literal_chars = &raw_literal_buffer_;
- capturing_raw_literal_ = true;
}
INLINE(void AddLiteralChar(uc32 c)) {
@@ -538,26 +537,25 @@ class Scanner {
}
INLINE(void AddRawLiteralChar(uc32 c)) {
- DCHECK(capturing_raw_literal_);
DCHECK_NOT_NULL(next_.raw_literal_chars);
next_.raw_literal_chars->AddChar(c);
}
INLINE(void ReduceRawLiteralLength(int delta)) {
- DCHECK(capturing_raw_literal_);
DCHECK_NOT_NULL(next_.raw_literal_chars);
next_.raw_literal_chars->ReduceLength(delta);
}
// Complete scanning of a literal.
- inline void TerminateLiteral() { capturing_raw_literal_ = false; }
+ inline void TerminateLiteral() {
+ // Does nothing in the current implementation.
+ }
// Stops scanning of a literal and drop the collected characters,
// e.g., due to an encountered error.
inline void DropLiteral() {
next_.literal_chars = NULL;
next_.raw_literal_chars = NULL;
- capturing_raw_literal_ = false;
}
inline void AddLiteralCharAdvance() {
@@ -567,9 +565,6 @@ class Scanner {
// Low-level scanning support.
void Advance() {
- if (capturing_raw_literal_) {
- AddRawLiteralChar(c0_);
- }
c0_ = source_->Advance();
if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
uc32 c1 = source_->Advance();
@@ -581,14 +576,24 @@ class Scanner {
}
}
+ void AdvanceRaw() {
+ AddRawLiteralChar(c0_);
+ Advance();
+ }
+
+ void AdvanceMaybeRaw(bool capture_raw) {
+ if (capture_raw) {
+ AddRawLiteralChar(c0_);
+ }
+ Advance();
+ }
+
void PushBack(uc32 ch) {
if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));
source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));
- if (capturing_raw_literal_) ReduceRawLiteralLength(2);
} else {
source_->PushBack(c0_);
- if (capturing_raw_literal_) ReduceRawLiteralLength(1);
}
c0_ = ch;
}
@@ -659,11 +664,11 @@ class Scanner {
}
- uc32 ScanHexNumber(int expected_length);
+ uc32 ScanHexNumber(int expected_length, bool capture_raw);
// Scan a number of any length but not bigger than max_value. For example, the
// number can be 000000001, so it's very long in characters but its value is
// small.
- uc32 ScanUnlimitedLengthHexNumber(int max_value);
+ uc32 ScanUnlimitedLengthHexNumber(int max_value, bool capture_raw);
// Scans a single JavaScript token.
void Scan();
@@ -686,12 +691,12 @@ class Scanner {
// Scans an escape-sequence which is part of a string and adds the
// decoded character to the current literal. Returns true if a pattern
// is scanned.
- bool ScanEscape();
+ bool ScanEscape(bool capture_raw);
// Decodes a Unicode escape-sequence which is part of an identifier.
// If the escape sequence cannot be decoded the result is kBadChar.
uc32 ScanIdentifierUnicodeEscape();
// Helper for the above functions.
- uc32 ScanUnicodeEscape();
+ uc32 ScanUnicodeEscape(bool capture_raw);
Token::Value ScanTemplateSpan();
@@ -713,10 +718,6 @@ class Scanner {
// Buffer to store raw string values
LiteralBuffer raw_literal_buffer_;
- // We only need to capture the raw literal when we are scanning template
- // literal spans.
- bool capturing_raw_literal_;
-
TokenDesc current_; // desc for current token (as returned by Next())
TokenDesc next_; // desc for next token (one token look-ahead)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.