Reviewers: Dmitry Lomov (chromium), marja

Message:
PTAL

I'm tempted to use a template<bool> so that the overhead for ordinary strings
would be 0. WDYT?

Description:
Make template-scan-related functions take a parameter

This is for performance. Testing capturing_raw_literal_ on every call to
Advance() was too expensive.

BUG=438991, v8:3230
LOG=N
[email protected], marja

Please review this at https://codereview.chromium.org/766193003/

Base URL: https://chromium.googlesource.com/v8/v8.git@master

Affected files (+43, -43 lines):
  M src/scanner.h
  M src/scanner.cc


Index: src/scanner.cc
diff --git a/src/scanner.cc b/src/scanner.cc
index 561c30b58a12e0cb2c5e54d16bfc2e1572c9cd3e..0286755c96757a47484d649cea4c93a448b7f23a 100644
--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -34,7 +34,6 @@ Handle<String> LiteralBuffer::Internalize(Isolate* isolate) const {

 Scanner::Scanner(UnicodeCache* unicode_cache)
     : unicode_cache_(unicode_cache),
-      capturing_raw_literal_(false),
       octal_pos_(Location::invalid()),
       harmony_scoping_(false),
       harmony_modules_(false),
@@ -57,7 +56,7 @@ void Scanner::Initialize(Utf16CharacterStream* source) {
 }


-uc32 Scanner::ScanHexNumber(int expected_length) {
+uc32 Scanner::ScanHexNumber(int expected_length, bool capture_raw) {
   DCHECK(expected_length <= 4);  // prevent overflow

   uc32 x = 0;
@@ -67,14 +66,14 @@ uc32 Scanner::ScanHexNumber(int expected_length) {
       return -1;
     }
     x = x * 16 + d;
-    Advance();
+    AdvanceMaybeRaw(capture_raw);
   }

   return x;
 }


-uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {
+uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, bool capture_raw) {
   uc32 x = 0;
   int d = HexValue(c0_);
   if (d < 0) {
@@ -83,7 +82,7 @@ uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {
   while (d >= 0) {
     x = x * 16 + d;
     if (x > max_value) return -1;
-    Advance();
+    AdvanceMaybeRaw(capture_raw);
     d = HexValue(c0_);
   }
   return x;
@@ -696,16 +695,16 @@ void Scanner::SeekForward(int pos) {
 }


-bool Scanner::ScanEscape() {
+bool Scanner::ScanEscape(bool capture_raw) {
   uc32 c = c0_;
-  Advance();
+  AdvanceMaybeRaw(capture_raw);

   // Skip escaped newlines.
   if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) {
     // Allow CR+LF newlines in multiline string literals.
-    if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
+    if (IsCarriageReturn(c) && IsLineFeed(c0_)) AdvanceMaybeRaw(capture_raw);
     // Allow LF+CR newlines in multiline string literals.
-    if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance();
+    if (IsLineFeed(c) && IsCarriageReturn(c0_)) AdvanceMaybeRaw(capture_raw);
     return true;
   }

@@ -719,13 +718,13 @@ bool Scanner::ScanEscape() {
     case 'r' : c = '\r'; break;
     case 't' : c = '\t'; break;
     case 'u' : {
-      c = ScanUnicodeEscape();
+      c = ScanUnicodeEscape(capture_raw);
       if (c < 0) return false;
       break;
     }
     case 'v' : c = '\v'; break;
     case 'x' : {
-      c = ScanHexNumber(2);
+      c = ScanHexNumber(2, capture_raw);
       if (c < 0) return false;
       break;
     }
@@ -782,7 +781,7 @@ Token::Value Scanner::ScanString() {
     uc32 c = c0_;
     Advance();
     if (c == '\\') {
-      if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL;
+      if (c0_ < 0 || !ScanEscape(false)) return Token::ILLEGAL;
     } else {
       AddLiteralChar(c);
     }
@@ -814,13 +813,13 @@ Token::Value Scanner::ScanTemplateSpan() {

   while (true) {
     uc32 c = c0_;
-    Advance();
+    AdvanceRaw();
     if (c == '`') {
       result = Token::TEMPLATE_TAIL;
       ReduceRawLiteralLength(1);
       break;
     } else if (c == '$' && c0_ == '{') {
-      Advance();  // Consume '{'
+      AdvanceRaw();  // Consume '{'
       ReduceRawLiteralLength(2);
       break;
     } else if (c == '\\') {
@@ -828,20 +827,20 @@ Token::Value Scanner::ScanTemplateSpan() {
         // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
         // code unit sequence.
         uc32 lastChar = c0_;
-        Advance();
+        AdvanceRaw();
         if (lastChar == '\r') {
           ReduceRawLiteralLength(1);  // Remove \r
           if (c0_ == '\n') {
-            Advance();  // Adds \n
+            AdvanceRaw();  // Adds \n
           } else {
             AddRawLiteralChar('\n');
           }
         }
       } else if (c0_ == '0') {
-        Advance();
+        AdvanceRaw();
         AddLiteralChar('0');
       } else {
-        ScanEscape();
+        ScanEscape(true);
       }
     } else if (c < 0) {
       // Unterminated template literal
@@ -854,7 +853,7 @@ Token::Value Scanner::ScanTemplateSpan() {
       if (c == '\r') {
         ReduceRawLiteralLength(1);  // Remove \r
         if (c0_ == '\n') {
-          Advance();  // Adds \n
+          AdvanceRaw();  // Adds \n
         } else {
           AddRawLiteralChar('\n');
         }
@@ -1002,27 +1001,27 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
   Advance();
   if (c0_ != 'u') return -1;
   Advance();
-  return ScanUnicodeEscape();
+  return ScanUnicodeEscape(false);
 }


-uc32 Scanner::ScanUnicodeEscape() {
+uc32 Scanner::ScanUnicodeEscape(bool capture_raw) {
   // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are
   // allowed). In the latter case, the number of hex digits between { } is
   // arbitrary. \ and u have already been read.
   if (c0_ == '{' && HarmonyUnicode()) {
-    Advance();
-    uc32 cp = ScanUnlimitedLengthHexNumber(0x10ffff);
+    AdvanceMaybeRaw(capture_raw);
+    uc32 cp = ScanUnlimitedLengthHexNumber(0x10ffff, capture_raw);
     if (cp < 0) {
       return -1;
     }
     if (c0_ != '}') {
       return -1;
     }
-    Advance();
+    AdvanceMaybeRaw(capture_raw);
     return cp;
   }
-  return ScanHexNumber(4);
+  return ScanHexNumber(4, capture_raw);
 }


Index: src/scanner.h
diff --git a/src/scanner.h b/src/scanner.h
index 87ff20b753a0f4314dc0553d2ea3ac7847789811..c476831b3213d4b5f44a33e259986fb127a6fc8e 100644
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -529,7 +529,6 @@ class Scanner {
   inline void StartRawLiteral() {
     raw_literal_buffer_.Reset();
     next_.raw_literal_chars = &raw_literal_buffer_;
-    capturing_raw_literal_ = true;
   }

   INLINE(void AddLiteralChar(uc32 c)) {
@@ -538,26 +537,25 @@ class Scanner {
   }

   INLINE(void AddRawLiteralChar(uc32 c)) {
-    DCHECK(capturing_raw_literal_);
     DCHECK_NOT_NULL(next_.raw_literal_chars);
     next_.raw_literal_chars->AddChar(c);
   }

   INLINE(void ReduceRawLiteralLength(int delta)) {
-    DCHECK(capturing_raw_literal_);
     DCHECK_NOT_NULL(next_.raw_literal_chars);
     next_.raw_literal_chars->ReduceLength(delta);
   }

   // Complete scanning of a literal.
-  inline void TerminateLiteral() { capturing_raw_literal_ = false; }
+  inline void TerminateLiteral() {
+    // Does nothing in the current implementation.
+  }

   // Stops scanning of a literal and drop the collected characters,
   // e.g., due to an encountered error.
   inline void DropLiteral() {
     next_.literal_chars = NULL;
     next_.raw_literal_chars = NULL;
-    capturing_raw_literal_ = false;
   }

   inline void AddLiteralCharAdvance() {
@@ -567,9 +565,6 @@ class Scanner {

   // Low-level scanning support.
   void Advance() {
-    if (capturing_raw_literal_) {
-      AddRawLiteralChar(c0_);
-    }
     c0_ = source_->Advance();
     if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
       uc32 c1 = source_->Advance();
@@ -581,14 +576,24 @@ class Scanner {
     }
   }

+  void AdvanceRaw() {
+    AddRawLiteralChar(c0_);
+    Advance();
+  }
+
+  void AdvanceMaybeRaw(bool capture_raw) {
+    if (capture_raw) {
+      AddRawLiteralChar(c0_);
+    }
+    Advance();
+  }
+
   void PushBack(uc32 ch) {
     if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
       source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));
       source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));
-      if (capturing_raw_literal_) ReduceRawLiteralLength(2);
     } else {
       source_->PushBack(c0_);
-      if (capturing_raw_literal_) ReduceRawLiteralLength(1);
     }
     c0_ = ch;
   }
@@ -659,11 +664,11 @@ class Scanner {
   }


-  uc32 ScanHexNumber(int expected_length);
+  uc32 ScanHexNumber(int expected_length, bool capture_raw);
   // Scan a number of any length but not bigger than max_value. For example, the
   // number can be 000000001, so it's very long in characters but its value is
   // small.
-  uc32 ScanUnlimitedLengthHexNumber(int max_value);
+  uc32 ScanUnlimitedLengthHexNumber(int max_value, bool capture_raw);

   // Scans a single JavaScript token.
   void Scan();
@@ -686,12 +691,12 @@ class Scanner {
   // Scans an escape-sequence which is part of a string and adds the
   // decoded character to the current literal. Returns true if a pattern
   // is scanned.
-  bool ScanEscape();
+  bool ScanEscape(bool capture_raw);
   // Decodes a Unicode escape-sequence which is part of an identifier.
   // If the escape sequence cannot be decoded the result is kBadChar.
   uc32 ScanIdentifierUnicodeEscape();
   // Helper for the above functions.
-  uc32 ScanUnicodeEscape();
+  uc32 ScanUnicodeEscape(bool capture_raw);

   Token::Value ScanTemplateSpan();

@@ -713,10 +718,6 @@ class Scanner {
   // Buffer to store raw string values
   LiteralBuffer raw_literal_buffer_;

-  // We only need to capture the raw literal when we are scanning template
-  // literal spans.
-  bool capturing_raw_literal_;
-
   TokenDesc current_;  // desc for current token (as returned by Next())
   TokenDesc next_;     // desc for next token (one token look-ahead)



--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to