Revision: 5327
Author: [email protected]
Date: Tue Aug 24 05:29:50 2010
Log: Ensure that scanner state is correctly reset when an error is encountered.

Add a scope object to ensure that the scanner cleans up after itself
when literal scanning is left prematurely.
Also reset the literal buffer when a scanner is reinitialized with new
source code.

Review URL: http://codereview.chromium.org/3137037
http://code.google.com/p/v8/source/detail?r=5327

Modified:
 /branches/bleeding_edge/src/scanner.cc
 /branches/bleeding_edge/src/scanner.h
 /branches/bleeding_edge/src/utils.h

=======================================
--- /branches/bleeding_edge/src/scanner.cc      Tue Aug 24 03:53:44 2010
+++ /branches/bleeding_edge/src/scanner.cc      Tue Aug 24 05:29:50 2010
@@ -317,7 +317,27 @@
   // On fallthrough, it's a failure.
   state_ = UNMATCHABLE;
 }
-
+
+
+
+// ----------------------------------------------------------------------------
+// Scanner::LiteralScope
+
+Scanner::LiteralScope::LiteralScope(Scanner* self)
+    : scanner_(self), complete_(false) {
+  self->StartLiteral();
+}
+
+
+Scanner::LiteralScope::~LiteralScope() {
+  if (!complete_) scanner_->DropLiteral();
+}
+
+
+void Scanner::LiteralScope::Complete() {
+  scanner_->TerminateLiteral();
+  complete_ = true;
+}

// ----------------------------------------------------------------------------
 // Scanner
@@ -386,8 +406,10 @@
   // Set c0_ (one character ahead)
   ASSERT(kCharacterLookaheadBufferSize == 1);
   Advance();
-  // Initialise current_ to not refer to a literal.
+  // Initialize current_ to not refer to a literal.
   current_.literal_chars = Vector<const char>();
+  // Reset literal buffer.
+  literal_buffer_.Reset();

   // Skip initial whitespace allowing HTML comment ends just like
   // after a newline and scan first token.
@@ -422,10 +444,16 @@
 void Scanner::AddChar(uc32 c) {
   literal_buffer_.AddChar(c);
 }
+

 void Scanner::TerminateLiteral() {
   next_.literal_chars = literal_buffer_.EndLiteral();
 }
+
+
+void Scanner::DropLiteral() {
+  literal_buffer_.DropLiteral();
+}


 void Scanner::AddCharAdvance() {
@@ -636,7 +664,7 @@
 Token::Value Scanner::ScanJsonString() {
   ASSERT_EQ('"', c0_);
   Advance();
-  StartLiteral();
+  LiteralScope literal(this);
   while (c0_ != '"' && c0_ > 0) {
     // Check for control character (0x00-0x1f) or unterminated string (<0).
     if (c0_ < 0x20) return Token::ILLEGAL;
@@ -670,7 +698,9 @@
           for (int i = 0; i < 4; i++) {
             Advance();
             int digit = HexValue(c0_);
-            if (digit < 0) return Token::ILLEGAL;
+            if (digit < 0) {
+              return Token::ILLEGAL;
+            }
             value = value * 16 + digit;
           }
           AddChar(value);
@@ -685,14 +715,14 @@
   if (c0_ != '"') {
     return Token::ILLEGAL;
   }
-  TerminateLiteral();
+  literal.Complete();
   Advance();
   return Token::STRING;
 }


 Token::Value Scanner::ScanJsonNumber() {
-  StartLiteral();
+  LiteralScope literal(this);
   if (c0_ == '-') AddCharAdvance();
   if (c0_ == '0') {
     AddCharAdvance();
@@ -720,21 +750,21 @@
       AddCharAdvance();
     } while (c0_ >= '0' && c0_ <= '9');
   }
-  TerminateLiteral();
+  literal.Complete();
   return Token::NUMBER;
 }


 Token::Value Scanner::ScanJsonIdentifier(const char* text,
                                          Token::Value token) {
-  StartLiteral();
+  LiteralScope literal(this);
   while (*text != '\0') {
     if (c0_ != *text) return Token::ILLEGAL;
     Advance();
     text++;
   }
   if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
-  TerminateLiteral();
+  literal.Complete();
   return token;
 }

@@ -1077,7 +1107,7 @@
   uc32 quote = c0_;
   Advance();  // consume quote

-  StartLiteral();
+  LiteralScope literal(this);
   while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
     uc32 c = c0_;
     Advance();
@@ -1088,10 +1118,8 @@
       AddChar(c);
     }
   }
-  if (c0_ != quote) {
-    return Token::ILLEGAL;
-  }
-  TerminateLiteral();
+  if (c0_ != quote) return Token::ILLEGAL;
+  literal.Complete();

   Advance();  // consume quote
   return Token::STRING;
@@ -1127,7 +1155,7 @@

   enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;

-  StartLiteral();
+  LiteralScope literal(this);
   if (seen_period) {
     // we have already seen a decimal point of the float
     AddChar('.');
@@ -1143,12 +1171,13 @@
         // hex number
         kind = HEX;
         AddCharAdvance();
-        if (!IsHexDigit(c0_))
+        if (!IsHexDigit(c0_)) {
           // we must have at least one hex digit after 'x'/'X'
           return Token::ILLEGAL;
-        while (IsHexDigit(c0_))
+        }
+        while (IsHexDigit(c0_)) {
           AddCharAdvance();
-
+        }
       } else if ('0' <= c0_ && c0_ <= '7') {
         // (possible) octal number
         kind = OCTAL;
@@ -1181,12 +1210,12 @@
     AddCharAdvance();
     if (c0_ == '+' || c0_ == '-')
       AddCharAdvance();
-    if (!IsDecimalDigit(c0_))
+    if (!IsDecimalDigit(c0_)) {
       // we must have at least one decimal digit after 'e'/'E'
       return Token::ILLEGAL;
+    }
     ScanDecimalDigits();
   }
-  TerminateLiteral();

   // The source character immediately following a numeric literal must
   // not be an identifier start or a decimal digit; see ECMA-262
@@ -1195,6 +1224,8 @@
   if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))
     return Token::ILLEGAL;

+  literal.Complete();
+
   return Token::NUMBER;
 }

@@ -1214,7 +1245,7 @@
 Token::Value Scanner::ScanIdentifier() {
   ASSERT(kIsIdentifierStart.get(c0_));

-  StartLiteral();
+  LiteralScope literal(this);
   KeywordMatcher keyword_match;

   // Scan identifier start character.
@@ -1244,7 +1275,7 @@
       Advance();
     }
   }
-  TerminateLiteral();
+  literal.Complete();

   return keyword_match.token();
 }
@@ -1274,36 +1305,32 @@
   // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
   // the scanner should pass uninterpreted bodies to the RegExp
   // constructor.
-  StartLiteral();
+  LiteralScope literal(this);
   if (seen_equal)
     AddChar('=');

   while (c0_ != '/' || in_character_class) {
-    if (kIsLineTerminator.get(c0_) || c0_ < 0)
-      return false;
+    if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
     if (c0_ == '\\') {  // escaped character
       AddCharAdvance();
-      if (kIsLineTerminator.get(c0_) || c0_ < 0)
-        return false;
+      if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
       AddCharAdvance();
     } else {  // unescaped character
-      if (c0_ == '[')
-        in_character_class = true;
-      if (c0_ == ']')
-        in_character_class = false;
+      if (c0_ == '[') in_character_class = true;
+      if (c0_ == ']') in_character_class = false;
       AddCharAdvance();
     }
   }
   Advance();  // consume '/'

-  TerminateLiteral();
+  literal.Complete();

   return true;
 }

 bool Scanner::ScanRegExpFlags() {
   // Scan regular expression flags.
-  StartLiteral();
+  LiteralScope literal(this);
   while (kIsIdentifierPart.get(c0_)) {
     if (c0_ == '\\') {
       uc32 c = ScanIdentifierUnicodeEscape();
@@ -1316,7 +1343,7 @@
     }
     AddCharAdvance();
   }
-  TerminateLiteral();
+  literal.Complete();

   next_.location.end_pos = source_pos() - 1;
   return true;
=======================================
--- /branches/bleeding_edge/src/scanner.h       Tue Aug 24 03:53:44 2010
+++ /branches/bleeding_edge/src/scanner.h       Tue Aug 24 05:29:50 2010
@@ -57,6 +57,14 @@
     Vector<char> sequence = buffer_.EndSequence();
     return Vector<const char>(sequence.start(), sequence.length());
   }
+
+  void DropLiteral() {
+    buffer_.DropSequence();
+  }
+
+  void Reset() {
+    buffer_.Reset();
+  }

   // The end marker added after a parsed literal.
   // Using zero allows the usage of strlen and similar functions on
@@ -262,6 +270,17 @@
  public:
   typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;

+  class LiteralScope {
+   public:
+    explicit LiteralScope(Scanner* self);
+    ~LiteralScope();
+    void Complete();
+
+   private:
+    Scanner* scanner_;
+    bool complete_;
+  };
+
   // Construction
   explicit Scanner(ParserMode parse_mode);

@@ -382,6 +401,8 @@
   inline void AddChar(uc32 ch);
   inline void AddCharAdvance();
   inline void TerminateLiteral();
+  // Stops scanning of a literal, e.g., due to an encountered error.
+  inline void DropLiteral();

   // Low-level scanning support.
   void Advance() { c0_ = source_->Advance(); }
=======================================
--- /branches/bleeding_edge/src/utils.h Tue Aug 24 03:53:44 2010
+++ /branches/bleeding_edge/src/utils.h Tue Aug 24 05:29:50 2010
@@ -559,6 +559,15 @@
     }
     return Vector<T>(new_store, total_length);
   }
+
+  // Resets the collector to be empty.
+  virtual void Reset() {
+    for (int i = chunks_.length() - 1; i >= 0; i--) {
+      chunks_.at(i).Dispose();
+    }
+    chunks_.Rewind(0);
+    index_ = 0;
+  }

  protected:
   static const int kMinCapacity = 16;
@@ -631,6 +640,18 @@
     return Vector<T>(this->current_chunk_ + sequence_start,
                      this->index_ - sequence_start);
   }
+
+  // Drops the currently added sequence, and all collected elements in it.
+  void DropSequence() {
+    ASSERT(sequence_start_ != kNoSequence);
+    this->index_ = sequence_start_;
+    sequence_start_ = kNoSequence;
+  }
+
+  virtual void Reset() {
+    sequence_start_ = kNoSequence;
+    this->Collector<T>::Reset();
+  }

  private:
   static const int kNoSequence = -1;

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to