Revision: 5327
Author: [email protected]
Date: Tue Aug 24 05:29:50 2010
Log: Ensure that scanner state is correctly reset when an error is
encountered.
Add a scope object to ensure that a literal scan that is left prematurely
cleans up after itself.
Also reset the literal buffer if a scanner is reinitialized with new
source code.
Review URL: http://codereview.chromium.org/3137037
http://code.google.com/p/v8/source/detail?r=5327
Modified:
/branches/bleeding_edge/src/scanner.cc
/branches/bleeding_edge/src/scanner.h
/branches/bleeding_edge/src/utils.h
=======================================
--- /branches/bleeding_edge/src/scanner.cc Tue Aug 24 03:53:44 2010
+++ /branches/bleeding_edge/src/scanner.cc Tue Aug 24 05:29:50 2010
@@ -317,7 +317,27 @@
// On fallthrough, it's a failure.
state_ = UNMATCHABLE;
}
-
+
+
+
+// ----------------------------------------------------------------------------
+// Scanner::LiteralScope
+
+Scanner::LiteralScope::LiteralScope(Scanner* self)
+ : scanner_(self), complete_(false) {
+ self->StartLiteral();
+}
+
+
+Scanner::LiteralScope::~LiteralScope() {
+ if (!complete_) scanner_->DropLiteral();
+}
+
+
+void Scanner::LiteralScope::Complete() {
+ scanner_->TerminateLiteral();
+ complete_ = true;
+}
// ----------------------------------------------------------------------------
// Scanner
@@ -386,8 +406,10 @@
// Set c0_ (one character ahead)
ASSERT(kCharacterLookaheadBufferSize == 1);
Advance();
- // Initialise current_ to not refer to a literal.
+ // Initialize current_ to not refer to a literal.
current_.literal_chars = Vector<const char>();
+ // Reset literal buffer.
+ literal_buffer_.Reset();
// Skip initial whitespace allowing HTML comment ends just like
// after a newline and scan first token.
@@ -422,10 +444,16 @@
void Scanner::AddChar(uc32 c) {
literal_buffer_.AddChar(c);
}
+
void Scanner::TerminateLiteral() {
next_.literal_chars = literal_buffer_.EndLiteral();
}
+
+
+void Scanner::DropLiteral() {
+ literal_buffer_.DropLiteral();
+}
void Scanner::AddCharAdvance() {
@@ -636,7 +664,7 @@
Token::Value Scanner::ScanJsonString() {
ASSERT_EQ('"', c0_);
Advance();
- StartLiteral();
+ LiteralScope literal(this);
while (c0_ != '"' && c0_ > 0) {
// Check for control character (0x00-0x1f) or unterminated string (<0).
if (c0_ < 0x20) return Token::ILLEGAL;
@@ -670,7 +698,9 @@
for (int i = 0; i < 4; i++) {
Advance();
int digit = HexValue(c0_);
- if (digit < 0) return Token::ILLEGAL;
+ if (digit < 0) {
+ return Token::ILLEGAL;
+ }
value = value * 16 + digit;
}
AddChar(value);
@@ -685,14 +715,14 @@
if (c0_ != '"') {
return Token::ILLEGAL;
}
- TerminateLiteral();
+ literal.Complete();
Advance();
return Token::STRING;
}
Token::Value Scanner::ScanJsonNumber() {
- StartLiteral();
+ LiteralScope literal(this);
if (c0_ == '-') AddCharAdvance();
if (c0_ == '0') {
AddCharAdvance();
@@ -720,21 +750,21 @@
AddCharAdvance();
} while (c0_ >= '0' && c0_ <= '9');
}
- TerminateLiteral();
+ literal.Complete();
return Token::NUMBER;
}
Token::Value Scanner::ScanJsonIdentifier(const char* text,
Token::Value token) {
- StartLiteral();
+ LiteralScope literal(this);
while (*text != '\0') {
if (c0_ != *text) return Token::ILLEGAL;
Advance();
text++;
}
if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
- TerminateLiteral();
+ literal.Complete();
return token;
}
@@ -1077,7 +1107,7 @@
uc32 quote = c0_;
Advance(); // consume quote
- StartLiteral();
+ LiteralScope literal(this);
while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
uc32 c = c0_;
Advance();
@@ -1088,10 +1118,8 @@
AddChar(c);
}
}
- if (c0_ != quote) {
- return Token::ILLEGAL;
- }
- TerminateLiteral();
+ if (c0_ != quote) return Token::ILLEGAL;
+ literal.Complete();
Advance(); // consume quote
return Token::STRING;
@@ -1127,7 +1155,7 @@
enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
- StartLiteral();
+ LiteralScope literal(this);
if (seen_period) {
// we have already seen a decimal point of the float
AddChar('.');
@@ -1143,12 +1171,13 @@
// hex number
kind = HEX;
AddCharAdvance();
- if (!IsHexDigit(c0_))
+ if (!IsHexDigit(c0_)) {
// we must have at least one hex digit after 'x'/'X'
return Token::ILLEGAL;
- while (IsHexDigit(c0_))
+ }
+ while (IsHexDigit(c0_)) {
AddCharAdvance();
-
+ }
} else if ('0' <= c0_ && c0_ <= '7') {
// (possible) octal number
kind = OCTAL;
@@ -1181,12 +1210,12 @@
AddCharAdvance();
if (c0_ == '+' || c0_ == '-')
AddCharAdvance();
- if (!IsDecimalDigit(c0_))
+ if (!IsDecimalDigit(c0_)) {
// we must have at least one decimal digit after 'e'/'E'
return Token::ILLEGAL;
+ }
ScanDecimalDigits();
}
- TerminateLiteral();
// The source character immediately following a numeric literal must
// not be an identifier start or a decimal digit; see ECMA-262
@@ -1195,6 +1224,8 @@
if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))
return Token::ILLEGAL;
+ literal.Complete();
+
return Token::NUMBER;
}
@@ -1214,7 +1245,7 @@
Token::Value Scanner::ScanIdentifier() {
ASSERT(kIsIdentifierStart.get(c0_));
- StartLiteral();
+ LiteralScope literal(this);
KeywordMatcher keyword_match;
// Scan identifier start character.
@@ -1244,7 +1275,7 @@
Advance();
}
}
- TerminateLiteral();
+ literal.Complete();
return keyword_match.token();
}
@@ -1274,36 +1305,32 @@
// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
// the scanner should pass uninterpreted bodies to the RegExp
// constructor.
- StartLiteral();
+ LiteralScope literal(this);
if (seen_equal)
AddChar('=');
while (c0_ != '/' || in_character_class) {
- if (kIsLineTerminator.get(c0_) || c0_ < 0)
- return false;
+ if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
if (c0_ == '\\') { // escaped character
AddCharAdvance();
- if (kIsLineTerminator.get(c0_) || c0_ < 0)
- return false;
+ if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
AddCharAdvance();
} else { // unescaped character
- if (c0_ == '[')
- in_character_class = true;
- if (c0_ == ']')
- in_character_class = false;
+ if (c0_ == '[') in_character_class = true;
+ if (c0_ == ']') in_character_class = false;
AddCharAdvance();
}
}
Advance(); // consume '/'
- TerminateLiteral();
+ literal.Complete();
return true;
}
bool Scanner::ScanRegExpFlags() {
// Scan regular expression flags.
- StartLiteral();
+ LiteralScope literal(this);
while (kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') {
uc32 c = ScanIdentifierUnicodeEscape();
@@ -1316,7 +1343,7 @@
}
AddCharAdvance();
}
- TerminateLiteral();
+ literal.Complete();
next_.location.end_pos = source_pos() - 1;
return true;
=======================================
--- /branches/bleeding_edge/src/scanner.h Tue Aug 24 03:53:44 2010
+++ /branches/bleeding_edge/src/scanner.h Tue Aug 24 05:29:50 2010
@@ -57,6 +57,14 @@
Vector<char> sequence = buffer_.EndSequence();
return Vector<const char>(sequence.start(), sequence.length());
}
+
+ void DropLiteral() {
+ buffer_.DropSequence();
+ }
+
+ void Reset() {
+ buffer_.Reset();
+ }
// The end marker added after a parsed literal.
// Using zero allows the usage of strlen and similar functions on
@@ -262,6 +270,17 @@
public:
typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
+ class LiteralScope {
+ public:
+ explicit LiteralScope(Scanner* self);
+ ~LiteralScope();
+ void Complete();
+
+ private:
+ Scanner* scanner_;
+ bool complete_;
+ };
+
// Construction
explicit Scanner(ParserMode parse_mode);
@@ -382,6 +401,8 @@
inline void AddChar(uc32 ch);
inline void AddCharAdvance();
inline void TerminateLiteral();
+ // Stops scanning of a literal, e.g., due to an encountered error.
+ inline void DropLiteral();
// Low-level scanning support.
void Advance() { c0_ = source_->Advance(); }
=======================================
--- /branches/bleeding_edge/src/utils.h Tue Aug 24 03:53:44 2010
+++ /branches/bleeding_edge/src/utils.h Tue Aug 24 05:29:50 2010
@@ -559,6 +559,15 @@
}
return Vector<T>(new_store, total_length);
}
+
+ // Resets the collector to be empty.
+ virtual void Reset() {
+ for (int i = chunks_.length() - 1; i >= 0; i--) {
+ chunks_.at(i).Dispose();
+ }
+ chunks_.Rewind(0);
+ index_ = 0;
+ }
protected:
static const int kMinCapacity = 16;
@@ -631,6 +640,18 @@
return Vector<T>(this->current_chunk_ + sequence_start,
this->index_ - sequence_start);
}
+
+ // Drops the currently added sequence, and all collected elements in it.
+ void DropSequence() {
+ ASSERT(sequence_start_ != kNoSequence);
+ this->index_ = sequence_start_;
+ sequence_start_ = kNoSequence;
+ }
+
+ virtual void Reset() {
+ sequence_start_ = kNoSequence;
+ this->Collector<T>::Reset();
+ }
private:
static const int kNoSequence = -1;
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev