Revision: 3246 Author: [email protected] Date: Mon Nov 9 03:52:18 2009 Log: Remove unnecessary buffer doubling and content copying.
Review URL: http://codereview.chromium.org/377006 http://code.google.com/p/v8/source/detail?r=3246 Modified: /branches/bleeding_edge/src/scanner.cc /branches/bleeding_edge/src/scanner.h ======================================= --- /branches/bleeding_edge/src/scanner.cc Thu Nov 5 02:24:20 2009 +++ /branches/bleeding_edge/src/scanner.cc Mon Nov 9 03:52:18 2009 @@ -42,35 +42,27 @@ unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator; unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace; - StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_; - // ---------------------------------------------------------------------------- // UTF8Buffer -UTF8Buffer::UTF8Buffer() { - static const int kInitialCapacity = 1 * KB; - data_ = NewArray<char>(kInitialCapacity); - limit_ = ComputeLimit(data_, kInitialCapacity); - Reset(); - ASSERT(Capacity() == kInitialCapacity && pos() == 0); -} - +UTF8Buffer::UTF8Buffer() : + data_(NULL), limit_(NULL) { +} UTF8Buffer::~UTF8Buffer() { DeleteArray(data_); } - void UTF8Buffer::AddCharSlow(uc32 c) { static const int kCapacityGrowthLimit = 1 * MB; if (cursor_ > limit_) { int old_capacity = Capacity(); int old_position = pos(); - int new_capacity = - Min(old_capacity * 2, old_capacity + kCapacityGrowthLimit); - char* new_data = NewArray<char>(new_capacity); + int new_capacity = Min(old_capacity * 3, old_capacity + + kCapacityGrowthLimit); + char* new_data = NewArray<char> (new_capacity); memcpy(new_data, data_, old_position); DeleteArray(data_); data_ = new_data; @@ -78,32 +70,30 @@ limit_ = ComputeLimit(new_data, new_capacity); ASSERT(Capacity() == new_capacity && pos() == old_position); } - if (static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { - *cursor_++ = c; // Common case: 7-bit ASCII. + if (static_cast<unsigned> (c) <= unibrow::Utf8::kMaxOneByteChar) { + *cursor_++ = c; // Common case: 7-bit ASCII. } else { cursor_ += unibrow::Utf8::Encode(cursor_, c); } ASSERT(pos() <= Capacity()); } - // ---------------------------------------------------------------------------- // UTF16Buffer -UTF16Buffer::UTF16Buffer() - : pos_(0), size_(0) { } - +UTF16Buffer::UTF16Buffer() : + pos_(0), size_(0) { +} Handle<String> UTF16Buffer::SubString(int start, int end) { return internal::SubString(data_, start, end); } - // CharacterStreamUTF16Buffer -CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() - : pushback_buffer_(0), last_(0), stream_(NULL) { } - +CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() : + pushback_buffer_(0), last_(0), stream_(NULL) { +} void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, unibrow::CharacterStream* input) { @@ -111,14 +101,12 @@ pos_ = 0; stream_ = input; } - void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { pushback_buffer()->Add(last_); last_ = ch; pos_--; } - uc32 CharacterStreamUTF16Buffer::Advance() { // NOTE: It is of importance to Persian / Farsi resources that we do @@ -140,25 +128,22 @@ // Note: currently the following increment is necessary to avoid a // test-parser problem! pos_++; - return last_ = static_cast<uc32>(-1); + return last_ = static_cast<uc32> (-1); } } - void CharacterStreamUTF16Buffer::SeekForward(int pos) { pos_ = pos; ASSERT(pushback_buffer()->is_empty()); stream_->Seek(pos); } - // TwoByteStringUTF16Buffer -TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() - : raw_data_(NULL) { } - - -void TwoByteStringUTF16Buffer::Initialize( - Handle<ExternalTwoByteString> data) { +TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() : + raw_data_(NULL) { +} + +void TwoByteStringUTF16Buffer::Initialize(Handle<ExternalTwoByteString> data) { ASSERT(!data.is_null()); data_ = data; @@ -167,7 +152,6 @@ raw_data_ = data->resource()->data(); size_ = data->length(); } - uc32 TwoByteStringUTF16Buffer::Advance() { if (pos_ < size_) { @@ -176,50 +160,35 @@ // note: currently the following increment is necessary to avoid a // test-parser problem! pos_++; - return static_cast<uc32>(-1); + return static_cast<uc32> (-1); } } - void TwoByteStringUTF16Buffer::PushBack(uc32 ch) { pos_--; ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize); ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch); } - void TwoByteStringUTF16Buffer::SeekForward(int pos) { pos_ = pos; } - // ---------------------------------------------------------------------------- // Keyword Matcher -KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { - { "break", KEYWORD_PREFIX, Token::BREAK }, - { NULL, C, Token::ILLEGAL }, - { NULL, D, Token::ILLEGAL }, - { "else", KEYWORD_PREFIX, Token::ELSE }, - { NULL, F, Token::ILLEGAL }, - { NULL, UNMATCHABLE, Token::ILLEGAL }, - { NULL, UNMATCHABLE, Token::ILLEGAL }, - { NULL, I, Token::ILLEGAL }, - { NULL, UNMATCHABLE, Token::ILLEGAL }, - { NULL, UNMATCHABLE, Token::ILLEGAL }, - { NULL, UNMATCHABLE, Token::ILLEGAL }, - { NULL, UNMATCHABLE, Token::ILLEGAL }, - { NULL, N, Token::ILLEGAL }, - { NULL, UNMATCHABLE, Token::ILLEGAL }, - { NULL, UNMATCHABLE, Token::ILLEGAL }, - { NULL, UNMATCHABLE, Token::ILLEGAL }, - { "return", KEYWORD_PREFIX, Token::RETURN }, - { "switch", KEYWORD_PREFIX, Token::SWITCH }, - { NULL, T, Token::ILLEGAL }, - { NULL, UNMATCHABLE, Token::ILLEGAL }, - { NULL, V, Token::ILLEGAL }, - { NULL, W, Token::ILLEGAL } -}; - +KeywordMatcher::FirstState KeywordMatcher::first_states_[] = { { "break", + KEYWORD_PREFIX, Token::BREAK }, { NULL, C, Token::ILLEGAL }, { NULL, D, + Token::ILLEGAL }, { "else", KEYWORD_PREFIX, Token::ELSE }, { NULL, F, + Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, + UNMATCHABLE, Token::ILLEGAL }, { NULL, I, Token::ILLEGAL }, { NULL, + UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { + NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, + { NULL, N, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { NULL, + UNMATCHABLE, Token::ILLEGAL }, { NULL, UNMATCHABLE, Token::ILLEGAL }, { + "return", KEYWORD_PREFIX, Token::RETURN }, { "switch", KEYWORD_PREFIX, + Token::SWITCH }, { NULL, T, Token::ILLEGAL }, { NULL, UNMATCHABLE, + Token::ILLEGAL }, { NULL, V, Token::ILLEGAL }, { NULL, W, + Token::ILLEGAL } }; void KeywordMatcher::Step(uc32 input) { switch (state_) { @@ -253,38 +222,56 @@ token_ = Token::IDENTIFIER; break; case C: - if (MatchState(input, 'a', CA)) return; - if (MatchState(input, 'o', CO)) return; + if (MatchState(input, 'a', CA)) + return; + if (MatchState(input, 'o', CO)) + return; break; case CA: - if (MatchKeywordStart(input, "case", 2, Token::CASE)) return; - if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return; + if (MatchKeywordStart(input, "case", 2, Token::CASE)) + return; + if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) + return; break; case CO: - if (MatchState(input, 'n', CON)) return; + if (MatchState(input, 'n', CON)) + return; break; case CON: - if (MatchKeywordStart(input, "const", 3, Token::CONST)) return; - if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return; + if (MatchKeywordStart(input, "const", 3, Token::CONST)) + return; + if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) + return; break; case D: - if (MatchState(input, 'e', DE)) return; - if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return; + if (MatchState(input, 'e', DE)) + return; + if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) + return; break; case DE: - if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return; - if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return; - if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return; + if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) + return; + if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) + return; + if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) + return; break; case F: - if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return; - if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return; - if (MatchKeywordStart(input, "for", 1, Token::FOR)) return; - if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return; + if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) + return; + if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) + return; + if (MatchKeywordStart(input, "for", 1, Token::FOR)) + return; + if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) + return; break; case I: - if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return; - if (MatchKeyword(input, 'n', IN, Token::IN)) return; + if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) + return; + if (MatchKeyword(input, 'n', IN, Token::IN)) + return; break; case IN: token_ = Token::IDENTIFIER; @@ -293,30 +280,44 @@ } break; case N: - if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return; - if (MatchKeywordStart(input, "new", 1, Token::NEW)) return; - if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return; + if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) + return; + if (MatchKeywordStart(input, "new", 1, Token::NEW)) + return; + if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) + return; break; case T: - if (MatchState(input, 'h', TH)) return; - if (MatchState(input, 'r', TR)) return; - if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return; + if (MatchState(input, 'h', TH)) + return; + if (MatchState(input, 'r', TR)) + return; + if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) + return; break; case TH: - if (MatchKeywordStart(input, "this", 2, Token::THIS)) return; - if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return; + if (MatchKeywordStart(input, "this", 2, Token::THIS)) + return; + if (MatchKeywordStart(input, "throw", 2, Token::THROW)) + return; break; case TR: - if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return; - if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return; + if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) + return; + if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) + return; break; case V: - if (MatchKeywordStart(input, "var", 1, Token::VAR)) return; - if (MatchKeywordStart(input, "void", 1, Token::VOID)) return; + if (MatchKeywordStart(input, "var", 1, Token::VAR)) + return; + if (MatchKeywordStart(input, "void", 1, Token::VOID)) + return; break; case W: - if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return; - if (MatchKeywordStart(input, "with", 1, Token::WITH)) return; + if (MatchKeywordStart(input, "while", 1, Token::WHILE)) + return; + if (MatchKeywordStart(input, "with", 1, Token::WITH)) + return; break; default: UNREACHABLE(); @@ -324,20 +325,21 @@ // On fallthrough, it's a failure. state_ = UNMATCHABLE; } - // ---------------------------------------------------------------------------- // Scanner -Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) { } - +Scanner::Scanner(bool pre) : + stack_overflow_(false), is_pre_parsing_(pre) { +} void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, - int position) { + int position) { // Initialize the source buffer. if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { two_byte_string_buffer_.Initialize( - Handle<ExternalTwoByteString>::cast(source)); + Handle<ExternalTwoByteString>::cast( + source)); source_ = &two_byte_string_buffer_; } else { char_stream_buffer_.Initialize(source, stream); @@ -346,9 +348,6 @@ position_ = position; - // Reset literals buffer - literals_.Reset(); - // Set c0_ (one character ahead) ASSERT(kCharacterLookaheadBufferSize == 1); Advance(); @@ -359,12 +358,10 @@ SkipWhiteSpace(); Scan(); } - Handle<String> Scanner::SubString(int start, int end) { return source_->SubString(start - position_, end - position_); } - Token::Value Scanner::Next() { // BUG 1215673: Find a thread safe way to set a stack limit in @@ -376,34 +373,36 @@ if (check.HasOverflowed()) { stack_overflow_ = true; next_.token = Token::ILLEGAL; + next_.literal_buffer = NULL; } else { Scan(); } return current_.token; } - void Scanner::StartLiteral() { - next_.literal_pos = literals_.pos(); -} - + // Use the first buffer unless it's currently in use by the current_ token. + // In most cases we won't have two literals/identifiers in a row, so + // the second buffer won't be used very often and is unlikely to grow much. + UTF8Buffer* free_buffer = + (current_.literal_buffer != &literal_buffer_1_) ? &literal_buffer_1_ + : &literal_buffer_2_; + next_.literal_buffer = free_buffer; + free_buffer->Reset(); +} void Scanner::AddChar(uc32 c) { - literals_.AddChar(c); -} - + next_.literal_buffer->AddChar(c); +} void Scanner::TerminateLiteral() { - next_.literal_end = literals_.pos(); AddChar(0); } - void Scanner::AddCharAdvance() { AddChar(c0_); Advance(); } - static inline bool IsByteOrderMark(uc32 c) { // The Unicode value U+FFFE is guaranteed never to be assigned as a @@ -415,7 +414,6 @@ // Spidermonkey. return c == 0xFEFF || c == 0xFFFE; } - bool Scanner::SkipWhiteSpace() { int start_position = source_pos(); @@ -447,15 +445,14 @@ // Continue skipping white space after the comment. continue; } - PushBack('-'); // undo Advance() - } - PushBack('-'); // undo Advance() + PushBack('-'); // undo Advance() + } + PushBack('-'); // undo Advance() } // Return whether or not we skipped any characters. return source_pos() != start_position; } } - Token::Value Scanner::SkipSingleLineComment() { Advance(); @@ -471,7 +468,6 @@ return Token::WHITESPACE; } - Token::Value Scanner::SkipMultiLineComment() { ASSERT(c0_ == '*'); @@ -496,7 +492,6 @@ // Unterminated multi-line comment. return Token::ILLEGAL; } - Token::Value Scanner::ScanHtmlComment() { // Check for <!-- comments. @@ -504,14 +499,14 @@ Advance(); if (c0_ == '-') { Advance(); - if (c0_ == '-') return SkipSingleLineComment(); - PushBack('-'); // undo Advance() - } - PushBack('!'); // undo Advance() + if (c0_ == '-') + return SkipSingleLineComment(); + PushBack('-'); // undo Advance() + } + PushBack('!'); // undo Advance() ASSERT(c0_ == '!'); return Token::LT; } - void Scanner::Scan() { Token::Value token; @@ -533,7 +528,8 @@ token = Token::WHITESPACE; break; - case '"': case '\'': + case '"': + case '\'': token = ScanString(); break; @@ -751,17 +747,15 @@ next_.location.end_pos = source_pos(); next_.token = token; } - void Scanner::SeekForward(int pos) { source_->SeekForward(pos - 1); Advance(); Scan(); } - uc32 Scanner::ScanHexEscape(uc32 c, int length) { - ASSERT(length <= 4); // prevent overflow + ASSERT(length <= 4); // prevent overflow uc32 digits[4]; uc32 x = 0; @@ -774,7 +768,7 @@ // non-escaped version of the original character. // Push back digits read, except the last one (in c0_). - for (int j = i-1; j >= 0; j--) { + for (int j = i - 1; j >= 0; j--) { PushBack(digits[j]); } // Notice: No handling of error - treat it as "\u"->"u". @@ -786,7 +780,6 @@ return x; } - // Octal escapes of the forms '\0xx' and '\xxx' are not a part of // ECMA-262. Other JS VMs support them. @@ -794,15 +787,16 @@ uc32 x = c - '0'; for (int i = 0; i < length; i++) { int d = c0_ - '0'; - if (d < 0 || d > 7) break; + if (d < 0 || d > 7) + break; int nx = x * 8 + d; - if (nx >= 256) break; + if (nx >= 256) + break; x = nx; Advance(); } return x; } - void Scanner::ScanEscape() { uc32 c = c0_; @@ -811,32 +805,53 @@ // Skip escaped newlines. if (kIsLineTerminator.get(c)) { // Allow CR+LF newlines in multiline string literals. - if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); + if (IsCarriageReturn(c) && IsLineFeed(c0_)) + Advance(); // Allow LF+CR newlines in multiline string literals. - if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); + if (IsLineFeed(c) && IsCarriageReturn(c0_)) + Advance(); return; } switch (c) { - case '\'': // fall through - case '"' : // fall through - case '\\': break; - case 'b' : c = '\b'; break; - case 'f' : c = '\f'; break; - case 'n' : c = '\n'; break; - case 'r' : c = '\r'; break; - case 't' : c = '\t'; break; - case 'u' : c = ScanHexEscape(c, 4); break; - case 'v' : c = '\v'; break; - case 'x' : c = ScanHexEscape(c, 2); break; - case '0' : // fall through - case '1' : // fall through - case '2' : // fall through - case '3' : // fall through - case '4' : // fall through - case '5' : // fall through - case '6' : // fall through - case '7' : c = ScanOctalEscape(c, 2); break; + case '\'': // fall through + case '"': // fall through + case '\\': + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'u': + c = ScanHexEscape(c, 4); + break; + case 'v': + c = '\v'; + break; + case 'x': + c = ScanHexEscape(c, 2); + break; + case '0': // fall through + case '1': // fall through + case '2': // fall through + case '3': // fall through + case '4': // fall through + case '5': // fall through + case '6': // fall through + case '7': + c = ScanOctalEscape(c, 2); + break; } // According to ECMA-262, 3rd, 7.8.4 (p 18ff) these @@ -844,18 +859,18 @@ // as non-escaped characters by JS VMs. AddChar(c); } - Token::Value Scanner::ScanString() { uc32 quote = c0_; - Advance(); // consume quote + Advance(); // consume quote StartLiteral(); while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { uc32 c = c0_; Advance(); if (c == '\\') { - if (c0_ < 0) return Token::ILLEGAL; + if (c0_ < 0) + return Token::ILLEGAL; ScanEscape(); } else { AddChar(c); @@ -866,16 +881,14 @@ } TerminateLiteral(); - Advance(); // consume quote + Advance(); // consume quote return Token::STRING; } - Token::Value Scanner::Select(Token::Value tok) { Advance(); return tok; } - Token::Value Scanner::Select(uc32 next, Token::Value then, Token::Value else_) { Advance(); @@ -886,25 +899,25 @@ return else_; } } - // Returns true if any decimal digits were scanned, returns false otherwise. void Scanner::ScanDecimalDigits() { while (IsDecimalDigit(c0_)) AddCharAdvance(); } - Token::Value Scanner::ScanNumber(bool seen_period) { - ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction - - enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; + ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction + + enum { + DECIMAL, HEX, OCTAL + } kind = DECIMAL; StartLiteral(); if (seen_period) { // we have already seen a decimal point of the float AddChar('.'); - ScanDecimalDigits(); // we know we have at least one digit + ScanDecimalDigits(); // we know we have at least one digit } else { // if the first character is '0' we must check for octals and hex @@ -930,7 +943,8 @@ kind = DECIMAL; break; } - if (c0_ < '0' || '7' < c0_) break; + if (c0_ < '0' || '7' < c0_) + break; AddCharAdvance(); } } @@ -938,18 +952,19 @@ // Parse decimal digits and allow trailing fractional part. if (kind == DECIMAL) { - ScanDecimalDigits(); // optional + ScanDecimalDigits(); // optional if (c0_ == '.') { AddCharAdvance(); - ScanDecimalDigits(); // optional + ScanDecimalDigits(); // optional } } } // scan exponent, if any if (c0_ == 'e' || c0_ == 'E') { - ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number - if (kind == OCTAL) return Token::ILLEGAL; // no exponent for octals allowed + ASSERT(kind != HEX); // 'e'/'E' must be scanned as part of the hex number + if (kind == OCTAL) + return Token::ILLEGAL; // no exponent for octals allowed // scan exponent AddCharAdvance(); if (c0_ == '+' || c0_ == '-') @@ -970,19 +985,19 @@ return Token::NUMBER; } - uc32 Scanner::ScanIdentifierUnicodeEscape() { Advance(); - if (c0_ != 'u') return unibrow::Utf8::kBadChar; + if (c0_ != 'u') + return unibrow::Utf8::kBadChar; Advance(); uc32 c = ScanHexEscape('u', 4); // We do not allow a unicode escape sequence to start another // unicode escape sequence. - if (c == '\\') return unibrow::Utf8::kBadChar; + if (c == '\\') + return unibrow::Utf8::kBadChar; return c; } - Token::Value Scanner::ScanIdentifier() { ASSERT(kIsIdentifierStart.get(c0_)); @@ -994,7 +1009,8 @@ if (c0_ == '\\') { uc32 c = ScanIdentifierUnicodeEscape(); // Only allow legal identifier start characters. - if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL; + if (!kIsIdentifierStart.get(c)) + return Token::ILLEGAL; AddChar(c); keyword_match.Fail(); } else { @@ -1008,7 +1024,8 @@ if (c0_ == '\\') { uc32 c = ScanIdentifierUnicodeEscape(); // Only allow legal identifier part characters. - if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL; + if (!kIsIdentifierPart.get(c)) + return Token::ILLEGAL; AddChar(c); keyword_match.Fail(); } else { @@ -1021,19 +1038,19 @@ return keyword_match.token(); } - - bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) { // Checks whether the buffer contains an identifier (no escape). - if (!buffer->has_more()) return false; - if (!kIsIdentifierStart.get(buffer->GetNext())) return false; + if (!buffer->has_more()) + return false; + if (!kIsIdentifierStart.get(buffer->GetNext())) + return false; while (buffer->has_more()) { - if (!kIsIdentifierPart.get(buffer->GetNext())) return false; + if (!kIsIdentifierPart.get(buffer->GetNext())) + return false; } return true; } - bool Scanner::ScanRegExpPattern(bool seen_equal) { // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags @@ -1054,12 +1071,12 @@ while (c0_ != '/' || in_character_class) { if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; - if (c0_ == '\\') { // escaped character + if (c0_ == '\\') { // escaped character AddCharAdvance(); if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; AddCharAdvance(); - } else { // unescaped character + } else { // unescaped character if (c0_ == '[') in_character_class = true; if (c0_ == ']') @@ -1067,7 +1084,7 @@ AddCharAdvance(); } } - Advance(); // consume '/' + Advance(); // consume '/' TerminateLiteral(); @@ -1080,7 +1097,7 @@ while (kIsIdentifierPart.get(c0_)) { if (c0_ == '\\') { uc32 c = ScanIdentifierUnicodeEscape(); - if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) { + if (c != static_cast<uc32> (unibrow::Utf8::kBadChar)) { // We allow any escaped character, unlike the restriction on // IdentifierPart when it is used to build an IdentifierName. AddChar(c); @@ -1095,4 +1112,5 @@ return true; } -} } // namespace v8::internal +} +} // namespace v8::internal ======================================= --- /branches/bleeding_edge/src/scanner.h Thu Nov 5 02:11:38 2009 +++ /branches/bleeding_edge/src/scanner.h Mon Nov 9 03:52:18 2009 @@ -41,6 +41,7 @@ ~UTF8Buffer(); void AddChar(uc32 c) { + ASSERT_NOT_NULL(data_); if (cursor_ <= limit_ && static_cast<unsigned>(c) <= unibrow::Utf8::kMaxOneByteChar) { *cursor_++ = static_cast<char>(c); @@ -49,16 +50,29 @@ } } - void Reset() { cursor_ = data_; } - int pos() const { return cursor_ - data_; } + void Reset() { + if (data_ == NULL) { + data_ = NewArray<char>(kInitialCapacity); + limit_ = ComputeLimit(data_, kInitialCapacity); + } + cursor_ = data_; + } + + int pos() const { + ASSERT_NOT_NULL(data_); + return cursor_ - data_; + } + char* data() const { return data_; } private: + static const int kInitialCapacity = 256; char* data_; char* cursor_; char* limit_; int Capacity() const { + ASSERT_NOT_NULL(data_); return (limit_ - data_) + unibrow::Utf8::kMaxEncodedSize; } @@ -278,26 +292,30 @@ // token returned by Next()). The string is 0-terminated and in // UTF-8 format; they may contain 0-characters. Literal strings are // collected for identifiers, strings, and numbers. + // These functions only give the correct result if the literal + // was scanned between calls to StartLiteral() and TerminateLiteral(). const char* literal_string() const { - return &literals_.data()[current_.literal_pos]; + return current_.literal_buffer->data(); } int literal_length() const { - return current_.literal_end - current_.literal_pos; - } - - Vector<const char> next_literal() const { - return Vector<const char>(next_literal_string(), next_literal_length()); + // Excluding terminal '\0' added by TerminateLiteral(). + return current_.literal_buffer->pos() - 1; } // Returns the literal string for the next token (the token that // would be returned if Next() were called). const char* next_literal_string() const { - return &literals_.data()[next_.literal_pos]; + return next_.literal_buffer->data(); } // Returns the length of the next token (that would be returned if // Next() were called). int next_literal_length() const { - return next_.literal_end - next_.literal_pos; + return next_.literal_buffer->pos() - 1; + } + + Vector<const char> next_literal() const { + return Vector<const char>(next_literal_string(), + next_literal_length()); } // Scans the input as a regular expression pattern, previous @@ -339,7 +357,8 @@ // Buffer to hold literal values (identifiers, strings, numbers) // using 0-terminated UTF-8 encoding. - UTF8Buffer literals_; + UTF8Buffer literal_buffer_1_; + UTF8Buffer literal_buffer_2_; bool stack_overflow_; static StaticResource<Utf8Decoder> utf8_decoder_; @@ -351,7 +370,7 @@ struct TokenDesc { Token::Value token; Location location; - int literal_pos, literal_end; + UTF8Buffer* literal_buffer; }; TokenDesc current_; // desc for current token (as returned by Next()) --~--~---------~--~----~------------~-------~--~----~ v8-dev mailing list [email protected] http://groups.google.com/group/v8-dev -~----------~----~----~----~------~----~------~--~---
