Revision: 2703 Author: [email protected] Date: Tue Aug 18 00:14:02 2009 Log: Streamline the scanner for external two byte string input.
Review URL: http://codereview.chromium.org/165403 http://code.google.com/p/v8/source/detail?r=2703 Modified: /branches/bleeding_edge/src/api.cc /branches/bleeding_edge/src/compiler.cc /branches/bleeding_edge/src/factory.cc /branches/bleeding_edge/src/factory.h /branches/bleeding_edge/src/parser.cc /branches/bleeding_edge/src/parser.h /branches/bleeding_edge/src/scanner.cc /branches/bleeding_edge/src/scanner.h ======================================= --- /branches/bleeding_edge/src/api.cc Mon Aug 17 07:26:48 2009 +++ /branches/bleeding_edge/src/api.cc Tue Aug 18 00:14:02 2009 @@ -1046,7 +1046,7 @@ ScriptData* ScriptData::PreCompile(const char* input, int length) { unibrow::Utf8InputBuffer<> buf(input, length); - return i::PreParse(&buf, NULL); + return i::PreParse(i::Handle<i::String>(), &buf, NULL); } ======================================= --- /branches/bleeding_edge/src/compiler.cc Fri Aug 14 04:05:42 2009 +++ /branches/bleeding_edge/src/compiler.cc Tue Aug 18 00:14:02 2009 @@ -266,7 +266,7 @@ if (pre_data == NULL && source_length >= FLAG_min_preparse_length) { Access<SafeStringInputBuffer> buf(&safe_string_input_buffer); buf->Reset(source.location()); - pre_data = PreParse(buf.value(), extension); + pre_data = PreParse(source, buf.value(), extension); } // Create a script object describing the script to be compiled. ======================================= --- /branches/bleeding_edge/src/factory.cc Tue Jul 28 01:43:51 2009 +++ /branches/bleeding_edge/src/factory.cc Tue Aug 18 00:14:02 2009 @@ -87,8 +87,10 @@ } -Handle<String> Factory::NewStringFromTwoByte(Vector<const uc16> string) { - CALL_HEAP_FUNCTION(Heap::AllocateStringFromTwoByte(string), String); +Handle<String> Factory::NewStringFromTwoByte(Vector<const uc16> string, + PretenureFlag pretenure) { + CALL_HEAP_FUNCTION(Heap::AllocateStringFromTwoByte(string, pretenure), + String); } ======================================= --- /branches/bleeding_edge/src/factory.h Tue Jul 28 01:43:51 2009 +++ /branches/bleeding_edge/src/factory.h Tue Aug 18 00:14:02 2009 @@ -92,7 +92,8 @@ Vector<const char> str, PretenureFlag pretenure = NOT_TENURED); - static Handle<String> NewStringFromTwoByte(Vector<const uc16> str); + static Handle<String> NewStringFromTwoByte(Vector<const uc16> str, + PretenureFlag pretenure = NOT_TENURED); // Allocates and partially initializes a TwoByte String. The characters of // the string are uninitialized. Currently used in regexp code only, where ======================================= --- /branches/bleeding_edge/src/parser.cc Thu Jul 30 04:53:29 2009 +++ /branches/bleeding_edge/src/parser.cc Tue Aug 18 00:14:02 2009 @@ -97,7 +97,7 @@ // Pre-parse the program from the character stream; returns true on // success, false if a stack-overflow happened during parsing. - bool PreParseProgram(unibrow::CharacterStream* stream); + bool PreParseProgram(Handle<String> source, unibrow::CharacterStream* stream); void ReportMessage(const char* message, Vector<const char*> args); virtual void ReportMessageAt(Scanner::Location loc, @@ -1167,13 +1167,14 @@ } -bool Parser::PreParseProgram(unibrow::CharacterStream* stream) { +bool Parser::PreParseProgram(Handle<String> source, + unibrow::CharacterStream* stream) { HistogramTimerScope timer(&Counters::pre_parse); StackGuard guard; AssertNoZoneAllocation assert_no_zone_allocation; AssertNoAllocation assert_no_allocation; NoHandleAllocation no_handle_allocation; - scanner_.Init(Handle<String>(), stream, 0); + scanner_.Init(source, stream, 0); ASSERT(target_stack_ == NULL); mode_ = PARSE_EAGERLY; DummyScope top_scope; @@ -4593,7 +4594,8 @@ } -ScriptDataImpl* PreParse(unibrow::CharacterStream* stream, +ScriptDataImpl* PreParse(Handle<String> source, + unibrow::CharacterStream* stream, v8::Extension* extension) { Handle<Script> no_script; bool allow_natives_syntax = @@ -4601,7 +4603,7 @@ FLAG_allow_natives_syntax || Bootstrapper::IsActive(); PreParser parser(no_script, allow_natives_syntax, extension); - if (!parser.PreParseProgram(stream)) return NULL; + if (!parser.PreParseProgram(source, stream)) return NULL; // The list owns the backing store so we need to clone the vector. // That way, the result will be exactly the right size rather than // the expected 50% too large. ======================================= --- /branches/bleeding_edge/src/parser.h Mon May 25 03:05:56 2009 +++ /branches/bleeding_edge/src/parser.h Tue Aug 18 00:14:02 2009 @@ -143,7 +143,8 @@ ScriptDataImpl* pre_data); -ScriptDataImpl* PreParse(unibrow::CharacterStream* stream, +ScriptDataImpl* PreParse(Handle<String> source, + unibrow::CharacterStream* stream, v8::Extension* extension); ======================================= --- /branches/bleeding_edge/src/scanner.cc Mon May 25 03:05:56 2009 +++ /branches/bleeding_edge/src/scanner.cc Tue Aug 18 00:14:02 2009 @@ -92,33 +92,35 @@ UTF16Buffer::UTF16Buffer() - : pos_(0), - pushback_buffer_(0), - last_(0), - stream_(NULL) { } + : pos_(0), size_(0) { } -void UTF16Buffer::Initialize(Handle<String> data, - unibrow::CharacterStream* input) { +Handle<String> UTF16Buffer::SubString(int start, int end) { + return internal::SubString(data_, start, end); +} + + +// CharacterStreamUTF16Buffer +CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer() + : pushback_buffer_(0), last_(0), stream_(NULL) { } + + +void CharacterStreamUTF16Buffer::Initialize(Handle<String> data, + unibrow::CharacterStream* input) { data_ = data; pos_ = 0; stream_ = input; } -Handle<String> UTF16Buffer::SubString(int start, int end) { - return internal::SubString(data_, start, end); -} - - -void UTF16Buffer::PushBack(uc32 ch) { +void CharacterStreamUTF16Buffer::PushBack(uc32 ch) { pushback_buffer()->Add(last_); last_ = ch; pos_--; } -uc32 UTF16Buffer::Advance() { +uc32 CharacterStreamUTF16Buffer::Advance() { // NOTE: It is of importance to Persian / Farsi resources that we do // *not* strip format control characters in the scanner; see // @@ -135,7 +137,7 @@ uc32 next = stream_->GetNext(); return last_ = next; } else { - // note: currently the following increment is necessary to avoid a + // Note: currently the following increment is necessary to avoid a // test-parser problem! pos_++; return last_ = static_cast<uc32>(-1); @@ -143,11 +145,51 @@ } -void UTF16Buffer::SeekForward(int pos) { +void CharacterStreamUTF16Buffer::SeekForward(int pos) { pos_ = pos; ASSERT(pushback_buffer()->is_empty()); stream_->Seek(pos); } + + +// TwoByteStringUTF16Buffer +TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer() + : raw_data_(NULL) { } + + +void TwoByteStringUTF16Buffer::Initialize( + Handle<ExternalTwoByteString> data) { + ASSERT(!data.is_null()); + + data_ = data; + pos_ = 0; + + raw_data_ = data->resource()->data(); + size_ = data->length(); +} + + +uc32 TwoByteStringUTF16Buffer::Advance() { + if (pos_ < size_) { + return raw_data_[pos_++]; + } else { + // note: currently the following increment is necessary to avoid a + // test-parser problem! + pos_++; + return static_cast<uc32>(-1); + } +} + + +void TwoByteStringUTF16Buffer::PushBack(uc32 ch) { + pos_--; + ASSERT(pos_ >= 0 && raw_data_[pos_] == ch); +} + + +void TwoByteStringUTF16Buffer::SeekForward(int pos) { + pos_ = pos; +} // ---------------------------------------------------------------------------- @@ -161,7 +203,15 @@ void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream, int position) { // Initialize the source buffer. - source_.Initialize(source, stream); + if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) { + two_byte_string_buffer_.Initialize( + Handle<ExternalTwoByteString>::cast(source)); + source_ = &two_byte_string_buffer_; + } else { + char_stream_buffer_.Initialize(source, stream); + source_ = &char_stream_buffer_; + } + position_ = position; // Reset literals buffer @@ -180,7 +230,7 @@ Handle<String> Scanner::SubString(int start, int end) { - return source_.SubString(start - position_, end - position_); + return source_->SubString(start - position_, end - position_); } @@ -221,17 +271,6 @@ AddChar(c0_); Advance(); } - - -void Scanner::Advance() { - c0_ = source_.Advance(); -} - - -void Scanner::PushBack(uc32 ch) { - source_.PushBack(ch); - c0_ = ch; -} static inline bool IsByteOrderMark(uc32 c) { @@ -583,7 +622,7 @@ void Scanner::SeekForward(int pos) { - source_.SeekForward(pos - 1); + source_->SeekForward(pos - 1); Advance(); Scan(); } ======================================= --- /branches/bleeding_edge/src/scanner.h Mon May 25 03:05:56 2009 +++ /branches/bleeding_edge/src/scanner.h Tue Aug 18 00:14:02 2009 @@ -73,27 +73,56 @@ class UTF16Buffer { public: UTF16Buffer(); - - void Initialize(Handle<String> data, unibrow::CharacterStream* stream); - void PushBack(uc32 ch); - uc32 Advance(); // returns a value < 0 when the buffer end is reached - uint16_t CharAt(int index); + virtual ~UTF16Buffer() {} + + virtual void PushBack(uc32 ch) = 0; + // returns a value < 0 when the buffer end is reached + virtual uc32 Advance() = 0; + virtual void SeekForward(int pos) = 0; + int pos() const { return pos_; } int size() const { return size_; } Handle<String> SubString(int start, int end); - List<uc32>* pushback_buffer() { return &pushback_buffer_; } - void SeekForward(int pos); - - private: + + protected: Handle<String> data_; int pos_; int size_; +}; + + +class CharacterStreamUTF16Buffer: public UTF16Buffer { + public: + CharacterStreamUTF16Buffer(); + virtual ~CharacterStreamUTF16Buffer() {} + void Initialize(Handle<String> data, unibrow::CharacterStream* stream); + virtual void PushBack(uc32 ch); + virtual uc32 Advance(); + virtual void SeekForward(int pos); + + private: List<uc32> pushback_buffer_; uc32 last_; unibrow::CharacterStream* stream_; + + List<uc32>* pushback_buffer() { return &pushback_buffer_; } }; +class TwoByteStringUTF16Buffer: public UTF16Buffer { + public: + TwoByteStringUTF16Buffer(); + virtual ~TwoByteStringUTF16Buffer() {} + void Initialize(Handle<ExternalTwoByteString> data); + virtual void PushBack(uc32 ch); + virtual uc32 Advance(); + virtual void SeekForward(int pos); + + private: + const uint16_t* raw_data_; +}; + + class Scanner { public: @@ -184,8 +213,11 @@ static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace; private: + CharacterStreamUTF16Buffer char_stream_buffer_; + TwoByteStringUTF16Buffer two_byte_string_buffer_; + // Source. - UTF16Buffer source_; + UTF16Buffer* source_; int position_; // Buffer to hold literal values (identifiers, strings, numbers) @@ -219,8 +251,11 @@ void TerminateLiteral(); // Low-level scanning support. - void Advance(); - void PushBack(uc32 ch); + void Advance() { c0_ = source_->Advance(); } + void PushBack(uc32 ch) { + source_->PushBack(ch); + c0_ = ch; + } bool SkipWhiteSpace(); Token::Value SkipSingleLineComment(); @@ -243,7 +278,7 @@ // Return the current source position. int source_pos() { - return source_.pos() - kCharacterLookaheadBufferSize + position_; + return source_->pos() - kCharacterLookaheadBufferSize + position_; } // Decodes a unicode escape-sequence which is part of an identifier. --~--~---------~--~----~------------~-------~--~----~ v8-dev mailing list [email protected] http://groups.google.com/group/v8-dev -~----------~----~----~----~------~----~------~--~---
