Revision: 2703
Author: [email protected]
Date: Tue Aug 18 00:14:02 2009
Log: Streamline the scanner for external two byte string input.

Review URL: http://codereview.chromium.org/165403
http://code.google.com/p/v8/source/detail?r=2703

Modified:
  /branches/bleeding_edge/src/api.cc
  /branches/bleeding_edge/src/compiler.cc
  /branches/bleeding_edge/src/factory.cc
  /branches/bleeding_edge/src/factory.h
  /branches/bleeding_edge/src/parser.cc
  /branches/bleeding_edge/src/parser.h
  /branches/bleeding_edge/src/scanner.cc
  /branches/bleeding_edge/src/scanner.h

=======================================
--- /branches/bleeding_edge/src/api.cc  Mon Aug 17 07:26:48 2009
+++ /branches/bleeding_edge/src/api.cc  Tue Aug 18 00:14:02 2009
@@ -1046,7 +1046,7 @@

  ScriptData* ScriptData::PreCompile(const char* input, int length) {
    unibrow::Utf8InputBuffer<> buf(input, length);
-  return i::PreParse(&buf, NULL);
+  return i::PreParse(i::Handle<i::String>(), &buf, NULL);
  }


=======================================
--- /branches/bleeding_edge/src/compiler.cc     Fri Aug 14 04:05:42 2009
+++ /branches/bleeding_edge/src/compiler.cc     Tue Aug 18 00:14:02 2009
@@ -266,7 +266,7 @@
      if (pre_data == NULL && source_length >= FLAG_min_preparse_length) {
        Access<SafeStringInputBuffer> buf(&safe_string_input_buffer);
        buf->Reset(source.location());
-      pre_data = PreParse(buf.value(), extension);
+      pre_data = PreParse(source, buf.value(), extension);
      }

      // Create a script object describing the script to be compiled.
=======================================
--- /branches/bleeding_edge/src/factory.cc      Tue Jul 28 01:43:51 2009
+++ /branches/bleeding_edge/src/factory.cc      Tue Aug 18 00:14:02 2009
@@ -87,8 +87,10 @@
  }


-Handle<String> Factory::NewStringFromTwoByte(Vector<const uc16> string) {
-  CALL_HEAP_FUNCTION(Heap::AllocateStringFromTwoByte(string), String);
+Handle<String> Factory::NewStringFromTwoByte(Vector<const uc16> string,
+                                             PretenureFlag pretenure) {
+  CALL_HEAP_FUNCTION(Heap::AllocateStringFromTwoByte(string, pretenure),
+                     String);
  }


=======================================
--- /branches/bleeding_edge/src/factory.h       Tue Jul 28 01:43:51 2009
+++ /branches/bleeding_edge/src/factory.h       Tue Aug 18 00:14:02 2009
@@ -92,7 +92,8 @@
        Vector<const char> str,
        PretenureFlag pretenure = NOT_TENURED);

-  static Handle<String> NewStringFromTwoByte(Vector<const uc16> str);
+  static Handle<String> NewStringFromTwoByte(Vector<const uc16> str,
+      PretenureFlag pretenure = NOT_TENURED);

    // Allocates and partially initializes a TwoByte String. The characters of
    // the string are uninitialized. Currently used in regexp code only, where
=======================================
--- /branches/bleeding_edge/src/parser.cc       Thu Jul 30 04:53:29 2009
+++ /branches/bleeding_edge/src/parser.cc       Tue Aug 18 00:14:02 2009
@@ -97,7 +97,7 @@

    // Pre-parse the program from the character stream; returns true on
    // success, false if a stack-overflow happened during parsing.
-  bool PreParseProgram(unibrow::CharacterStream* stream);
+  bool PreParseProgram(Handle<String> source, unibrow::CharacterStream* stream);

    void ReportMessage(const char* message, Vector<const char*> args);
    virtual void ReportMessageAt(Scanner::Location loc,
@@ -1167,13 +1167,14 @@
  }


-bool Parser::PreParseProgram(unibrow::CharacterStream* stream) {
+bool Parser::PreParseProgram(Handle<String> source,
+                             unibrow::CharacterStream* stream) {
    HistogramTimerScope timer(&Counters::pre_parse);
    StackGuard guard;
    AssertNoZoneAllocation assert_no_zone_allocation;
    AssertNoAllocation assert_no_allocation;
    NoHandleAllocation no_handle_allocation;
-  scanner_.Init(Handle<String>(), stream, 0);
+  scanner_.Init(source, stream, 0);
    ASSERT(target_stack_ == NULL);
    mode_ = PARSE_EAGERLY;
    DummyScope top_scope;
@@ -4593,7 +4594,8 @@
  }


-ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
+ScriptDataImpl* PreParse(Handle<String> source,
+                         unibrow::CharacterStream* stream,
                           v8::Extension* extension) {
    Handle<Script> no_script;
    bool allow_natives_syntax =
@@ -4601,7 +4603,7 @@
        FLAG_allow_natives_syntax ||
        Bootstrapper::IsActive();
    PreParser parser(no_script, allow_natives_syntax, extension);
-  if (!parser.PreParseProgram(stream)) return NULL;
+  if (!parser.PreParseProgram(source, stream)) return NULL;
    // The list owns the backing store so we need to clone the vector.
    // That way, the result will be exactly the right size rather than
    // the expected 50% too large.
=======================================
--- /branches/bleeding_edge/src/parser.h        Mon May 25 03:05:56 2009
+++ /branches/bleeding_edge/src/parser.h        Tue Aug 18 00:14:02 2009
@@ -143,7 +143,8 @@
                           ScriptDataImpl* pre_data);


-ScriptDataImpl* PreParse(unibrow::CharacterStream* stream,
+ScriptDataImpl* PreParse(Handle<String> source,
+                         unibrow::CharacterStream* stream,
                           v8::Extension* extension);


=======================================
--- /branches/bleeding_edge/src/scanner.cc      Mon May 25 03:05:56 2009
+++ /branches/bleeding_edge/src/scanner.cc      Tue Aug 18 00:14:02 2009
@@ -92,33 +92,35 @@


  UTF16Buffer::UTF16Buffer()
-  : pos_(0),
-    pushback_buffer_(0),
-    last_(0),
-    stream_(NULL) { }
+    : pos_(0), size_(0) { }


-void UTF16Buffer::Initialize(Handle<String> data,
-                             unibrow::CharacterStream* input) {
+Handle<String> UTF16Buffer::SubString(int start, int end) {
+  return internal::SubString(data_, start, end);
+}
+
+
+// CharacterStreamUTF16Buffer
+CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()
+    : pushback_buffer_(0), last_(0), stream_(NULL) { }
+
+
+void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
+                                            unibrow::CharacterStream* input) {
    data_ = data;
    pos_ = 0;
    stream_ = input;
  }


-Handle<String> UTF16Buffer::SubString(int start, int end) {
-  return internal::SubString(data_, start, end);
-}
-
-
-void UTF16Buffer::PushBack(uc32 ch) {
+void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {
    pushback_buffer()->Add(last_);
    last_ = ch;
    pos_--;
  }


-uc32 UTF16Buffer::Advance() {
+uc32 CharacterStreamUTF16Buffer::Advance() {
    // NOTE: It is of importance to Persian / Farsi resources that we do
    // *not* strip format control characters in the scanner; see
    //
@@ -135,7 +137,7 @@
      uc32 next = stream_->GetNext();
      return last_ = next;
    } else {
-    // note: currently the following increment is necessary to avoid a
+    // Note: currently the following increment is necessary to avoid a
      // test-parser problem!
      pos_++;
      return last_ = static_cast<uc32>(-1);
@@ -143,11 +145,51 @@
  }


-void UTF16Buffer::SeekForward(int pos) {
+void CharacterStreamUTF16Buffer::SeekForward(int pos) {
    pos_ = pos;
    ASSERT(pushback_buffer()->is_empty());
    stream_->Seek(pos);
  }
+
+
+// TwoByteStringUTF16Buffer
+TwoByteStringUTF16Buffer::TwoByteStringUTF16Buffer()
+    : raw_data_(NULL) { }
+
+
+void TwoByteStringUTF16Buffer::Initialize(
+     Handle<ExternalTwoByteString> data) {
+  ASSERT(!data.is_null());
+
+  data_ = data;
+  pos_ = 0;
+
+  raw_data_ = data->resource()->data();
+  size_ = data->length();
+}
+
+
+uc32 TwoByteStringUTF16Buffer::Advance() {
+  if (pos_ < size_) {
+    return raw_data_[pos_++];
+  } else {
+    // note: currently the following increment is necessary to avoid a
+    // test-parser problem!
+    pos_++;
+    return static_cast<uc32>(-1);
+  }
+}
+
+
+void TwoByteStringUTF16Buffer::PushBack(uc32 ch) {
+  pos_--;
+  ASSERT(pos_ >= 0 && raw_data_[pos_] == ch);
+}
+
+
+void TwoByteStringUTF16Buffer::SeekForward(int pos) {
+  pos_ = pos;
+}


  // ----------------------------------------------------------------------------
@@ -161,7 +203,15 @@
  void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
      int position) {
    // Initialize the source buffer.
-  source_.Initialize(source, stream);
+  if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
+    two_byte_string_buffer_.Initialize(
+        Handle<ExternalTwoByteString>::cast(source));
+    source_ = &two_byte_string_buffer_;
+  } else {
+    char_stream_buffer_.Initialize(source, stream);
+    source_ = &char_stream_buffer_;
+  }
+
    position_ = position;

    // Reset literals buffer
@@ -180,7 +230,7 @@


  Handle<String> Scanner::SubString(int start, int end) {
-  return source_.SubString(start - position_, end - position_);
+  return source_->SubString(start - position_, end - position_);
  }


@@ -221,17 +271,6 @@
    AddChar(c0_);
    Advance();
  }
-
-
-void Scanner::Advance() {
-  c0_ = source_.Advance();
-}
-
-
-void Scanner::PushBack(uc32 ch) {
-  source_.PushBack(ch);
-  c0_ = ch;
-}


  static inline bool IsByteOrderMark(uc32 c) {
@@ -583,7 +622,7 @@


  void Scanner::SeekForward(int pos) {
-  source_.SeekForward(pos - 1);
+  source_->SeekForward(pos - 1);
    Advance();
    Scan();
  }
=======================================
--- /branches/bleeding_edge/src/scanner.h       Mon May 25 03:05:56 2009
+++ /branches/bleeding_edge/src/scanner.h       Tue Aug 18 00:14:02 2009
@@ -73,27 +73,56 @@
  class UTF16Buffer {
   public:
    UTF16Buffer();
-
-  void Initialize(Handle<String> data, unibrow::CharacterStream* stream);
-  void PushBack(uc32 ch);
-  uc32 Advance();  // returns a value < 0 when the buffer end is reached
-  uint16_t CharAt(int index);
+  virtual ~UTF16Buffer() {}
+
+  virtual void PushBack(uc32 ch) = 0;
+  // returns a value < 0 when the buffer end is reached
+  virtual uc32 Advance() = 0;
+  virtual void SeekForward(int pos) = 0;
+
    int pos() const { return pos_; }
    int size() const { return size_; }
    Handle<String> SubString(int start, int end);
-  List<uc32>* pushback_buffer() { return &pushback_buffer_; }
-  void SeekForward(int pos);
-
- private:
+
+ protected:
    Handle<String> data_;
    int pos_;
    int size_;
+};
+
+
+class CharacterStreamUTF16Buffer: public UTF16Buffer {
+ public:
+  CharacterStreamUTF16Buffer();
+  virtual ~CharacterStreamUTF16Buffer() {}
+  void Initialize(Handle<String> data, unibrow::CharacterStream* stream);
+  virtual void PushBack(uc32 ch);
+  virtual uc32 Advance();
+  virtual void SeekForward(int pos);
+
+ private:
    List<uc32> pushback_buffer_;
    uc32 last_;
    unibrow::CharacterStream* stream_;
+
+  List<uc32>* pushback_buffer() { return &pushback_buffer_; }
  };


+class TwoByteStringUTF16Buffer: public UTF16Buffer {
+ public:
+  TwoByteStringUTF16Buffer();
+  virtual ~TwoByteStringUTF16Buffer() {}
+  void Initialize(Handle<ExternalTwoByteString> data);
+  virtual void PushBack(uc32 ch);
+  virtual uc32 Advance();
+  virtual void SeekForward(int pos);
+
+ private:
+  const uint16_t* raw_data_;
+};
+
+
  class Scanner {
   public:

@@ -184,8 +213,11 @@
    static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;

   private:
+  CharacterStreamUTF16Buffer char_stream_buffer_;
+  TwoByteStringUTF16Buffer two_byte_string_buffer_;
+
    // Source.
-  UTF16Buffer source_;
+  UTF16Buffer* source_;
    int position_;

    // Buffer to hold literal values (identifiers, strings, numbers)
@@ -219,8 +251,11 @@
    void TerminateLiteral();

    // Low-level scanning support.
-  void Advance();
-  void PushBack(uc32 ch);
+  void Advance() { c0_ = source_->Advance(); }
+  void PushBack(uc32 ch) {
+    source_->PushBack(ch);
+    c0_ = ch;
+  }

    bool SkipWhiteSpace();
    Token::Value SkipSingleLineComment();
@@ -243,7 +278,7 @@

    // Return the current source position.
    int source_pos() {
-    return source_.pos() - kCharacterLookaheadBufferSize + position_;
+    return source_->pos() - kCharacterLookaheadBufferSize + position_;
    }

    // Decodes a unicode escape-sequence which is part of an identifier.

--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to