[v8-dev] [v8] r19678 committed - Experimental parser: make utf8 sort of work...

codesite-noreply Wed, 05 Mar 2014 06:16:40 -0800

Revision: 19678
Author:   [email protected]
Date:     Wed Mar  5 14:15:39 2014 UTC
Log:      Experimental parser: make utf8 sort of work


[email protected]

BUG=

Review URL: https://codereview.chromium.org/187603004
http://code.google.com/p/v8/source/detail?r=19678

Modified:
 /branches/experimental/parser/src/lexer/lexer.cc
 /branches/experimental/parser/src/lexer/lexer.h

=======================================

--- /branches/experimental/parser/src/lexer/lexer.cc Wed Mar 5 08:32:50 2014 UTC
+++ /branches/experimental/parser/src/lexer/lexer.cc Wed Mar 5 14:15:39 2014 UTC

@@ -139,10 +139,10 @@

 LexerBase::LexerBase(UnicodeCache* unicode_cache)
     : unicode_cache_(unicode_cache),
+      current_literal_(&literals_[0]),
+      next_literal_(&literals_[1]),
       has_line_terminator_before_next_(true),
       has_multiline_comment_before_next_(false),
-      current_literal_(&literals_[0]),
-      next_literal_(&literals_[1]),
       harmony_numeric_literals_(false),
       harmony_modules_(false),
       harmony_scoping_(false) {
@@ -170,14 +170,13 @@
     : LexerBase(unicode_cache),
       isolate_(NULL),
       source_ptr_(source_ptr),
-      start_position_(0),
       end_position_(length),
-      buffer_(NULL),
-      buffer_end_(NULL),
-      start_(NULL),
-      cursor_(NULL),
+      buffer_(source_ptr),
+      buffer_end_(source_ptr + length),
+      start_(source_ptr),
+      cursor_(source_ptr),
       last_octal_end_(NULL) {
-  CHECK(false);  // not yet supported
+  current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0;
 }


@@ -190,20 +189,16 @@
       isolate_(source->GetIsolate()),
       source_handle_(FlattenGetString(source)),
       source_ptr_(NULL),
-      start_position_(start_position),
       end_position_(end_position),
       buffer_(NULL),
       buffer_end_(NULL),
       start_(NULL),
       cursor_(NULL),
       last_octal_end_(NULL) {
+  cursor_ +=  start_position;
   UpdateBufferBasedOnHandle();
+  isolate_->lexer_gc_handler()->AddLexer(this);
   current_.beg_pos = current_.end_pos = next_.beg_pos = next_.end_pos = 0;
-  isolate_->lexer_gc_handler()->AddLexer(this);
-  // TODO(dcarney): move this to UpdateBufferBasedOnHandle
-  cursor_ = buffer_ + start_position;
-  buffer_end_ = buffer_ + end_position;
-  start_ = cursor_;
 }


@@ -215,16 +210,19 @@
 }


+// TODO(dcarney): utf8 handling
 template<typename Char>
 void Lexer<Char>::SeekForward(int pos) {
+  // TODO(dcarney): utf8 handling
   cursor_ = buffer_ + pos;
   start_ = cursor_;
   has_line_terminator_before_next_ = false;
   has_multiline_comment_before_next_ = false;
-  Scan();  // Fills in next_.
+  Scan();
 }


+// TODO(dcarney): utf8 handling
 template<typename Char>
 bool Lexer<Char>::ScanRegExpPattern(bool seen_equal) {
   // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
@@ -269,6 +267,7 @@
 }


+// TODO(dcarney): utf8 handling
 template<typename Char>
 bool Lexer<Char>::ScanRegExpFlags() {
   next_.beg_pos = cursor_ - buffer_;
@@ -302,7 +301,7 @@


 template<typename Char>
-const Char* Lexer<Char>::ScanHexNumber(
+static const Char* ScanHexNumber(
     const Char* cursor, const Char* end, uc32* result) {
   uc32 x = 0;
   for ( ; cursor < end; ++cursor) {
@@ -321,7 +320,7 @@
 // Octal escapes of the forms '\0xx' and '\xxx' are not a part of
 // ECMA-262. Other JS VMs support them.
 template<typename Char>
-const Char* Lexer<Char>::ScanOctalEscape(
+static const Char* ScanOctalEscape(
     const Char* start, const Char* end, uc32* result) {
   uc32 x = *result - '0';
   const Char* cursor;
@@ -337,6 +336,7 @@
 }


+// TODO(dcarney): utf8 handling
 template<typename Char>
 bool Lexer<Char>::ScanLiteralUnicodeEscape() {
   ASSERT(cursor_ < buffer_end_);
@@ -359,7 +359,7 @@


 template<typename Char>
-const Char* Lexer<Char>::ScanIdentifierUnicodeEscape(
+static const Char* ScanIdentifierUnicodeEscape(
     const Char* cursor, const Char* end, uc32* result) {
   ASSERT(*cursor == '\\');
   if (++cursor >= end) return NULL;
@@ -372,14 +372,16 @@


 template<typename Char>
-const Char* Lexer<Char>::ScanEscape(
-    const Char* cursor, const Char* end, LiteralBuffer* literal) {
+static const Char* ScanEscape(UnicodeCache* cache,
+                              const Char* cursor,
+                              const Char* end,
+                              LiteralBuffer* literal) {
   ASSERT(*cursor == '\\');
   if (++cursor >= end) return NULL;
   uc32 c = *cursor;
   if (++cursor > end) return NULL;
   // Skip escaped newlines.
-  if (unicode_cache_->IsLineTerminator(c)) {
+  if (cache->IsLineTerminator(c)) {
     uc32 peek = *cursor;
     // Allow CR+LF newlines in multiline string literals.
     if (IsCarriageReturn(c) && IsLineFeed(peek)) cursor++;
@@ -432,14 +434,14 @@

 template<typename Char>
 LexerBase::Location Lexer<Char>::octal_position() const {
-  if (!last_octal_end_)
-    return Location::invalid();
+  if (!last_octal_end_) return Location::invalid();

   // The last octal might be an octal escape or an octal number. Whichever it
   // is, we'll find the start by just scanning back until we hit a non-octal
   // character.
   const Char* temp_cursor = last_octal_end_ - 1;

-  while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor <= '7')
+  while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor <= '7') {

     --temp_cursor;
+  }
   return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_);
 }

@@ -477,102 +479,153 @@
     int cursor_offset = cursor_ - buffer_;
     int last_octal_end_offset = last_octal_end_ - buffer_;
     buffer_ = new_buffer;
-    buffer_end_ = buffer_ + source_handle_->length();
+    buffer_end_ = buffer_ + end_position_;
     start_ = buffer_ + start_offset;
     cursor_ = buffer_ + cursor_offset;
     if (last_octal_end_ != NULL) {
       last_octal_end_ = buffer_ + last_octal_end_offset;
     }
-    ResetLiterals();
+    current_literal_->Invalidate();
+    next_literal_->Invalidate();
+  }
+}
+
+
+void LexerBase::LiteralDesc::SetOneByteString(
+    Vector<const uint8_t> string, bool owned) {
+  is_in_buffer_ = false;
+  if (is_one_byte_string_owned_) {
+    one_byte_string_.Dispose();
+  }
+  is_one_byte_string_owned_ = owned;
+  is_one_byte_ = true;
+  one_byte_string_ = string;
+}
+
+

+void LexerBase::LiteralDesc::SetTwoByteString(Vector<const uint16_t> string) {

+  is_in_buffer_ = false;
+  is_one_byte_ = false;
+  two_byte_string_ = string;
+}
+
+
+void LexerBase::LiteralDesc::SetStringFromLiteralBuffer() {
+  is_one_byte_ = buffer.is_ascii();
+  is_in_buffer_ = true;
+  length = buffer.length();
+  if (is_one_byte_) {
+    if (is_one_byte_string_owned_) {
+      one_byte_string_.Dispose();
+    }
+    is_one_byte_string_owned_ = false;
+    one_byte_string_ = Vector<const uint8_t>::cast(buffer.ascii_literal());
+  } else {
+    two_byte_string_ = buffer.utf16_literal();
+  }
+}
+
+
+static inline bool IsOneByte(const uint8_t* cursor, const uint8_t* end) {
+  return true;
+}
+
+
+static inline bool IsOneByte(const uint16_t* cursor, const uint16_t* end) {
+  uint16_t acc = 0;
+  while (cursor != end) {
+    acc |= *cursor++ >> 8;
   }
+  return acc == 0;
 }


-template<>
-bool Lexer<uint8_t>::IsSubstringOfSource(const TokenDesc& token) {
-  return !token.has_escapes;
+static inline bool IsOneByte(const int8_t* cursor, const int8_t* end) {
+  int8_t acc = 0;
+  while (cursor != end) {
+    acc |= *cursor++ >> 7;
+  }
+  return acc == 0;
 }


+template<>
 template<>
-bool Lexer<uint16_t>::IsSubstringOfSource(
-    const TokenDesc& token) {
-  if (token.has_escapes) return false;
-  const uint16_t* start = buffer_ + token.beg_pos;
-  const uint16_t* end = buffer_ + token.end_pos;
-  for (const uint16_t* cursor = start; cursor != end; ++cursor) {
-    if (*cursor >= unibrow::Latin1::kMaxChar) return true;
+inline void Lexer<uint16_t>::SetLiteral<true>(const uint16_t* cursor,
+                                              const uint16_t* end,
+                                              LiteralDesc* literal) {
+  Vector<uint8_t> vector = Vector<uint8_t>::New(literal->length);
+  uint8_t* data = vector.start();
+  while (cursor < end) {
+    *data++ = *cursor++;
   }
-  return false;
+  literal->SetOneByteString(Vector<const uint8_t>::cast(vector), true);
 }


+template<>
 template<>
-bool Lexer<int8_t>::IsSubstringOfSource(const TokenDesc& token) {
-  // FIXME: implement.
-  UNREACHABLE();
-  return false;
+inline void Lexer<uint16_t>::SetLiteral<false>(const uint16_t* start,
+                                        const uint16_t* end,
+                                        LiteralDesc* literal) {

+  literal->SetTwoByteString(Vector<const uint16_t>(start, literal->length));

 }


+template<>
 template<>
-bool Lexer<uint8_t>::FillLiteral(
-    const TokenDesc& token, LiteralDesc* literal) {
-  literal->beg_pos = token.beg_pos;
-  const uint8_t* start = buffer_ + token.beg_pos;
-  const uint8_t* end = buffer_ + token.end_pos;
-  if (token.token == Token::STRING) {
-    ++start;
-    --end;
-  }
-  if (IsSubstringOfSource(token)) {
-    literal->is_one_byte = true;
-    literal->is_in_buffer = false;
-    literal->offset = start - buffer_;
-    literal->length = end - start;

-    literal->one_byte_string = Vector<const uint8_t>(start, literal->length);

-    return true;
-  }
-  return CopyToLiteralBuffer(start, end, token, literal);
+inline void Lexer<uint8_t>::SetLiteral<true>(const uint8_t* start,
+                                             const uint8_t* end,
+                                             LiteralDesc* literal) {
+  literal->SetOneByteString(
+      Vector<const uint8_t>(start, literal->length), false);
 }


+template<>
 template<>
-bool Lexer<uint16_t>::FillLiteral(
-    const TokenDesc& token, LiteralDesc* literal) {
+inline void Lexer<int8_t>::SetLiteral<true>(const int8_t* start,
+                                            const int8_t* end,
+                                            LiteralDesc* literal) {
+  const uint8_t* cast = reinterpret_cast<const uint8_t*>(start);
+  literal->SetOneByteString(
+      Vector<const uint8_t>(cast, literal->length), false);
+}
+
+
+template<class Char>

+bool Lexer<Char>::FillLiteral(const TokenDesc& token, LiteralDesc* literal) {

   literal->beg_pos = token.beg_pos;
-  const uint16_t* start = buffer_ + token.beg_pos;
-  const uint16_t* end = buffer_ + token.end_pos;
+  const Char* start = buffer_ + token.beg_pos;
+  const Char* end = buffer_ + token.end_pos;
   if (token.token == Token::STRING) {
     ++start;
     --end;
   }
-  if (IsSubstringOfSource(token)) {
-    literal->is_one_byte = false;
-    literal->is_in_buffer = false;
-    literal->offset = start - buffer_;
-    literal->length = end - start;

-    literal->two_byte_string = Vector<const uint16_t>(start, literal->length);

-    return true;
+  if (!token.has_escapes) {
+    bool is_one_byte = IsOneByte(start, end);
+    if (sizeof(Char) == 2 || is_one_byte) {
+      literal->offset = start - buffer_;
+      literal->length = end - start;
+      if (sizeof(Char) == 1) {
+        SetLiteral<true>(start, end, literal);
+      } else if (is_one_byte) {
+        SetLiteral<true>(start, end, literal);
+      } else {
+        SetLiteral<false>(start, end, literal);
+      }
+      return true;
+    }
   }
   return CopyToLiteralBuffer(start, end, token, literal);
 }
-
-
-template<>
-bool Lexer<int8_t>::FillLiteral(
-    const TokenDesc& token, LiteralDesc* literal) {
-  // FIXME: implement.
-  UNREACHABLE();
-  return false;
-}


 template<class Char>
 bool Lexer<Char>::CopyToLiteralBuffer(const Char* start,
-                                                    const Char* end,
-                                                    const TokenDesc& token,
-                                                    LiteralDesc* literal) {
+                                      const Char* end,
+                                      const TokenDesc& token,
+                                      LiteralDesc* literal) {
   literal->buffer.Reset();
   if (token.has_escapes) {
     for (const Char* cursor = start; cursor != end;) {
@@ -585,25 +638,19 @@
         if (cursor == NULL) return false;
         literal->buffer.AddChar(c);
       } else {
-        cursor = ScanEscape(cursor, end, &literal->buffer);
+        cursor = ScanEscape(unicode_cache_, cursor, end, &literal->buffer);
         ASSERT(cursor != NULL);
         if (cursor == NULL) return false;
       }
     }
   } else {
+    // TODO(dcarney): This can only happen for utf8 strings
+    // use a helper function.
     for (const Char* cursor = start; cursor != end;) {
         literal->buffer.AddChar(*cursor++);
     }
   }
-  literal->is_one_byte = literal->buffer.is_ascii();
-  literal->is_in_buffer = true;
-  literal->length = literal->buffer.length();
-  if (literal->is_one_byte) {
-    literal->one_byte_string =
-        Vector<const uint8_t>::cast(literal->buffer.ascii_literal());
-  } else {
-    literal->two_byte_string = literal->buffer.utf16_literal();
-  }
+  literal->SetStringFromLiteralBuffer();
   return true;
 }

@@ -611,72 +658,78 @@
 template<class Char>
 Handle<String> Lexer<Char>::InternalizeLiteral(
     LiteralDesc* literal) {
-  Factory* factory = isolate_->factory();
-  if (literal->is_in_buffer) {
-    return literal->is_one_byte
-        ? factory->InternalizeOneByteString(
-            Vector<const uint8_t>::cast(literal->one_byte_string))
-        : factory->InternalizeTwoByteString(literal->two_byte_string);
-  }
-  if (sizeof(Char) == 1) {
-    SubStringKey<uint8_t> key(
-        source_handle_, literal->offset, literal->length);
-    return factory->InternalizeStringWithKey(&key);
-  } else {
-    SubStringKey<uint16_t> key(
-        source_handle_, literal->offset, literal->length);
-    return factory->InternalizeStringWithKey(&key);
-  }
+  // Factory* factory = isolate_->factory();
+  // if (literal->is_in_buffer) {
+  //   return literal->is_one_byte
+  //       ? factory->InternalizeOneByteString(
+  //           Vector<const uint8_t>::cast(literal->one_byte_string))
+  //       : factory->InternalizeTwoByteString(literal->two_byte_string);
+  // }
+  // if (sizeof(Char) == 1) {
+  //   SubStringKey<uint8_t> key(
+  //       source_handle_, literal->offset, literal->length);
+  //   return factory->InternalizeStringWithKey(&key);
+  // } else {
+  //   SubStringKey<uint16_t> key(
+  //       source_handle_, literal->offset, literal->length);
+  //   return factory->InternalizeStringWithKey(&key);
+  // }
+  CHECK(false);
+  return Handle<String>();
 }


 template<>
 Handle<String> Lexer<uint8_t>::AllocateLiteral(
     LiteralDesc* literal, PretenureFlag pretenured) {
-  Factory* factory = isolate_->factory();
-  if (literal->is_in_buffer) {
-    return literal->is_one_byte

-        ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)
-        : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured);

-  }
-  int from = literal->offset;
-  int length = literal->length;
-  // Save the offset and the length before allocating the string as it may
-  // cause a GC, invalidate the literal, and move the source.
-  Handle<String> result = factory->NewRawOneByteString(length, pretenured);
-  uint8_t* chars = SeqOneByteString::cast(*result)->GetChars();
-  String::WriteToFlat(*source_handle_, chars, from, from + length);
-  return result;
+  // Factory* factory = isolate_->factory();
+  // if (literal->is_in_buffer) {
+  //   return literal->is_one_byte

+  //       ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)
+  //       : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured)

+  // }
+  // int from = literal->offset;
+  // int length = literal->length;

+  // // Save the offset and the length before allocating the string as it may

+  // // cause a GC, invalidate the literal, and move the source.

+  // Handle<String> result = factory->NewRawOneByteString(length, pretenured);

+  // uint8_t* chars = SeqOneByteString::cast(*result)->GetChars();
+  // String::WriteToFlat(*source_handle_, chars, from, from + length);
+  // return result;
+  CHECK(false);
+  return Handle<String>();
 }


 template<>
 Handle<String> Lexer<uint16_t>::AllocateLiteral(
     LiteralDesc* literal, PretenureFlag pretenured) {
-  Factory* factory = isolate_->factory();
-  if (literal->is_in_buffer) {
-    return literal->is_one_byte

-        ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)
-        : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured);

-  }
-  // Save the offset and the length before allocating the string as it may
-  // cause a GC, invalidate the literal, and move the source.
-  int from = literal->offset;
-  int length = literal->length;
-  Handle<String> result = factory->NewRawTwoByteString(length, pretenured);
-  uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars();
-  String::WriteToFlat(*source_handle_, chars, from, from + length);
-  return result;
+  // Factory* factory = isolate_->factory();
+  // if (literal->is_in_buffer) {
+  //   return literal->is_one_byte

+  //       ? factory->NewStringFromOneByte(literal->one_byte_string, pretenured)
+  //       : factory->NewStringFromTwoByte(literal->two_byte_string, pretenured)

+  // }

+  // // Save the offset and the length before allocating the string as it may

+  // // cause a GC, invalidate the literal, and move the source.
+  // int from = literal->offset;
+  // int length = literal->length;

+  // Handle<String> result = factory->NewRawTwoByteString(length, pretenured);

+  // uint16_t* chars = SeqTwoByteString::cast(*result)->GetChars();
+  // String::WriteToFlat(*source_handle_, chars, from, from + length);
+  // return result;
+  CHECK(false);
+  return Handle<String>();
 }


 template<>
 Handle<String> Lexer<int8_t>::AllocateLiteral(
     LiteralDesc* literal, PretenureFlag pretenured) {
-  // FIXME: implement
-  UNREACHABLE();
+  CHECK(false);
   return Handle<String>();
 }
+

 template class Lexer<uint8_t>;
 template class Lexer<uint16_t>;
=======================================

--- /branches/experimental/parser/src/lexer/lexer.h Wed Mar 5 08:32:50 2014 UTC
+++ /branches/experimental/parser/src/lexer/lexer.h Wed Mar 5 14:15:39 2014 UTC

@@ -57,10 +57,7 @@
     Location(int b, int e) : beg_pos(b), end_pos(e) { }
     Location() : beg_pos(0), end_pos(0) { }

-    bool IsValid() const {
-      return beg_pos >= 0 && end_pos >= beg_pos;
-    }
-
+    bool IsValid() const { return beg_pos >= 0 && end_pos >= beg_pos; }
     static Location invalid() { return Location(-1, -1); }

     int beg_pos;
@@ -120,12 +117,12 @@

   Vector<const uint8_t> literal_one_byte_string() {
     EnsureCurrentLiteralIsValid();
-    return current_literal_->one_byte_string;
+    return current_literal_->one_byte_string();
   }

   Vector<const uint16_t> literal_two_byte_string() {
     EnsureCurrentLiteralIsValid();
-    return current_literal_->two_byte_string;
+    return current_literal_->two_byte_string();
   }

   int literal_length() {
@@ -135,7 +132,7 @@

   bool is_literal_one_byte() {
     EnsureCurrentLiteralIsValid();
-    return current_literal_->is_one_byte;
+    return current_literal_->is_one_byte();
   }

   bool is_literal_contextual_keyword(Vector<const uint8_t> keyword) {
@@ -151,12 +148,12 @@

   Vector<const uint8_t> next_literal_one_byte_string() {
     EnsureNextLiteralIsValid();
-    return next_literal_->one_byte_string;
+    return next_literal_->one_byte_string();
   }

   Vector<const uint16_t> next_literal_two_byte_string() {
     EnsureNextLiteralIsValid();
-    return next_literal_->two_byte_string;
+    return next_literal_->two_byte_string();
   }

   int next_literal_length() {
@@ -166,7 +163,7 @@

   bool is_next_literal_one_byte() {
     EnsureNextLiteralIsValid();
-    return next_literal_->is_one_byte;
+    return next_literal_->is_one_byte();
   }

   bool is_next_contextual_keyword(Vector<const uint8_t> keyword) {
@@ -202,42 +199,72 @@

   UnicodeCache* unicode_cache() { return unicode_cache_; }

+  class LiteralDesc {
+   public:
+    LiteralDesc()
+        : beg_pos(-1),
+          offset(0),
+          length(0),
+          is_one_byte_(false),
+          is_in_buffer_(false),

+          is_one_byte_string_owned_(false)  // TODO(dcarney): move to buffer

+    { }
+
+    ~LiteralDesc() {
+      if (is_one_byte_string_owned_) {
+        one_byte_string_.Dispose();
+      }
+    }
+
+    inline bool is_one_byte() { return is_one_byte_; }
+    inline Vector<const uint8_t> one_byte_string() {
+      ASSERT(is_one_byte_);
+      return one_byte_string_;
+    }
+    inline  Vector<const uint16_t> two_byte_string() {
+      ASSERT(!is_one_byte_);
+      return two_byte_string_;
+    }
+
+    inline bool Valid(int pos) { return beg_pos == pos; }
+    inline void Invalidate() { if (is_in_buffer_) beg_pos = -1; }
+
+    // TODO(dcarney): make private as well.
+    int beg_pos;
+    int offset;
+    int length;
+    LiteralBuffer buffer;
+
+    void SetOneByteString(Vector<const uint8_t> string, bool owned);
+    void SetTwoByteString(Vector<const uint16_t> string);
+    void SetStringFromLiteralBuffer();
+
+   private:
+    bool is_one_byte_;
+    bool is_in_buffer_;
+    bool is_one_byte_string_owned_;
+    Vector<const uint8_t> one_byte_string_;
+    Vector<const uint16_t> two_byte_string_;
+
+    DISALLOW_COPY_AND_ASSIGN(LiteralDesc);
+  };
+
  protected:
   struct TokenDesc {
-    Token::Value token;
     int beg_pos;
     int end_pos;
+    Token::Value token;
     bool has_escapes;
     bool is_onebyte;
   };

-  struct LiteralDesc {
-    int beg_pos;
-    bool is_one_byte;
-    bool is_in_buffer;
-    int offset;
-    int length;
-    Vector<const uint8_t> one_byte_string;
-    Vector<const uint16_t> two_byte_string;
-    LiteralBuffer buffer;
-    LiteralDesc() : beg_pos(-1), is_one_byte(false), is_in_buffer(false),
-                    offset(0), length(0) { }
-    bool Valid(int pos) { return beg_pos == pos; }
-  };
-
   virtual void Scan() = 0;
-
   virtual void UpdateBufferBasedOnHandle() = 0;

   virtual bool FillLiteral(const TokenDesc& token, LiteralDesc* literal) = 0;

   virtual Handle<String> InternalizeLiteral(LiteralDesc* literal) = 0;
   virtual Handle<String> AllocateLiteral(LiteralDesc* literal,
                                          PretenureFlag tenured) = 0;

-  void ResetLiterals() {
-    if (!current_literal_->is_in_buffer) current_literal_->beg_pos = -1;
-    if (!next_literal_->is_in_buffer) next_literal_->beg_pos = -1;
-  }
-
   void EnsureCurrentLiteralIsValid() {
     if (!current_literal_->Valid(current_.beg_pos)) {
       FillLiteral(current_, current_literal_);
@@ -251,19 +278,18 @@
   }

   UnicodeCache* unicode_cache_;
+  LiteralDesc* current_literal_;
+  LiteralDesc* next_literal_;
+  LiteralDesc literals_[2];
+
+  TokenDesc current_;  // desc for current token (as returned by Next())
+  TokenDesc next_;     // desc for next token (one token look-ahead)

+  // TODO(dcarney): encode flags in uint8_t
   bool has_line_terminator_before_next_;

   // Whether there is a multiline comment *with a line break* before the next

   // token.
   bool has_multiline_comment_before_next_;
-
-  TokenDesc current_;  // desc for current token (as returned by Next())
-  TokenDesc next_;     // desc for next token (one token look-ahead)
-
-  LiteralDesc* current_literal_;
-  LiteralDesc* next_literal_;
-  LiteralDesc literals_[2];
-
   bool harmony_numeric_literals_;
   bool harmony_modules_;
   bool harmony_scoping_;
@@ -292,6 +318,11 @@
  protected:
   virtual void Scan();

+ private:
+  uc32 ScanHexNumber(int length);
+
+  bool ScanLiteralUnicodeEscape();
+
   const Char* GetNewBufferBasedOnHandle() const;
   virtual void UpdateBufferBasedOnHandle();

@@ -300,27 +331,10 @@
   virtual Handle<String> AllocateLiteral(LiteralDesc* literal,
                                          PretenureFlag tenured);

- private:
-  uc32 ScanHexNumber(int length);
-
-  bool ScanLiteralUnicodeEscape();
-
-  const Char* ScanHexNumber(const Char* start,
-                            const Char* end,
-                            uc32* result);
-  const Char* ScanOctalEscape(const Char* start,
-                              const Char* end,
-                              uc32* result);
-  const Char* ScanIdentifierUnicodeEscape(const Char* start,
-                                          const Char* end,
-                                          uc32* result);
-  const Char* ScanEscape(const Char* start,
-                         const Char* end,
-                         LiteralBuffer* literal);
-
-  // Returns true if the literal of the token can be represented as a
-  // substring of the source.
-  bool IsSubstringOfSource(const TokenDesc& token);
+  // Helper function for FillLiteral.
+  template<bool is_one_byte>
+  static void SetLiteral(
+      const Char* start, const Char* end, LiteralDesc* literal);

   bool CopyToLiteralBuffer(const Char* start,
                            const Char* end,
@@ -332,7 +346,6 @@
   Isolate* isolate_;
   const Handle<String> source_handle_;
   const Char* const source_ptr_;
-  const int start_position_;
   const int end_position_;
   // Stream variables.
   const Char* buffer_;

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

---
You received this message because you are subscribed to the Google Groups "v8-dev" group.

To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

[v8-dev] [v8] r19678 committed - Experimental parser: make utf8 sort of work...

Reply via email to