[v8-dev] [v8] r19849 committed - Move most scanner buffer accesses into scanner....

codesite-noreply Wed, 12 Mar 2014 07:04:32 -0700

Revision: 19849
Author:   [email protected]
Date:     Wed Mar 12 14:03:25 2014 UTC
Log:      Move most scanner buffer accesses into scanner.


[email protected]

BUG=

Review URL: https://codereview.chromium.org/197103002
http://code.google.com/p/v8/source/detail?r=19849

Modified:
 /branches/bleeding_edge/src/parser.cc
 /branches/bleeding_edge/src/parser.h
 /branches/bleeding_edge/src/preparser.cc
 /branches/bleeding_edge/src/preparser.h
 /branches/bleeding_edge/src/scanner.cc
 /branches/bleeding_edge/src/scanner.h
 /branches/bleeding_edge/test/cctest/test-parsing.cc

=======================================
--- /branches/bleeding_edge/src/parser.cc       Wed Mar 12 13:27:32 2014 UTC
+++ /branches/bleeding_edge/src/parser.cc       Wed Mar 12 14:03:25 2014 UTC
@@ -212,13 +212,7 @@
   // count.
   if (symbol_id < 0 ||
       (pre_parse_data_ && symbol_id >= pre_parse_data_->symbol_count())) {
-    if (scanner()->is_literal_ascii()) {
-      return isolate()->factory()->InternalizeOneByteString(
-          Vector<const uint8_t>::cast(scanner()->literal_ascii_string()));
-    } else {
-      return isolate()->factory()->InternalizeTwoByteString(
-          scanner()->literal_utf16_string());
-    }
+    return scanner()->AllocateInternalizedString(isolate_);
   }
   return LookupCachedSymbol(symbol_id);
 }
@@ -233,13 +227,7 @@
   }
   Handle<String> result = symbol_cache_.at(symbol_id);
   if (result.is_null()) {
-    if (scanner()->is_literal_ascii()) {
-      result = isolate()->factory()->InternalizeOneByteString(
-          Vector<const uint8_t>::cast(scanner()->literal_ascii_string()));
-    } else {
-      result = isolate()->factory()->InternalizeTwoByteString(
-          scanner()->literal_utf16_string());
-    }
+    result = scanner()->AllocateInternalizedString(isolate_);
     symbol_cache_.at(symbol_id) = result;
     return result;
   }
@@ -514,13 +502,7 @@

 Handle<String> ParserTraits::NextLiteralString(Scanner* scanner,
                                                PretenureFlag tenured) {
-  if (scanner->is_next_literal_ascii()) {
-    return parser_->isolate_->factory()->NewStringFromAscii(
-        scanner->next_literal_ascii_string(), tenured);
-  } else {
-    return parser_->isolate_->factory()->NewStringFromTwoByte(
-        scanner->next_literal_utf16_string(), tenured);
-  }
+  return scanner->AllocateNextLiteralString(parser_->isolate(), tenured);
 }


@@ -544,11 +526,7 @@
     case Token::FALSE_LITERAL:
       return factory->NewLiteral(isolate_factory->false_value(), pos);
     case Token::NUMBER: {
-      ASSERT(scanner->is_literal_ascii());
-      double value = StringToDouble(parser_->isolate()->unicode_cache(),
-                                    scanner->literal_ascii_string(),
-                                    ALLOW_HEX | ALLOW_OCTAL |
-                                        ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
+      double value = scanner->DoubleValue();
       return factory->NewNumberLiteral(value, pos);
     }
     default:
=======================================
--- /branches/bleeding_edge/src/parser.h        Tue Mar 11 16:30:47 2014 UTC
+++ /branches/bleeding_edge/src/parser.h        Wed Mar 12 14:03:25 2014 UTC
@@ -704,16 +704,6 @@

   bool CheckInOrOf(bool accept_OF, ForEachStatement::VisitMode* visit_mode);

-  Handle<String> LiteralString(PretenureFlag tenured) {
-    if (scanner()->is_literal_ascii()) {
-      return isolate_->factory()->NewStringFromAscii(
-          scanner()->literal_ascii_string(), tenured);
-    } else {
-      return isolate_->factory()->NewStringFromTwoByte(
-            scanner()->literal_utf16_string(), tenured);
-    }
-  }
-
   // Get odd-ball literals.
   Literal* GetLiteralUndefined(int position);

=======================================
--- /branches/bleeding_edge/src/preparser.cc    Tue Mar 11 16:30:47 2014 UTC
+++ /branches/bleeding_edge/src/preparser.cc    Wed Mar 12 14:03:25 2014 UTC
@@ -91,16 +91,11 @@
   } else if (scanner->current_token() == Token::YIELD) {
     return PreParserIdentifier::Yield();
   }
-  if (scanner->is_literal_ascii()) {
-    // Detect strict-mode poison words.
-    if (scanner->literal_length() == 4 &&
-        !strncmp(scanner->literal_ascii_string().start(), "eval", 4)) {
-      return PreParserIdentifier::Eval();
-    }
-    if (scanner->literal_length() == 9 &&
-        !strncmp(scanner->literal_ascii_string().start(), "arguments", 9)) {
-      return PreParserIdentifier::Arguments();
-    }
+  if (scanner->UnescapedLiteralMatches("eval", 4)) {
+    return PreParserIdentifier::Eval();
+  }
+  if (scanner->UnescapedLiteralMatches("arguments", 9)) {
+    return PreParserIdentifier::Arguments();
   }
   return PreParserIdentifier::Default();
 }
@@ -108,14 +103,8 @@

 PreParserExpression PreParserTraits::ExpressionFromString(
     int pos, Scanner* scanner, PreParserFactory* factory) {
-  const int kUseStrictLength = 10;
-  const char* kUseStrictChars = "use strict";
   pre_parser_->LogSymbol();
-  if (scanner->is_literal_ascii() &&
-      scanner->literal_length() == kUseStrictLength &&
-      !scanner->literal_contains_escapes() &&
-      !strncmp(scanner->literal_ascii_string().start(), kUseStrictChars,
-               kUseStrictLength)) {
+  if (scanner->UnescapedLiteralMatches("use strict", 10)) {
     return PreParserExpression::UseStrictStringLiteral();
   }
   return PreParserExpression::StringLiteral();
@@ -1176,9 +1165,9 @@
     }

     int prev_value;
-    if (scanner()->is_literal_ascii()) {
-      prev_value =
-          duplicate_finder.AddAsciiSymbol(scanner()->literal_ascii_string(), 1);
+    if (scanner()->is_literal_one_byte()) {
+      prev_value = duplicate_finder.AddAsciiSymbol(
+          scanner()->literal_one_byte_string(), 1);
     } else {
       prev_value =
           duplicate_finder.AddUtf16Symbol(scanner()->literal_utf16_string(), 1);
@@ -1285,8 +1274,8 @@

 void PreParser::LogSymbol() {
   int identifier_pos = position();
-  if (scanner()->is_literal_ascii()) {
-    log_->LogAsciiSymbol(identifier_pos, scanner()->literal_ascii_string());
+  if (scanner()->is_literal_one_byte()) {
+    log_->LogAsciiSymbol(identifier_pos, scanner()->literal_one_byte_string());
   } else {
     log_->LogUtf16Symbol(identifier_pos, scanner()->literal_utf16_string());
   }
=======================================
--- /branches/bleeding_edge/src/preparser.h     Tue Mar 11 16:30:47 2014 UTC
+++ /branches/bleeding_edge/src/preparser.h     Wed Mar 12 14:03:25 2014 UTC
@@ -1114,12 +1114,7 @@
                                                   bool* ok) {
   typename Traits::Type::Identifier result = ParseIdentifierName(ok);
   if (!*ok) return Traits::EmptyIdentifier();
-  if (scanner()->is_literal_ascii() &&
-      scanner()->literal_length() == 3) {
-    const char* token = scanner()->literal_ascii_string().start();
-    *is_get = strncmp(token, "get", 3) == 0;
-    *is_set = !*is_get && strncmp(token, "set", 3) == 0;
-  }
+  scanner()->IsGetOrSet(is_get, is_set);
   return result;
 }

@@ -1517,9 +1512,9 @@
     bool* ok) {
   int old;
   if (property == Token::NUMBER) {
-    old = finder_.AddNumber(scanner()->literal_ascii_string(), type);
-  } else if (scanner()->is_literal_ascii()) {
-    old = finder_.AddAsciiSymbol(scanner()->literal_ascii_string(), type);
+    old = finder_.AddNumber(scanner()->literal_one_byte_string(), type);
+  } else if (scanner()->is_literal_one_byte()) {
+    old = finder_.AddAsciiSymbol(scanner()->literal_one_byte_string(), type);
   } else {
     old = finder_.AddUtf16Symbol(scanner()->literal_utf16_string(), type);
   }
=======================================
--- /branches/bleeding_edge/src/scanner.cc      Mon Feb 10 12:43:10 2014 UTC
+++ /branches/bleeding_edge/src/scanner.cc      Wed Mar 12 14:03:25 2014 UTC
@@ -35,6 +35,7 @@
 #include "char-predicates-inl.h"
 #include "conversions-inl.h"
 #include "list-inl.h"
+#include "v8.h"

 namespace v8 {
 namespace internal {
@@ -982,8 +983,8 @@

   literal.Complete();

-  if (next_.literal_chars->is_ascii()) {
-    Vector<const char> chars = next_.literal_chars->ascii_literal();
+  if (next_.literal_chars->is_one_byte()) {
+    Vector<const char> chars = next_.literal_chars->one_byte_literal();
     return KeywordOrIdentifierToken(chars.start(),
                                     chars.length(),
                                     harmony_scoping_,
@@ -1112,6 +1113,49 @@
   next_.location.end_pos = source_pos() - 1;
   return true;
 }
+
+
+Handle<String> Scanner::AllocateLiteralString(Isolate* isolate,
+                                              PretenureFlag tenured) {
+  if (is_literal_one_byte()) {
+    return isolate->factory()->NewStringFromOneByte(
+        Vector<const uint8_t>::cast(literal_one_byte_string()), tenured);
+  } else {
+    return isolate->factory()->NewStringFromTwoByte(
+          literal_utf16_string(), tenured);
+  }
+}
+
+
+Handle<String> Scanner::AllocateNextLiteralString(Isolate* isolate,
+                                                  PretenureFlag tenured) {
+  if (is_next_literal_one_byte()) {
+    return isolate->factory()->NewStringFromOneByte(
+        Vector<const uint8_t>::cast(next_literal_one_byte_string()), tenured);
+  } else {
+    return isolate->factory()->NewStringFromTwoByte(
+          next_literal_utf16_string(), tenured);
+  }
+}
+
+
+Handle<String> Scanner::AllocateInternalizedString(Isolate* isolate) {
+  if (is_literal_one_byte()) {
+    return isolate->factory()->InternalizeOneByteString(
+        Vector<const uint8_t>::cast(literal_one_byte_string()));
+  } else {
+    return isolate->factory()->InternalizeTwoByteString(
+        literal_utf16_string());
+  }
+}
+
+
+double Scanner::DoubleValue() {
+  ASSERT(is_literal_one_byte());
+  return StringToDouble(
+      unicode_cache_, literal_one_byte_string(),
+      ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
+}


 int DuplicateFinder::AddAsciiSymbol(Vector<const char> key, int value) {
@@ -1125,10 +1169,10 @@


 int DuplicateFinder::AddSymbol(Vector<const byte> key,
-                               bool is_ascii,
+                               bool is_one_byte,
                                int value) {
-  uint32_t hash = Hash(key, is_ascii);
-  byte* encoding = BackupKey(key, is_ascii);
+  uint32_t hash = Hash(key, is_one_byte);
+  byte* encoding = BackupKey(key, is_one_byte);
   HashMap::Entry* entry = map_.Lookup(encoding, hash, true);
   int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
   entry->value =
@@ -1189,11 +1233,11 @@
 }


-uint32_t DuplicateFinder::Hash(Vector<const byte> key, bool is_ascii) {
+uint32_t DuplicateFinder::Hash(Vector<const byte> key, bool is_one_byte) {
   // Primitive hash function, almost identical to the one used
   // for strings (except that it's seeded by the length and ASCII-ness).
   int length = key.length();
-  uint32_t hash = (length << 1) | (is_ascii ? 1 : 0) ;
+  uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0) ;
   for (int i = 0; i < length; i++) {
     uint32_t c = key[i];
     hash = (hash + c) * 1025;
@@ -1211,39 +1255,39 @@
   // was ASCII.
   byte* s1 = reinterpret_cast<byte*>(first);
   byte* s2 = reinterpret_cast<byte*>(second);
-  uint32_t length_ascii_field = 0;
+  uint32_t length_one_byte_field = 0;
   byte c1;
   do {
     c1 = *s1;
     if (c1 != *s2) return false;
-    length_ascii_field = (length_ascii_field << 7) | (c1 & 0x7f);
+    length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);
     s1++;
     s2++;
   } while ((c1 & 0x80) != 0);
-  int length = static_cast<int>(length_ascii_field >> 1);
+  int length = static_cast<int>(length_one_byte_field >> 1);
   return memcmp(s1, s2, length) == 0;
 }


 byte* DuplicateFinder::BackupKey(Vector<const byte> bytes,
-                                 bool is_ascii) {
-  uint32_t ascii_length = (bytes.length() << 1) | (is_ascii ? 1 : 0);
+                                 bool is_one_byte) {
+  uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
   backing_store_.StartSequence();
-  // Emit ascii_length as base-128 encoded number, with the 7th bit set
+  // Emit one_byte_length as base-128 encoded number, with the 7th bit set
   // on the byte of every heptet except the last, least significant, one.
-  if (ascii_length >= (1 << 7)) {
-    if (ascii_length >= (1 << 14)) {
-      if (ascii_length >= (1 << 21)) {
-        if (ascii_length >= (1 << 28)) {
-          backing_store_.Add(static_cast<byte>((ascii_length >> 28) | 0x80));
+  if (one_byte_length >= (1 << 7)) {
+    if (one_byte_length >= (1 << 14)) {
+      if (one_byte_length >= (1 << 21)) {
+        if (one_byte_length >= (1 << 28)) {
+          backing_store_.Add(static_cast<byte>((one_byte_length >> 28) | 0x80));
         }
-        backing_store_.Add(static_cast<byte>((ascii_length >> 21) | 0x80u));
+        backing_store_.Add(static_cast<byte>((one_byte_length >> 21) | 0x80u));
       }
-      backing_store_.Add(static_cast<byte>((ascii_length >> 14) | 0x80u));
+      backing_store_.Add(static_cast<byte>((one_byte_length >> 14) | 0x80u));
     }
-    backing_store_.Add(static_cast<byte>((ascii_length >> 7) | 0x80u));
+    backing_store_.Add(static_cast<byte>((one_byte_length >> 7) | 0x80u));
   }
-  backing_store_.Add(static_cast<byte>(ascii_length & 0x7f));
+  backing_store_.Add(static_cast<byte>(one_byte_length & 0x7f));

   backing_store_.AddBlock(bytes);
   return backing_store_.EndSequence().start();
=======================================
--- /branches/bleeding_edge/src/scanner.h       Mon Feb 10 12:43:10 2014 UTC
+++ /branches/bleeding_edge/src/scanner.h       Wed Mar 12 14:03:25 2014 UTC
@@ -176,19 +176,19 @@
   int AddNumber(Vector<const char> key, int value);

  private:
-  int AddSymbol(Vector<const byte> key, bool is_ascii, int value);
+  int AddSymbol(Vector<const byte> key, bool is_one_byte, int value);
   // Backs up the key and its length in the backing store.
   // The backup is stored with a base 127 encoding of the
   // length (plus a bit saying whether the string is ASCII),
   // followed by the bytes of the key.
-  byte* BackupKey(Vector<const byte> key, bool is_ascii);
+  byte* BackupKey(Vector<const byte> key, bool is_one_byte);

   // Compare two encoded keys (both pointing into the backing store)
   // for having the same base-127 encoded lengths and ASCII-ness,
   // and then having the same 'length' bytes following.
   static bool Match(void* first, void* second);
   // Creates a hash from a sequence of bytes.
-  static uint32_t Hash(Vector<const byte> key, bool is_ascii);
+  static uint32_t Hash(Vector<const byte> key, bool is_one_byte);
   // Checks whether a string containing a JS number is its canonical
   // form.
   static bool IsNumberCanonical(Vector<const char> key);
@@ -211,7 +211,7 @@

 class LiteralBuffer {
  public:
-  LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { }
+  LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }

   ~LiteralBuffer() {
     if (backing_store_.length() > 0) {
@@ -221,7 +221,7 @@

   INLINE(void AddChar(uint32_t code_unit)) {
     if (position_ >= backing_store_.length()) ExpandBuffer();
-    if (is_ascii_) {
+    if (is_one_byte_) {
       if (code_unit <= unibrow::Latin1::kMaxChar) {
         backing_store_[position_] = static_cast<byte>(code_unit);
         position_ += kOneByteSize;
@@ -234,35 +234,35 @@
     position_ += kUC16Size;
   }

-  bool is_ascii() { return is_ascii_; }
+  bool is_one_byte() { return is_one_byte_; }

   bool is_contextual_keyword(Vector<const char> keyword) {
-    return is_ascii() && keyword.length() == position_ &&
+    return is_one_byte() && keyword.length() == position_ &&
         (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
   }

   Vector<const uc16> utf16_literal() {
-    ASSERT(!is_ascii_);
+    ASSERT(!is_one_byte_);
     ASSERT((position_ & 0x1) == 0);
     return Vector<const uc16>(
         reinterpret_cast<const uc16*>(backing_store_.start()),
         position_ >> 1);
   }

-  Vector<const char> ascii_literal() {
-    ASSERT(is_ascii_);
+  Vector<const char> one_byte_literal() {
+    ASSERT(is_one_byte_);
     return Vector<const char>(
         reinterpret_cast<const char*>(backing_store_.start()),
         position_);
   }

   int length() {
-    return is_ascii_ ? position_ : (position_ >> 1);
+    return is_one_byte_ ? position_ : (position_ >> 1);
   }

   void Reset() {
     position_ = 0;
-    is_ascii_ = true;
+    is_one_byte_ = true;
   }

  private:
@@ -284,7 +284,7 @@
   }

   void ConvertToUtf16() {
-    ASSERT(is_ascii_);
+    ASSERT(is_one_byte_);
     Vector<byte> new_store;
     int new_content_size = position_ * kUC16Size;
     if (new_content_size >= backing_store_.length()) {
@@ -304,10 +304,10 @@
       backing_store_ = new_store;
     }
     position_ = new_content_size;
-    is_ascii_ = false;
+    is_one_byte_ = false;
   }

-  bool is_ascii_;
+  bool is_one_byte_;
   int position_;
   Vector<byte> backing_store_;

@@ -376,17 +376,17 @@
   // numbers.
   // These functions only give the correct result if the literal
   // was scanned between calls to StartLiteral() and TerminateLiteral().
-  Vector<const char> literal_ascii_string() {
+  Vector<const char> literal_one_byte_string() {
     ASSERT_NOT_NULL(current_.literal_chars);
-    return current_.literal_chars->ascii_literal();
+    return current_.literal_chars->one_byte_literal();
   }
   Vector<const uc16> literal_utf16_string() {
     ASSERT_NOT_NULL(current_.literal_chars);
     return current_.literal_chars->utf16_literal();
   }
-  bool is_literal_ascii() {
+  bool is_literal_one_byte() {
     ASSERT_NOT_NULL(current_.literal_chars);
-    return current_.literal_chars->is_ascii();
+    return current_.literal_chars->is_one_byte();
   }
   bool is_literal_contextual_keyword(Vector<const char> keyword) {
     ASSERT_NOT_NULL(current_.literal_chars);
@@ -416,17 +416,17 @@

   // Returns the literal string for the next token (the token that
   // would be returned if Next() were called).
-  Vector<const char> next_literal_ascii_string() {
+  Vector<const char> next_literal_one_byte_string() {
     ASSERT_NOT_NULL(next_.literal_chars);
-    return next_.literal_chars->ascii_literal();
+    return next_.literal_chars->one_byte_literal();
   }
   Vector<const uc16> next_literal_utf16_string() {
     ASSERT_NOT_NULL(next_.literal_chars);
     return next_.literal_chars->utf16_literal();
   }
-  bool is_next_literal_ascii() {
+  bool is_next_literal_one_byte() {
     ASSERT_NOT_NULL(next_.literal_chars);
-    return next_.literal_chars->is_ascii();
+    return next_.literal_chars->is_one_byte();
   }
   bool is_next_contextual_keyword(Vector<const char> keyword) {
     ASSERT_NOT_NULL(next_.literal_chars);
@@ -436,6 +436,30 @@
     ASSERT_NOT_NULL(next_.literal_chars);
     return next_.literal_chars->length();
   }
+
+  Handle<String> AllocateLiteralString(Isolate* isolate, PretenureFlag tenured);
+  Handle<String> AllocateNextLiteralString(Isolate* isolate,
+                                           PretenureFlag tenured);
+  Handle<String> AllocateInternalizedString(Isolate* isolate);
+
+  double DoubleValue();
+  bool UnescapedLiteralMatches(const char* data, int length) {
+    if (is_literal_one_byte() &&
+        literal_length() == length &&
+        !literal_contains_escapes()) {
+      return !strncmp(literal_one_byte_string().start(), data, length);
+    }
+    return false;
+  }
+  void IsGetOrSet(bool* is_get, bool* is_set) {
+    if (is_literal_one_byte() &&
+        literal_length() == 3 &&
+        !literal_contains_escapes()) {
+      const char* token = literal_one_byte_string().start();
+      *is_get = strncmp(token, "get", 3) == 0;
+      *is_set = !*is_get && strncmp(token, "set", 3) == 0;
+    }
+  }

   UnicodeCache* unicode_cache() { return unicode_cache_; }

=======================================
--- /branches/bleeding_edge/test/cctest/test-parsing.cc Tue Mar 11 16:30:47 2014 UTC
+++ /branches/bleeding_edge/test/cctest/test-parsing.cc Wed Mar 12 14:03:25 2014 UTC
@@ -802,8 +802,8 @@
   CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
   CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV));
   scanner.Next();  // Current token is now the regexp literal.
-  CHECK(scanner.is_literal_ascii());
-  i::Vector<const char> actual = scanner.literal_ascii_string();
+  CHECK(scanner.is_literal_one_byte());
+  i::Vector<const char> actual = scanner.literal_one_byte_string();
   for (int i = 0; i < actual.length(); i++) {
     CHECK_NE('\0', expected[i]);
     CHECK_EQ(expected[i], actual[i]);

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

---
You received this message because you are subscribed to the Google Groups "v8-dev" group.

To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

[v8-dev] [v8] r19849 committed - Move most scanner buffer accesses into scanner....

Reply via email to