Author: [EMAIL PROTECTED]
Date: Tue Oct  7 03:10:03 2008
New Revision: 456

Modified:
    branches/bleeding_edge/src/heap.cc
    branches/bleeding_edge/src/heap.h
    branches/bleeding_edge/src/objects-inl.h
    branches/bleeding_edge/src/objects.cc
    branches/bleeding_edge/src/objects.h
    branches/bleeding_edge/src/runtime.cc

Log:
Calculate string hash during flattening and convert flat strings to
symbols.


Modified: branches/bleeding_edge/src/heap.cc
==============================================================================
--- branches/bleeding_edge/src/heap.cc  (original)
+++ branches/bleeding_edge/src/heap.cc  Tue Oct  7 03:10:03 2008
@@ -2251,6 +2251,16 @@
  }


+bool Heap::LookupSymbolIfExists(String* string, String** symbol) {
+  if (string->IsSymbol()) {
+    *symbol = string;
+    return true;
+  }
+  SymbolTable* table = SymbolTable::cast(symbol_table_);
+  return table->LookupSymbolIfExists(string, symbol);
+}
+
+
  #ifdef DEBUG
  void Heap::ZapFromSpace() {
    ASSERT(HAS_HEAP_OBJECT_TAG(kFromSpaceZapValue));

Modified: branches/bleeding_edge/src/heap.h
==============================================================================
--- branches/bleeding_edge/src/heap.h   (original)
+++ branches/bleeding_edge/src/heap.h   Tue Oct  7 03:10:03 2008
@@ -528,6 +528,7 @@
      return LookupSymbol(CStrVector(str));
    }
    static Object* LookupSymbol(String* str);
+  static bool LookupSymbolIfExists(String* str, String** symbol);

    // Compute the matching symbol map for a string if possible.
    // NULL is returned if string is in new space or not flattened.

Modified: branches/bleeding_edge/src/objects-inl.h
==============================================================================
--- branches/bleeding_edge/src/objects-inl.h    (original)
+++ branches/bleeding_edge/src/objects-inl.h    Tue Oct  7 03:10:03 2008
@@ -2088,6 +2088,65 @@
  }


+StringHasher::StringHasher(int length)
+  : length_(length),
+    raw_running_hash_(0),
+    array_index_(0),
+    is_array_index_(0 < length_ && length_ <= String::kMaxArrayIndexSize),
+    is_first_char_(true),
+    is_valid_(true) { }
+
+
+bool StringHasher::has_trivial_hash() {
+  return length_ > String::kMaxMediumStringSize;
+}
+
+
+void StringHasher::AddCharacter(uc32 c) {
+  // Note: the Jenkins one-at-a-time hash function
+  raw_running_hash_ += c;
+  raw_running_hash_ += (raw_running_hash_ << 10);
+  raw_running_hash_ ^= (raw_running_hash_ >> 6);
+  // Incremental array index computation
+  if (is_array_index_) {
+    if (c < '0' || c > '9') {
+      is_array_index_ = false;
+    } else {
+      int d = c - '0';
+      if (is_first_char_) {
+        is_first_char_ = false;
+        if (c == '0' && length_ > 1) {
+          is_array_index_ = false;
+          return;
+        }
+      }
+      if (array_index_ > 429496729U - ((d + 2) >> 3)) {
+        is_array_index_ = false;
+      } else {
+        array_index_ = array_index_ * 10 + d;
+      }
+    }
+  }
+}
+
+
+void StringHasher::AddCharacterNoIndex(uc32 c) {
+  ASSERT(!is_array_index());
+  raw_running_hash_ += c;
+  raw_running_hash_ += (raw_running_hash_ << 10);
+  raw_running_hash_ ^= (raw_running_hash_ >> 6);
+}
+
+
+uint32_t StringHasher::GetHash() {
+  uint32_t result = raw_running_hash_;
+  result += (result << 3);
+  result ^= (result >> 11);
+  result += (result << 15);
+  return result;
+}
+
+
  bool String::AsArrayIndex(uint32_t* index) {
    uint32_t field = length_field();
    if ((field & kHashComputedMask) && !(field & kIsArrayIndexMask)) return  
false;

Modified: branches/bleeding_edge/src/objects.cc
==============================================================================
--- branches/bleeding_edge/src/objects.cc       (original)
+++ branches/bleeding_edge/src/objects.cc       Tue Oct  7 03:10:03 2008
@@ -521,12 +521,23 @@
        // cons string is in old space.  It can never get GCed until there is
        // an old space GC.
        PretenureFlag tenure = Heap::InNewSpace(this) ? NOT_TENURED :  
TENURED;
+      int len = length();
        Object* object = IsAscii() ?
-          Heap::AllocateRawAsciiString(length(), tenure) :
-          Heap::AllocateRawTwoByteString(length(), tenure);
+          Heap::AllocateRawAsciiString(len, tenure) :
+          Heap::AllocateRawTwoByteString(len, tenure);
        if (object->IsFailure()) return object;
        String* result = String::cast(object);
-      Flatten(this, result, 0, length(), 0);
+      StringHasher hasher(len);
+      Flatten(this, result, 0, len, 0, &hasher);
+      if (hasher.is_valid()) {
+#ifdef DEBUG
+        result->ComputeAndSetHash();
+        ASSERT(result->length_field() == hasher.GetHashField());
+#else
+        result->set_length_field(hasher.GetHashField());
+#endif
+        Heap::LookupSymbolIfExists(result, &result);
+      }
        cs->set_first(result);
        cs->set_second(Heap::empty_string());
        return this;
@@ -3606,7 +3617,12 @@
  }


-void String::Flatten(String* src, String* sink, int f, int t, int so) {
+void String::Flatten(String* src,
+                     String* sink,
+                     int f,
+                     int t,
+                     int so,
+                     StringHasher* hasher) {
    String* source = src;
    int from = f;
    int to = t;
@@ -3621,7 +3637,10 @@
          buffer->Reset(from, source);
          int j = sink_offset;
          for (int i = from; i < to; i++) {
-          sink->Set(j++, buffer->GetNext());
+          uc32 c = buffer->GetNext();
+          if (hasher->is_valid())
+            hasher->AddCharacter(c);
+          sink->Set(j++, c);
          }
          return;
        }
@@ -3637,10 +3656,10 @@
          ConsString* cons_string = ConsString::cast(source);
          String* first = String::cast(cons_string->first());
          int boundary = first->length();
-        if (to - boundary > boundary - from) {
+        if (to - boundary >= boundary - from) {
            // Right hand side is longer.  Recurse over left.
            if (from < boundary) {
-            Flatten(first, sink, from, boundary, sink_offset);
+            Flatten(first, sink, from, boundary, sink_offset, hasher);
              sink_offset += boundary - from;
              from = 0;
            } else {
@@ -3649,14 +3668,18 @@
            to -= boundary;
            source = String::cast(cons_string->second());
          } else {
-          // Left hand side is longer.  Recurse over right.
+          // Left hand side is longer.  Recurse over right.  The hasher
+          // needs us to visit the string from left to right so doing
+          // this invalidates that hash.
+          hasher->invalidate();
            if (to > boundary) {
              String* second = String::cast(cons_string->second());
              Flatten(second,
                      sink,
                      0,
                      to - boundary,
-                    sink_offset + boundary - from);
+                    sink_offset + boundary - from,
+                    hasher);
              to = boundary;
            }
            source = first;
@@ -3812,39 +3835,45 @@
  }


+uint32_t StringHasher::GetHashField() {
+  ASSERT(is_valid());
+  if (length_ <= String::kMaxShortStringSize) {
+    uint32_t payload;
+    if (is_array_index()) {
+      payload = v8::internal::HashField(array_index(), true);
+    } else {
+      payload = v8::internal::HashField(GetHash(), false);
+    }
+    return (payload & 0x00FFFFFF) | (length_ << String::kShortLengthShift);
+  } else if (length_ <= String::kMaxMediumStringSize) {
+    uint32_t payload = v8::internal::HashField(GetHash(), false);
+    return (payload & 0x0000FFFF) | (length_ <<  
String::kMediumLengthShift);
+  } else {
+    return v8::internal::HashField(length_, false);
+  }
+}
+
+
  uint32_t String::ComputeLengthAndHashField(unibrow::CharacterStream*  
buffer,
                                             int length) {
-  // Large string (please note large strings cannot be an array index).
-  if (length > kMaxMediumStringSize) return HashField(length, false);
+  StringHasher hasher(length);

-  // Note: the Jenkins one-at-a-time hash function
-  uint32_t hash = 0;
-  while (buffer->has_more()) {
-    uc32 r = buffer->GetNext();
-    hash += r;
-    hash += (hash << 10);
-    hash ^= (hash >> 6);
-  }
-  hash += (hash << 3);
-  hash ^= (hash >> 11);
-  hash += (hash << 15);
-
-  // Short string.
-  if (length <= kMaxShortStringSize) {
-    // Make hash value consistent with value returned from String::Hash.
-    buffer->Rewind();
-    uint32_t index;
-    hash = HashField(hash, ComputeArrayIndex(buffer, &index, length));
-    hash = (hash & 0x00FFFFFF) | (length << kShortLengthShift);
-    return hash;
-  }
+  // Very long strings have a trivial hash that doesn't inspect the
+  // string contents.
+  if (hasher.has_trivial_hash())
+    return hasher.GetHashField();
+
+  // Do the iterative array index computation as long as there is a
+  // chance this is an array index.
+  while (buffer->has_more() && hasher.is_array_index())
+    hasher.AddCharacter(buffer->GetNext());
+
+  // Process the remaining characters without updating the array
+  // index.
+  while (buffer->has_more())
+    hasher.AddCharacterNoIndex(buffer->GetNext());

-  // Medium string (please note medium strings cannot be an array index).
-  ASSERT(length <= kMaxMediumStringSize);
-  // Make hash value consistent with value returned from String::Hash.
-  hash = HashField(hash, false);
-  hash = (hash & 0x0000FFFF) | (length << kMediumLengthShift);
-  return hash;
+  return hasher.GetHashField();
  }


@@ -5627,6 +5656,20 @@
  Object* SymbolTable::LookupString(String* string, Object** s) {
    SymbolKey key(string);
    return LookupKey(&key, s);
+}
+
+
+bool SymbolTable::LookupSymbolIfExists(String* string, String** symbol) {
+  SymbolKey key(string);
+  int entry = FindEntry(&key);
+  if (entry == -1) {
+    return false;
+  } else {
+    String* result = String::cast(KeyAt(entry));
+    ASSERT(result->is_symbol());
+    *symbol = result;
+    return true;
+  }
  }



Modified: branches/bleeding_edge/src/objects.h
==============================================================================
--- branches/bleeding_edge/src/objects.h        (original)
+++ branches/bleeding_edge/src/objects.h        Tue Oct  7 03:10:03 2008
@@ -1815,6 +1815,11 @@
    Object* LookupSymbol(Vector<const char> str, Object** s);
    Object* LookupString(String* key, Object** s);

+  // Looks up a symbol that is equal to the given string and returns
+  // true if it is found, assigning the symbol to the given output
+  // parameter.
+  bool LookupSymbolIfExists(String* str, String** symbol);
+
    // Casting.
    static inline SymbolTable* cast(Object* obj);

@@ -2839,6 +2844,52 @@
  enum RobustnessFlag {ROBUST_STRING_TRAVERSAL, FAST_STRING_TRAVERSAL};


+class StringHasher {
+ public:
+  inline StringHasher(int length);
+
+  // Returns true if the hash of this string can be computed without
+  // looking at the contents.
+  inline bool has_trivial_hash();
+
+  // Add a character to the hash and update the array index calculation.
+  inline void AddCharacter(uc32 c);
+
+  // Adds a character to the hash but does not update the array index
+  // calculation.  This can only be called when it has been verified
+  // that the input is not an array index.
+  inline void AddCharacterNoIndex(uc32 c);
+
+  // Returns the value to store in the hash field of a string with
+  // the given length and contents.
+  uint32_t GetHashField();
+
+  // Returns true if the characters seen so far make up a legal array
+  // index.
+  bool is_array_index() { return is_array_index_; }
+
+  bool is_valid() { return is_valid_; }
+
+  void invalidate() { is_valid_ = false; }
+
+ private:
+
+  uint32_t array_index() {
+    ASSERT(is_array_index());
+    return array_index_;
+  }
+
+  inline uint32_t GetHash();
+
+  int length_;
+  uint32_t raw_running_hash_;
+  uint32_t array_index_;
+  bool is_array_index_;
+  bool is_first_char_;
+  bool is_valid_;
+};
+
+
  // The String abstract class captures JavaScript string values:
  //
  // Ecma-262:
@@ -2980,6 +3031,8 @@
    static const int kMaxShortStringSize = 255;
    static const int kMaxMediumStringSize = 65535;

+  static const int kMaxArrayIndexSize = 10;
+
    // Max ascii char code.
    static const int kMaxAsciiCharCode = 127;

@@ -3024,7 +3077,8 @@
                        String* sink,
                        int from,
                        int to,
-                      int sink_offset);
+                      int sink_offset,
+                      StringHasher* hasher);

   protected:
    class ReadBlockBuffer {

Modified: branches/bleeding_edge/src/runtime.cc
==============================================================================
--- branches/bleeding_edge/src/runtime.cc       (original)
+++ branches/bleeding_edge/src/runtime.cc       Tue Oct  7 03:10:03 2008
@@ -2380,21 +2380,24 @@
    if (object->IsFailure()) return object;

    String* answer = String::cast(object);
+  StringHasher hasher(length);
    for (int i = 0; i < array_length; i++) {
      Object* element = fixed_array->get(i);
      if (element->IsSmi()) {
        int len = Smi::cast(element)->value();
        int pos = len >> 11;
        len &= 0x7ff;
-      String::Flatten(special, answer, pos, pos + len, position);
+      String::Flatten(special, answer, pos, pos + len, position, &hasher);
        position += len;
      } else {
        String* string = String::cast(element);
        int element_length = string->length();
-      String::Flatten(string, answer, 0, element_length, position);
+      String::Flatten(string, answer, 0, element_length, position,  
&hasher);
        position += element_length;
      }
    }
+  if (hasher.is_valid())
+    answer->set_length_field(hasher.GetHashField());
    return answer;
  }


--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---

Reply via email to