Revision: 13242
Author: [email protected]
Date: Wed Dec 19 05:27:20 2012
Log: Replace the use CharacterStreams in Heap::AllocateSymbolInternal
and String::ComputeHash
[email protected]
BUG=
Review URL: https://chromiumcodereview.appspot.com/11593007
Patch from Dan Carney <[email protected]>.
http://code.google.com/p/v8/source/detail?r=13242
Modified:
/branches/bleeding_edge/src/heap-inl.h
/branches/bleeding_edge/src/heap.cc
/branches/bleeding_edge/src/heap.h
/branches/bleeding_edge/src/json-parser.h
/branches/bleeding_edge/src/objects-inl.h
/branches/bleeding_edge/src/objects.cc
/branches/bleeding_edge/src/objects.h
/branches/bleeding_edge/src/profile-generator.cc
/branches/bleeding_edge/src/unicode.h
/branches/bleeding_edge/test/cctest/test-strings.cc
=======================================
--- /branches/bleeding_edge/src/heap-inl.h Tue Dec 11 02:14:01 2012
+++ /branches/bleeding_edge/src/heap-inl.h Wed Dec 19 05:27:20 2012
@@ -96,14 +96,36 @@
// Non-ASCII and we need to decode.
return AllocateStringFromUtf8Slow(str, non_ascii_start, pretenure);
}
+
+
+template<>
+bool inline Heap::IsOneByte(Vector<const char> str, int chars) {
+ // TODO(dcarney): incorporate Latin-1 check when Latin-1 is supported?
+ // ASCII only check.
+ return chars == str.length();
+}
+
+
+template<>
+bool inline Heap::IsOneByte(String* str, int chars) {
+ return str->IsOneByteRepresentation();
+}
MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
int chars,
uint32_t hash_field) {
- unibrow::Utf8InputBuffer<> buffer(str.start(),
- static_cast<unsigned>(str.length()));
- return AllocateInternalSymbol(&buffer, chars, hash_field);
+ if (IsOneByte(str, chars)) return AllocateAsciiSymbol(str, hash_field);
+ return AllocateInternalSymbol<false>(str, chars, hash_field);
+}
+
+
+template<typename T>
+MaybeObject* Heap::AllocateInternalSymbol(T t, int chars, uint32_t
hash_field) {
+ if (IsOneByte(t, chars)) {
+ return AllocateInternalSymbol<true>(t, chars, hash_field);
+ }
+ return AllocateInternalSymbol<false>(t, chars, hash_field);
}
=======================================
--- /branches/bleeding_edge/src/heap.cc Mon Dec 17 07:56:16 2012
+++ /branches/bleeding_edge/src/heap.cc Wed Dec 19 05:27:20 2012
@@ -3302,8 +3302,8 @@
MUST_USE_RESULT static inline MaybeObject* MakeOrFindTwoCharacterString(
Heap* heap,
- uint32_t c1,
- uint32_t c2) {
+ uint16_t c1,
+ uint16_t c2) {
String* symbol;
// Numeric strings have a different hash algorithm not known by
// LookupTwoCharsSymbolIfExists, so we skip this step for such strings.
@@ -3352,8 +3352,8 @@
// dictionary. Check whether we already have the string in the symbol
// table to prevent creation of many unneccesary strings.
if (length == 2) {
- unsigned c1 = first->Get(0);
- unsigned c2 = second->Get(0);
+ uint16_t c1 = first->Get(0);
+ uint16_t c2 = second->Get(0);
return MakeOrFindTwoCharacterString(this, c1, c2);
}
@@ -3467,8 +3467,8 @@
// Optimization for 2-byte strings often used as keys in a
decompression
// dictionary. Check whether we already have the string in the symbol
// table to prevent creation of many unneccesary strings.
- unsigned c1 = buffer->Get(start);
- unsigned c2 = buffer->Get(start + 1);
+ uint16_t c1 = buffer->Get(start);
+ uint16_t c2 = buffer->Get(start + 1);
return MakeOrFindTwoCharacterString(this, c1, c2);
}
@@ -4624,27 +4624,88 @@
}
-MaybeObject* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer,
- int chars,
- uint32_t hash_field) {
- ASSERT(chars >= 0);
- // Ensure the chars matches the number of characters in the buffer.
- ASSERT(static_cast<unsigned>(chars) == buffer->Utf16Length());
- // Determine whether the string is ASCII.
- bool is_ascii = true;
- while (buffer->has_more()) {
- if (buffer->GetNext() > unibrow::Utf8::kMaxOneByteChar) {
- is_ascii = false;
- break;
+template<typename T>
+class AllocateInternalSymbolHelper {
+ public:
+ static void WriteOneByteData(T t, char* chars, int len);
+ static void WriteTwoByteData(T t, uint16_t* chars, int len);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(AllocateInternalSymbolHelper);
+};
+
+
+template<>
+class AllocateInternalSymbolHelper< Vector<const char> > {
+ public:
+ static inline void WriteOneByteData(Vector<const char> vector,
+ char* chars,
+ int len) {
+ // Only works for ascii.
+ ASSERT(vector.length() == len);
+ memcpy(chars, vector.start(), len);
+ }
+
+ static inline void WriteTwoByteData(Vector<const char> vector,
+ uint16_t* chars,
+ int len) {
+ const uint8_t* stream = reinterpret_cast<const
uint8_t*>(vector.start());
+ unsigned stream_length = vector.length();
+ while (stream_length != 0) {
+ unsigned consumed = 0;
+ uint32_t c = unibrow::Utf8::ValueOf(stream, stream_length,
&consumed);
+ ASSERT(c != unibrow::Utf8::kBadChar);
+ ASSERT(consumed <= stream_length);
+ stream_length -= consumed;
+ stream += consumed;
+ if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
+ len -= 2;
+ if (len < 0) break;
+ *chars++ = unibrow::Utf16::LeadSurrogate(c);
+ *chars++ = unibrow::Utf16::TrailSurrogate(c);
+ } else {
+ len -= 1;
+ if (len < 0) break;
+ *chars++ = c;
+ }
}
+ ASSERT(stream_length == 0);
+ ASSERT(len == 0);
}
- buffer->Rewind();
+ private:
+ DISALLOW_COPY_AND_ASSIGN(AllocateInternalSymbolHelper);
+};
+
+
+template<>
+class AllocateInternalSymbolHelper<String*> {
+ public:
+ static inline void WriteOneByteData(String* s, char* chars, int len) {
+ ASSERT(s->length() == len);
+ String::WriteToFlat(s, chars, 0, len);
+ }
+
+ static inline void WriteTwoByteData(String* s, uint16_t* chars, int len)
{
+ ASSERT(s->length() == len);
+ String::WriteToFlat(s, chars, 0, len);
+ }
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(AllocateInternalSymbolHelper<String*>);
+};
+
+
+template<bool is_one_byte, typename T>
+MaybeObject* Heap::AllocateInternalSymbol(T t,
+ int chars,
+ uint32_t hash_field) {
+ typedef AllocateInternalSymbolHelper<T> H;
+ ASSERT(chars >= 0);
// Compute map and object size.
int size;
Map* map;
- if (is_ascii) {
+ if (is_one_byte) {
if (chars > SeqOneByteString::kMaxLength) {
return Failure::OutOfMemoryException();
}
@@ -4674,19 +4735,24 @@
ASSERT_EQ(size, answer->Size());
- // Fill in the characters.
- int i = 0;
- while (i < chars) {
- uint32_t character = buffer->GetNext();
- if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
- answer->Set(i++, unibrow::Utf16::LeadSurrogate(character));
- answer->Set(i++, unibrow::Utf16::TrailSurrogate(character));
- } else {
- answer->Set(i++, character);
- }
+ if (is_one_byte) {
+ H::WriteOneByteData(t, SeqOneByteString::cast(answer)->GetChars(),
chars);
+ } else {
+ H::WriteTwoByteData(t, SeqTwoByteString::cast(answer)->GetChars(),
chars);
}
return answer;
}
+
+
+// Need explicit instantiations.
+template
+MaybeObject* Heap::AllocateInternalSymbol<true>(String*, int, uint32_t);
+template
+MaybeObject* Heap::AllocateInternalSymbol<false>(String*, int, uint32_t);
+template
+MaybeObject* Heap::AllocateInternalSymbol<false>(Vector<const char>,
+ int,
+ uint32_t);
MaybeObject* Heap::AllocateRawOneByteString(int length,
=======================================
--- /branches/bleeding_edge/src/heap.h Mon Dec 17 07:56:16 2012
+++ /branches/bleeding_edge/src/heap.h Wed Dec 19 05:27:20 2012
@@ -764,12 +764,16 @@
Vector<const uc16> str,
uint32_t hash_field);
- MUST_USE_RESULT MaybeObject* AllocateInternalSymbol(
- unibrow::CharacterStream* buffer, int chars, uint32_t hash_field);
+ template<typename T>
+ static inline bool IsOneByte(T t, int chars);
- MUST_USE_RESULT MaybeObject* AllocateExternalSymbol(
- Vector<const char> str,
- int chars);
+ template<typename T>
+ MUST_USE_RESULT inline MaybeObject* AllocateInternalSymbol(
+ T t, int chars, uint32_t hash_field);
+
+ template<bool is_one_byte, typename T>
+ MUST_USE_RESULT MaybeObject* AllocateInternalSymbol(
+ T t, int chars, uint32_t hash_field);
// Allocates and partially initializes a String. There are two String
// encodings: ASCII and two byte. These functions allocate a string of
the
=======================================
--- /branches/bleeding_edge/src/json-parser.h Mon Dec 17 07:56:16 2012
+++ /branches/bleeding_edge/src/json-parser.h Wed Dec 19 05:27:20 2012
@@ -631,7 +631,17 @@
position_);
}
if (c0 < 0x20) return Handle<String>::null();
- running_hash = StringHasher::AddCharacterCore(running_hash, c0);
+ if (static_cast<uint32_t>(c0) >
+ unibrow::Utf16::kMaxNonSurrogateCharCode) {
+ running_hash =
+ StringHasher::AddCharacterCore(running_hash,
+
unibrow::Utf16::LeadSurrogate(c0));
+ running_hash =
+ StringHasher::AddCharacterCore(running_hash,
+
unibrow::Utf16::TrailSurrogate(c0));
+ } else {
+ running_hash = StringHasher::AddCharacterCore(running_hash, c0);
+ }
position++;
if (position >= source_length_) return Handle<String>::null();
c0 = seq_source_->SeqOneByteStringGet(position);
=======================================
--- /branches/bleeding_edge/src/objects-inl.h Wed Dec 19 02:28:36 2012
+++ /branches/bleeding_edge/src/objects-inl.h Wed Dec 19 05:27:20 2012
@@ -2584,8 +2584,7 @@
case kConsStringTag | kOneByteStringTag:
case kConsStringTag | kTwoByteStringTag:
- string = cons_op.Operate(ConsString::cast(string), &offset, &type,
- &length);
+ string = cons_op.Operate(string, &offset, &type, &length);
if (string == NULL) return;
slice_offset = offset;
ASSERT(length == static_cast<unsigned>(string->length()));
@@ -2775,6 +2774,11 @@
unsigned start) {
return GetChars() + start;
}
+
+
+String* ConsStringNullOp::Operate(String*, unsigned*, int32_t*, unsigned*)
{
+ return NULL;
+}
unsigned ConsStringIteratorOp::OffsetForDepth(unsigned depth) {
@@ -2805,42 +2809,38 @@
}
-void ConsStringIteratorOp::Reset() {
- depth_ = 0;
- maximum_depth_ = 0;
+bool ConsStringIteratorOp::HasMore() {
+ return depth_ != 0;
}
-bool ConsStringIteratorOp::HasMore() {
- return depth_ != 0;
+void ConsStringIteratorOp::Reset() {
+ depth_ = 0;
}
-bool ConsStringIteratorOp::ContinueOperation(ContinueResponse* response) {
+String* ConsStringIteratorOp::ContinueOperation(int32_t* type_out,
+ unsigned* length_out) {
bool blew_stack;
- int32_t type;
- unsigned length;
- String* string = NextLeaf(&blew_stack, &type, &length);
+ String* string = NextLeaf(&blew_stack, type_out, length_out);
// String found.
if (string != NULL) {
- consumed_ += length;
- response->string_ = string;
- response->offset_ = 0;
- response->length_ = length;
- response->type_ = type;
- return true;
+ // Verify output.
+ ASSERT(*length_out == static_cast<unsigned>(string->length()));
+ ASSERT(*type_out == string->map()->instance_type());
+ return string;
}
// Traversal complete.
- if (!blew_stack) return false;
- // Restart search.
- Reset();
- // TODO(dcarney) This is unnecessary.
- // After a reset, we don't need a String::Visit
- response->string_ = root_;
- response->offset_ = consumed_;
- response->length_ = root_length_;
- response->type_ = root_type_;
- return true;
+ if (!blew_stack) return NULL;
+ // Restart search from root.
+ unsigned offset_out;
+ string = Search(&offset_out, type_out, length_out);
+ // Verify output.
+ ASSERT(string == NULL || offset_out == 0);
+ ASSERT(string == NULL ||
+ *length_out == static_cast<unsigned>(string->length()));
+ ASSERT(string == NULL || *type_out == string->map()->instance_type());
+ return string;
}
@@ -2857,18 +2857,24 @@
end_(NULL),
op_(op) {
op->Reset();
- String::Visit(string,
- offset, *this, *op, string->map()->instance_type(),
string->length());
+ int32_t type = string->map()->instance_type();
+ unsigned length = string->length();
+ String::Visit(string, offset, *this, *op, type, length);
}
bool StringCharacterStream::HasMore() {
if (buffer8_ != end_) return true;
if (!op_->HasMore()) return false;
- ConsStringIteratorOp::ContinueResponse response;
- if (!op_->ContinueOperation(&response)) return false;
- String::Visit(response.string_,
- response.offset_, *this, *op_, response.type_, response.length_);
+ unsigned length;
+ int32_t type;
+ String* string = op_->ContinueOperation(&type, &length);
+ if (string == NULL) return false;
+ ASSERT(!string->IsConsString());
+ ASSERT(string->length() != 0);
+ ConsStringNullOp null_op;
+ String::Visit(string, 0, *this, null_op, type, length);
+ ASSERT(buffer8_ != end_);
return true;
}
@@ -5138,7 +5144,7 @@
}
-uint32_t StringHasher::AddCharacterCore(uint32_t running_hash, uint32_t c)
{
+uint32_t StringHasher::AddCharacterCore(uint32_t running_hash, uint16_t c)
{
running_hash += c;
running_hash += (running_hash << 10);
running_hash ^= (running_hash >> 6);
@@ -5157,66 +5163,62 @@
}
-void StringHasher::AddCharacter(uint32_t c) {
- if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
- AddSurrogatePair(c); // Not inlined.
- return;
- }
+void StringHasher::AddCharacter(uint16_t c) {
// Use the Jenkins one-at-a-time hash function to update the hash
// for the given character.
raw_running_hash_ = AddCharacterCore(raw_running_hash_, c);
- // Incremental array index computation.
- if (is_array_index_) {
- if (c < '0' || c > '9') {
- is_array_index_ = false;
- } else {
- int d = c - '0';
- if (is_first_char_) {
- is_first_char_ = false;
- if (c == '0' && length_ > 1) {
- is_array_index_ = false;
- return;
- }
- }
- if (array_index_ > 429496729U - ((d + 2) >> 3)) {
- is_array_index_ = false;
- } else {
- array_index_ = array_index_ * 10 + d;
- }
- }
- }
}
-void StringHasher::AddCharacterNoIndex(uint32_t c) {
- ASSERT(!is_array_index());
- if (c > unibrow::Utf16::kMaxNonSurrogateCharCode) {
- AddSurrogatePairNoIndex(c); // Not inlined.
- return;
+bool StringHasher::UpdateIndex(uint16_t c) {
+ ASSERT(is_array_index_);
+ if (c < '0' || c > '9') {
+ is_array_index_ = false;
+ return false;
}
- raw_running_hash_ = AddCharacterCore(raw_running_hash_, c);
+ int d = c - '0';
+ if (is_first_char_) {
+ is_first_char_ = false;
+ if (c == '0' && length_ > 1) {
+ is_array_index_ = false;
+ return false;
+ }
+ }
+ if (array_index_ > 429496729U - ((d + 2) >> 3)) {
+ is_array_index_ = false;
+ return false;
+ }
+ array_index_ = array_index_ * 10 + d;
+ return true;
}
-uint32_t StringHasher::GetHash() {
- // Get the calculated raw hash value and do some more bit ops to
distribute
- // the hash further. Ensure that we never return zero as the hash value.
- return GetHashCore(raw_running_hash_);
+template<typename Char>
+inline void StringHasher::AddCharacters(const Char* chars, int length) {
+ ASSERT(sizeof(Char) == 1 || sizeof(Char) == 2);
+ int i = 0;
+ if (is_array_index_) {
+ for (; i < length; i++) {
+ AddCharacter(chars[i]);
+ if (!UpdateIndex(chars[i])) {
+ i++;
+ break;
+ }
+ }
+ }
+ for (; i < length; i++) {
+ ASSERT(!is_array_index_);
+ AddCharacter(chars[i]);
+ }
}
template <typename schar>
-uint32_t HashSequentialString(const schar* chars, int length, uint32_t
seed) {
+uint32_t StringHasher::HashSequentialString(const schar* chars,
+ int length,
+ uint32_t seed) {
StringHasher hasher(length, seed);
- if (!hasher.has_trivial_hash()) {
- int i;
- for (i = 0; hasher.is_array_index() && (i < length); i++) {
- hasher.AddCharacter(chars[i]);
- }
- for (; i < length; i++) {
- hasher.AddCharacterNoIndex(chars[i]);
- }
- }
+ if (!hasher.has_trivial_hash()) hasher.AddCharacters(chars, length);
return hasher.GetHashField();
}
=======================================
--- /branches/bleeding_edge/src/objects.cc Tue Dec 18 08:25:45 2012
+++ /branches/bleeding_edge/src/objects.cc Wed Dec 19 05:27:20 2012
@@ -7029,24 +7029,36 @@
}
-String* ConsStringIteratorOp::Operate(ConsString* cons_string,
- unsigned* offset_out, int32_t* type_out, unsigned* length_out) {
- ASSERT(*length_out == (unsigned)cons_string->length());
- ASSERT(depth_ == 0);
- // Push the root string.
- PushLeft(cons_string);
+String* ConsStringIteratorOp::Operate(String* string,
+ unsigned* offset_out,
+ int32_t* type_out,
+ unsigned* length_out) {
+ ASSERT(string->IsConsString());
+ ConsString* cons_string = ConsString::cast(string);
+ // Set up search data.
root_ = cons_string;
- root_type_ = *type_out;
- root_length_ = *length_out;
consumed_ = *offset_out;
- unsigned targetOffset = *offset_out;
+ // Now search.
+ return Search(offset_out, type_out, length_out);
+}
+
+
+String* ConsStringIteratorOp::Search(unsigned* offset_out,
+ int32_t* type_out,
+ unsigned* length_out) {
+ ConsString* cons_string = root_;
+ // Reset the stack, pushing the root string.
+ depth_ = 1;
+ maximum_depth_ = 1;
+ frames_[0] = cons_string;
+ const unsigned consumed = consumed_;
unsigned offset = 0;
while (true) {
// Loop until the string is found which contains the target offset.
String* string = cons_string->first();
unsigned length = string->length();
int32_t type;
- if (targetOffset < offset + length) {
+ if (consumed < offset + length) {
// Target offset is in the left branch.
// Keep going if we're still in a ConString.
type = string->map()->instance_type();
@@ -7075,6 +7087,7 @@
// Account for the possibility of an empty right leaf.
// This happens only if we have asked for an offset outside the
string.
if (length == 0) {
+ // Reset depth so future operations will return null immediately.
Reset();
return NULL;
}
@@ -7085,9 +7098,8 @@
}
ASSERT(length != 0);
// Adjust return values and exit.
- unsigned innerOffset = targetOffset - offset;
- consumed_ += length - innerOffset;
- *offset_out = innerOffset;
+ consumed_ = offset + length;
+ *offset_out = consumed - offset;
*type_out = type;
*length_out = length;
return string;
@@ -7097,8 +7109,9 @@
}
-String* ConsStringIteratorOp::NextLeaf(
- bool* blew_stack, int32_t* type_out, unsigned* length_out) {
+String* ConsStringIteratorOp::NextLeaf(bool* blew_stack,
+ int32_t* type_out,
+ unsigned* length_out) {
while (true) {
// Tree traversal complete.
if (depth_ == 0) {
@@ -7122,6 +7135,7 @@
if (length == 0) continue;
*length_out = length;
*type_out = type;
+ consumed_ += length;
return string;
}
cons_string = ConsString::cast(string);
@@ -7134,8 +7148,11 @@
type = string->map()->instance_type();
if ((type & kStringRepresentationMask) != kConsStringTag) {
AdjustMaximumDepth();
+ unsigned length = (unsigned) string->length();
+ ASSERT(length != 0);
+ *length_out = length;
*type_out = type;
- *length_out = string->length();
+ consumed_ += length;
return string;
}
cons_string = ConsString::cast(string);
@@ -7674,28 +7691,62 @@
}
+class IteratingStringHasher: public StringHasher {
+ public:
+ static inline uint32_t Hash(String* string, uint32_t seed) {
+ const unsigned len = static_cast<unsigned>(string->length());
+ IteratingStringHasher hasher(len, seed);
+ if (hasher.has_trivial_hash()) {
+ return hasher.GetHashField();
+ }
+ int32_t type = string->map()->instance_type();
+ ConsStringNullOp null_op;
+ String::Visit(string, 0, hasher, null_op, type, len);
+ // Flat strings terminate immediately.
+ if (hasher.consumed_ == len) {
+ ASSERT(!string->IsConsString());
+ return hasher.GetHashField();
+ }
+ ASSERT(string->IsConsString());
+ // This is a ConsString, iterate across it.
+ ConsStringIteratorOp op;
+ unsigned offset = 0;
+ unsigned leaf_length = len;
+ string = op.Operate(string, &offset, &type, &leaf_length);
+ while (true) {
+ ASSERT(hasher.consumed_ < len);
+ String::Visit(string, 0, hasher, null_op, type, leaf_length);
+ if (hasher.consumed_ == len) break;
+ string = op.ContinueOperation(&type, &leaf_length);
+ // This should be taken care of by the length check.
+ ASSERT(string != NULL);
+ }
+ return hasher.GetHashField();
+ }
+ inline void VisitOneByteString(const uint8_t* chars, unsigned length) {
+ AddCharacters(chars, static_cast<int>(length));
+ consumed_ += length;
+ }
+ inline void VisitTwoByteString(const uint16_t* chars, unsigned length) {
+ AddCharacters(chars, static_cast<int>(length));
+ consumed_ += length;
+ }
+
+ private:
+ inline IteratingStringHasher(int len, uint32_t seed)
+ : StringHasher(len, seed),
+ consumed_(0) {}
+ unsigned consumed_;
+ DISALLOW_COPY_AND_ASSIGN(IteratingStringHasher);
+};
+
+
uint32_t String::ComputeAndSetHash() {
// Should only be called if hash code has not yet been computed.
ASSERT(!HasHashCode());
-
- const int len = length();
-
- // Compute the hash code.
- uint32_t field = 0;
- if (StringShape(this).IsSequentialAscii()) {
- field = HashSequentialString(SeqOneByteString::cast(this)->GetChars(),
- len,
- GetHeap()->HashSeed());
- } else if (StringShape(this).IsSequentialTwoByte()) {
- field = HashSequentialString(SeqTwoByteString::cast(this)->GetChars(),
- len,
- GetHeap()->HashSeed());
- } else {
- StringInputBuffer buffer(this);
- field = ComputeHashField(&buffer, len, GetHeap()->HashSeed());
- }
// Store the hash code in the object.
+ uint32_t field = IteratingStringHasher::Hash(this,
GetHeap()->HashSeed());
set_hash_field(field);
// Check the hash code is there.
@@ -7799,57 +7850,59 @@
}
-void StringHasher::AddSurrogatePair(uc32 c) {
- uint16_t lead = unibrow::Utf16::LeadSurrogate(c);
- AddCharacter(lead);
- uint16_t trail = unibrow::Utf16::TrailSurrogate(c);
- AddCharacter(trail);
-}
-
-
-void StringHasher::AddSurrogatePairNoIndex(uc32 c) {
- uint16_t lead = unibrow::Utf16::LeadSurrogate(c);
- AddCharacterNoIndex(lead);
- uint16_t trail = unibrow::Utf16::TrailSurrogate(c);
- AddCharacterNoIndex(trail);
-}
-
-
uint32_t StringHasher::GetHashField() {
if (length_ <= String::kMaxHashCalcLength) {
- if (is_array_index()) {
- return MakeArrayIndexHash(array_index(), length_);
+ if (is_array_index_) {
+ return MakeArrayIndexHash(array_index_, length_);
}
- return (GetHash() << String::kHashShift) |
String::kIsNotArrayIndexMask;
+ return (GetHashCore(raw_running_hash_) << String::kHashShift) |
+ String::kIsNotArrayIndexMask;
} else {
return (length_ << String::kHashShift) | String::kIsNotArrayIndexMask;
}
}
-uint32_t String::ComputeHashField(unibrow::CharacterStream* buffer,
- int length,
- uint32_t seed) {
+uint32_t StringHasher::ComputeHashField(unibrow::CharacterStream* buffer,
+ int length,
+ uint32_t seed) {
+ typedef unibrow::Utf16 u;
StringHasher hasher(length, seed);
-
// Very long strings have a trivial hash that doesn't inspect the
// string contents.
if (hasher.has_trivial_hash()) {
return hasher.GetHashField();
}
-
// Do the iterative array index computation as long as there is a
// chance this is an array index.
- while (buffer->has_more() && hasher.is_array_index()) {
- hasher.AddCharacter(buffer->GetNext());
+ if (hasher.is_array_index_) {
+ while (buffer->has_more()) {
+ uint32_t c = buffer->GetNext();
+ if (c > u::kMaxNonSurrogateCharCode) {
+ uint16_t c1 = u::LeadSurrogate(c);
+ uint16_t c2 = u::TrailSurrogate(c);
+ hasher.AddCharacter(c1);
+ hasher.AddCharacter(c2);
+ if (!hasher.UpdateIndex(c1)) break;
+ if (!hasher.UpdateIndex(c2)) break;
+ } else {
+ hasher.AddCharacter(c);
+ if (!hasher.UpdateIndex(c)) break;
+ }
+ }
}
-
// Process the remaining characters without updating the array
// index.
while (buffer->has_more()) {
- hasher.AddCharacterNoIndex(buffer->GetNext());
+ ASSERT(!hasher.is_array_index_);
+ uint32_t c = buffer->GetNext();
+ if (c > u::kMaxNonSurrogateCharCode) {
+ hasher.AddCharacter(u::LeadSurrogate(c));
+ hasher.AddCharacter(u::TrailSurrogate(c));
+ } else {
+ hasher.AddCharacter(c);
+ }
}
-
return hasher.GetHashField();
}
@@ -11667,7 +11720,7 @@
unibrow::Utf8InputBuffer<> buffer(string_.start(),
static_cast<unsigned>(string_.length()));
chars_ = buffer.Utf16Length();
- hash_field_ = String::ComputeHashField(&buffer, chars_, seed_);
+ hash_field_ = StringHasher::ComputeHashField(&buffer, chars_, seed_);
uint32_t result = hash_field_ >> String::kHashShift;
ASSERT(result != 0); // Ensure that the hash value of 0 is never
computed.
return result;
@@ -11697,29 +11750,9 @@
: string_(string), hash_field_(0), seed_(seed) { }
uint32_t Hash() {
- StringHasher hasher(string_.length(), seed_);
-
- // Very long strings have a trivial hash that doesn't inspect the
- // string contents.
- if (hasher.has_trivial_hash()) {
- hash_field_ = hasher.GetHashField();
- } else {
- int i = 0;
- // Do the iterative array index computation as long as there is a
- // chance this is an array index.
- while (i < string_.length() && hasher.is_array_index()) {
- hasher.AddCharacter(static_cast<uc32>(string_[i]));
- i++;
- }
-
- // Process the remaining characters without updating the array
- // index.
- while (i < string_.length()) {
- hasher.AddCharacterNoIndex(static_cast<uc32>(string_[i]));
- i++;
- }
- hash_field_ = hasher.GetHashField();
- }
+ hash_field_ = StringHasher::HashSequentialString<Char>(string_.start(),
+
string_.length(),
+ seed_);
uint32_t result = hash_field_ >> String::kHashShift;
ASSERT(result != 0); // Ensure that the hash value of 0 is never
computed.
@@ -11764,32 +11797,9 @@
uint32_t Hash() {
ASSERT(length_ >= 0);
ASSERT(from_ + length_ <= string_->length());
- StringHasher hasher(length_, string_->GetHeap()->HashSeed());
-
- // Very long strings have a trivial hash that doesn't inspect the
- // string contents.
- if (hasher.has_trivial_hash()) {
- hash_field_ = hasher.GetHashField();
- } else {
- int i = 0;
- // Do the iterative array index computation as long as there is a
- // chance this is an array index.
- while (i < length_ && hasher.is_array_index()) {
- hasher.AddCharacter(static_cast<uc32>(
- string_->SeqOneByteStringGet(i + from_)));
- i++;
- }
-
- // Process the remaining characters without updating the array
- // index.
- while (i < length_) {
- hasher.AddCharacterNoIndex(static_cast<uc32>(
- string_->SeqOneByteStringGet(i + from_)));
- i++;
- }
- hash_field_ = hasher.GetHashField();
- }
-
+ char* chars = string_->GetChars() + from_;
+ hash_field_ = StringHasher::HashSequentialString(
+ chars, length_, string_->GetHeap()->HashSeed());
uint32_t result = hash_field_ >> String::kHashShift;
ASSERT(result != 0); // Ensure that the hash value of 0 is never
computed.
return result;
@@ -11864,8 +11874,7 @@
return string_;
}
// Otherwise allocate a new symbol.
- StringInputBuffer buffer(string_);
- return heap->AllocateInternalSymbol(&buffer,
+ return heap->AllocateInternalSymbol(string_,
string_->length(),
string_->hash_field());
}
@@ -12635,7 +12644,7 @@
// algorithm.
class TwoCharHashTableKey : public HashTableKey {
public:
- TwoCharHashTableKey(uint32_t c1, uint32_t c2, uint32_t seed)
+ TwoCharHashTableKey(uint16_t c1, uint16_t c2, uint32_t seed)
: c1_(c1), c2_(c2) {
// Char 1.
uint32_t hash = seed;
@@ -12651,17 +12660,17 @@
hash ^= hash >> 11;
hash += hash << 15;
if ((hash & String::kHashBitMask) == 0) hash = StringHasher::kZeroHash;
+ hash_ = hash;
#ifdef DEBUG
- StringHasher hasher(2, seed);
- hasher.AddCharacter(c1);
- hasher.AddCharacter(c2);
// If this assert fails then we failed to reproduce the two-character
// version of the string hashing algorithm above. One reason could be
// that we were passed two digits as characters, since the hash
// algorithm is different in that case.
- ASSERT_EQ(static_cast<int>(hasher.GetHash()), static_cast<int>(hash));
+ uint16_t chars[2] = {c1, c2};
+ uint32_t check_hash = StringHasher::HashSequentialString(chars, 2,
seed);
+ hash = (hash << String::kHashShift) | String::kIsNotArrayIndexMask;
+ ASSERT_EQ(static_cast<int32_t>(hash),
static_cast<int32_t>(check_hash));
#endif
- hash_ = hash;
}
bool IsMatch(Object* o) {
@@ -12686,8 +12695,8 @@
}
private:
- uint32_t c1_;
- uint32_t c2_;
+ uint16_t c1_;
+ uint16_t c2_;
uint32_t hash_;
};
@@ -12706,8 +12715,8 @@
}
-bool SymbolTable::LookupTwoCharsSymbolIfExists(uint32_t c1,
- uint32_t c2,
+bool SymbolTable::LookupTwoCharsSymbolIfExists(uint16_t c1,
+ uint16_t c2,
String** symbol) {
TwoCharHashTableKey key(c1, c2, GetHeap()->HashSeed());
int entry = FindEntry(&key);
=======================================
--- /branches/bleeding_edge/src/objects.h Wed Dec 19 02:28:36 2012
+++ /branches/bleeding_edge/src/objects.h Wed Dec 19 05:27:20 2012
@@ -3043,7 +3043,7 @@
// true if it is found, assigning the symbol to the given output
// parameter.
bool LookupSymbolIfExists(String* str, String** symbol);
- bool LookupTwoCharsSymbolIfExists(uint32_t c1, uint32_t c2, String**
symbol);
+ bool LookupTwoCharsSymbolIfExists(uint16_t c1, uint16_t c2, String**
symbol);
// Casting.
static inline SymbolTable* cast(Object* obj);
@@ -6929,30 +6929,14 @@
public:
explicit inline StringHasher(int length, uint32_t seed);
- // Returns true if the hash of this string can be computed without
- // looking at the contents.
- inline bool has_trivial_hash();
-
- // Add a character to the hash and update the array index calculation.
- inline void AddCharacter(uint32_t c);
+ template <typename schar>
+ static inline uint32_t HashSequentialString(const schar* chars,
+ int length,
+ uint32_t seed);
- // Adds a character to the hash but does not update the array index
- // calculation. This can only be called when it has been verified
- // that the input is not an array index.
- inline void AddCharacterNoIndex(uint32_t c);
-
- // Add a character above 0xffff as a surrogate pair. These can get into
- // the hasher through the routines that take a UTF-8 string and make a
symbol.
- void AddSurrogatePair(uc32 c);
- void AddSurrogatePairNoIndex(uc32 c);
-
- // Returns the value to store in the hash field of a string with
- // the given length and contents.
- uint32_t GetHashField();
-
- // Returns true if the characters seen so far make up a legal array
- // index.
- bool is_array_index() { return is_array_index_; }
+ static uint32_t ComputeHashField(unibrow::CharacterStream* buffer,
+ int length,
+ uint32_t seed);
// Calculated hash value for a string consisting of 1 to
// String::kMaxArrayIndexSize digits with no leading zeros (except "0").
@@ -6964,51 +6948,36 @@
// use 27 instead.
static const int kZeroHash = 27;
- private:
- uint32_t array_index() {
- ASSERT(is_array_index());
- return array_index_;
- }
-
- inline uint32_t GetHash();
-
// Reusable parts of the hashing algorithm.
- INLINE(static uint32_t AddCharacterCore(uint32_t running_hash, uint32_t
c));
+ INLINE(static uint32_t AddCharacterCore(uint32_t running_hash, uint16_t
c));
INLINE(static uint32_t GetHashCore(uint32_t running_hash));
- int length_;
- uint32_t raw_running_hash_;
- uint32_t array_index_;
- bool is_array_index_;
- bool is_first_char_;
- friend class TwoCharHashTableKey;
+ protected:
+ // Returns the value to store in the hash field of a string with
+ // the given length and contents.
+ uint32_t GetHashField();
+ // Returns true if the hash of this string can be computed without
+ // looking at the contents.
+ inline bool has_trivial_hash();
+ // Adds a block of characters to the hash.
+ template<typename Char>
+ inline void AddCharacters(const Char* chars, int len);
- template <bool seq_ascii> friend class JsonParser;
-};
-
-
-class IncrementalAsciiStringHasher {
- public:
- explicit inline IncrementalAsciiStringHasher(uint32_t seed, char
first_char);
- inline void AddCharacter(uc32 c);
- inline uint32_t GetHash();
+ private:
+ // Add a character to the hash.
+ inline void AddCharacter(uint16_t c);
+ // Update index. Returns true if string is still an index.
+ inline bool UpdateIndex(uint16_t c);
- private:
int length_;
uint32_t raw_running_hash_;
uint32_t array_index_;
bool is_array_index_;
- char first_char_;
+ bool is_first_char_;
+ DISALLOW_COPY_AND_ASSIGN(StringHasher);
};
-// Calculates string hash.
-template <typename schar>
-inline uint32_t HashSequentialString(const schar* chars,
- int length,
- uint32_t seed);
-
-
// The characteristics of a string are stored in its map. Retrieving these
// few bits of information is moderately expensive, involving two memory
// loads where the second is dependent on the first. To improve efficiency
@@ -7227,10 +7196,6 @@
// Returns a hash value used for the property table
inline uint32_t Hash();
- static uint32_t ComputeHashField(unibrow::CharacterStream* buffer,
- int length,
- uint32_t seed);
-
static bool ComputeArrayIndex(unibrow::CharacterStream* buffer,
uint32_t* index,
int length);
@@ -7870,22 +7835,30 @@
};
+// A ConsStringOp that returns null.
+// Useful when the operation to apply on a ConsString
+// requires an expensive data structure.
+class ConsStringNullOp {
+ public:
+ inline ConsStringNullOp() {}
+ static inline String* Operate(String*, unsigned*, int32_t*, unsigned*);
+ private:
+ DISALLOW_COPY_AND_ASSIGN(ConsStringNullOp);
+};
+
+
// This maintains an off-stack representation of the stack frames required
// to traverse a ConsString, allowing an entirely iterative and restartable
// traversal of the entire string
// Note: this class is not GC-safe.
class ConsStringIteratorOp {
public:
- struct ContinueResponse {
- String* string_;
- unsigned offset_;
- unsigned length_;
- int32_t type_;
- };
inline ConsStringIteratorOp() {}
- String* Operate(ConsString* cons_string, unsigned* offset_out,
- int32_t* type_out, unsigned* length_out);
- inline bool ContinueOperation(ContinueResponse* response);
+ String* Operate(String* string,
+ unsigned* offset_out,
+ int32_t* type_out,
+ unsigned* length_out);
+ inline String* ContinueOperation(int32_t* type_out, unsigned*
length_out);
inline void Reset();
inline bool HasMore();
@@ -7902,6 +7875,9 @@
inline void AdjustMaximumDepth();
inline void Pop();
String* NextLeaf(bool* blew_stack, int32_t* type_out, unsigned*
length_out);
+ String* Search(unsigned* offset_out,
+ int32_t* type_out,
+ unsigned* length_out);
unsigned depth_;
unsigned maximum_depth_;
@@ -7910,8 +7886,6 @@
ConsString* frames_[kStackSize];
unsigned consumed_;
ConsString* root_;
- int32_t root_type_;
- unsigned root_length_;
DISALLOW_COPY_AND_ASSIGN(ConsStringIteratorOp);
};
@@ -7919,8 +7893,9 @@
// Note: this class is not GC-safe.
class StringCharacterStream {
public:
- inline StringCharacterStream(
- String* string, unsigned offset, ConsStringIteratorOp* op);
+ inline StringCharacterStream(String* string,
+ unsigned offset,
+ ConsStringIteratorOp* op);
inline uint16_t GetNext();
inline bool HasMore();
inline void Reset(String* string, unsigned offset, ConsStringIteratorOp*
op);
=======================================
--- /branches/bleeding_edge/src/profile-generator.cc Wed Dec 12 01:49:46
2012
+++ /branches/bleeding_edge/src/profile-generator.cc Wed Dec 19 05:27:20
2012
@@ -112,7 +112,7 @@
OS::StrNCpy(dst, src, len);
dst[len] = '\0';
uint32_t hash =
- HashSequentialString(dst.start(), len, HEAP->HashSeed());
+ StringHasher::HashSequentialString(dst.start(), len,
HEAP->HashSeed());
return AddOrDisposeString(dst.start(), hash);
}
@@ -145,7 +145,7 @@
DeleteArray(str.start());
return format;
}
- uint32_t hash = HashSequentialString(
+ uint32_t hash = StringHasher::HashSequentialString(
str.start(), len, HEAP->HashSeed());
return AddOrDisposeString(str.start(), hash);
}
@@ -156,8 +156,8 @@
int length = Min(kMaxNameSize, name->length());
SmartArrayPointer<char> data =
name->ToCString(DISALLOW_NULLS, ROBUST_STRING_TRAVERSAL, 0,
length);
- uint32_t hash =
- HashSequentialString(*data, length, name->GetHeap()->HashSeed());
+ uint32_t hash = StringHasher::HashSequentialString(
+ *data, length, name->GetHeap()->HashSeed());
return AddOrDisposeString(data.Detach(), hash);
}
return "";
@@ -1451,9 +1451,9 @@
SnapshotObjectId HeapObjectsMap::GenerateId(v8::RetainedObjectInfo* info) {
SnapshotObjectId id = static_cast<SnapshotObjectId>(info->GetHash());
const char* label = info->GetLabel();
- id ^= HashSequentialString(label,
- static_cast<int>(strlen(label)),
- HEAP->HashSeed());
+ id ^= StringHasher::HashSequentialString(label,
+ static_cast<int>(strlen(label)),
+ HEAP->HashSeed());
intptr_t element_count = info->GetElementCount();
if (element_count != -1)
id ^= ComputeIntegerHash(static_cast<uint32_t>(element_count),
@@ -2940,9 +2940,10 @@
NativeGroupRetainedObjectInfo* NativeObjectsExplorer::FindOrAddGroupInfo(
const char* label) {
const char* label_copy = collection_->names()->GetCopy(label);
- uint32_t hash = HashSequentialString(label_copy,
-
static_cast<int>(strlen(label_copy)),
- HEAP->HashSeed());
+ uint32_t hash = StringHasher::HashSequentialString(
+ label_copy,
+ static_cast<int>(strlen(label_copy)),
+ HEAP->HashSeed());
HashMap::Entry* entry =
native_groups_.Lookup(const_cast<char*>(label_copy),
hash, true);
if (entry->value == NULL) {
=======================================
--- /branches/bleeding_edge/src/unicode.h Tue Aug 28 02:37:41 2012
+++ /branches/bleeding_edge/src/unicode.h Wed Dec 19 05:27:20 2012
@@ -170,10 +170,6 @@
// that match are coded as a 4 byte UTF-8 sequence.
static const unsigned kBytesSavedByCombiningSurrogates = 2;
static const unsigned kSizeOfUnmatchedSurrogate = 3;
-
- private:
- template <unsigned s> friend class Utf8InputBuffer;
- friend class Test;
static inline uchar ValueOf(const byte* str,
unsigned length,
unsigned* cursor);
=======================================
--- /branches/bleeding_edge/test/cctest/test-strings.cc Fri Dec 14 04:45:28
2012
+++ /branches/bleeding_edge/test/cctest/test-strings.cc Wed Dec 19 05:27:20
2012
@@ -345,37 +345,24 @@
void AccumulateStatsWithOperator(
ConsString* cons_string, ConsStringStats* stats) {
- // Init op.
+ unsigned offset = 0;
+ int32_t type = cons_string->map()->instance_type();
+ unsigned length = static_cast<unsigned>(cons_string->length());
ConsStringIteratorOp op;
- op.Reset();
- // Use response for initial search and on blown stack.
- ConsStringIteratorOp::ContinueResponse response;
- response.string_ = cons_string;
- response.offset_ = 0;
- response.type_ = cons_string->map()->instance_type();
- response.length_ = (uint32_t) cons_string->length();
+ String* string = op.Operate(cons_string, &offset, &type, &length);
+ CHECK(string != NULL);
while (true) {
- String* string = op.Operate(ConsString::cast(response.string_),
- &response.offset_,
- &response.type_,
- &response.length_);
- CHECK(string != NULL);
- while (true) {
- // Accumulate stats.
- stats->leaves_++;
- stats->chars_ += string->length();
- // Check for completion.
- bool keep_going_fast_check = op.HasMore();
- bool keep_going = op.ContinueOperation(&response);
- if (!keep_going) return;
- // Verify no false positives for fast check.
- CHECK(keep_going_fast_check);
- CHECK(response.string_ != NULL);
- // Blew stack. Restart outer loop.
- if (response.string_->IsConsString()) break;
- string = response.string_;
- }
- };
+ ASSERT(!string->IsConsString());
+ // Accumulate stats.
+ stats->leaves_++;
+ stats->chars_ += string->length();
+ // Check for completion.
+ bool keep_going_fast_check = op.HasMore();
+ string = op.ContinueOperation(&type, &length);
+ if (string == NULL) return;
+ // Verify no false positives for fast check.
+ CHECK(keep_going_fast_check);
+ }
}
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev