Revision: 13189
Author:   [email protected]
Date:     Tue Dec 11 02:22:15 2012
Log:      Cleanup StringCharacterStream and add initial test cases.

BUG=

Review URL: https://chromiumcodereview.appspot.com/11438046
Patch from Dan Carney <[email protected]>.
http://code.google.com/p/v8/source/detail?r=13189

Modified:
 /branches/bleeding_edge/src/objects-inl.h
 /branches/bleeding_edge/src/objects.cc
 /branches/bleeding_edge/src/objects.h
 /branches/bleeding_edge/test/cctest/test-strings.cc

=======================================
--- /branches/bleeding_edge/src/objects-inl.h   Mon Dec 10 04:18:54 2012
+++ /branches/bleeding_edge/src/objects-inl.h   Tue Dec 11 02:22:15 2012
@@ -2521,14 +2521,12 @@
     String* string,
     unsigned offset,
     Visitor& visitor,
-    ConsOp& consOp,
+    ConsOp& cons_op,
     int32_t type,
     unsigned length) {
-
   ASSERT(length == static_cast<unsigned>(string->length()));
   ASSERT(offset <= length);
-
-  unsigned sliceOffset = offset;
+  unsigned slice_offset = offset;
   while (true) {
     ASSERT(type == string->map()->instance_type());

@@ -2536,35 +2534,36 @@
       case kSeqStringTag | kOneByteStringTag:
         visitor.VisitOneByteString(
             reinterpret_cast<const uint8_t*>(
-                SeqOneByteString::cast(string)->GetChars()) + sliceOffset,
+                SeqOneByteString::cast(string)->GetChars()) + slice_offset,
                 length - offset);
         return;

       case kSeqStringTag | kTwoByteStringTag:
         visitor.VisitTwoByteString(
             reinterpret_cast<const uint16_t*>(
-                SeqTwoByteString::cast(string)->GetChars()) + sliceOffset,
+                SeqTwoByteString::cast(string)->GetChars()) + slice_offset,
                 length - offset);
         return;

       case kExternalStringTag | kOneByteStringTag:
         visitor.VisitOneByteString(
             reinterpret_cast<const uint8_t*>(
-                ExternalAsciiString::cast(string)->GetChars()) + sliceOffset,
+                ExternalAsciiString::cast(string)->GetChars()) + slice_offset,
                 length - offset);
         return;

       case kExternalStringTag | kTwoByteStringTag:
         visitor.VisitTwoByteString(
             reinterpret_cast<const uint16_t*>(
-                ExternalTwoByteString::cast(string)->GetChars()) + sliceOffset,
+                ExternalTwoByteString::cast(string)->GetChars())
+                    + slice_offset,
                 length - offset);
         return;

       case kSlicedStringTag | kOneByteStringTag:
       case kSlicedStringTag | kTwoByteStringTag: {
         SlicedString* slicedString = SlicedString::cast(string);
-        sliceOffset += slicedString->offset();
+        slice_offset += slicedString->offset();
         string = slicedString->parent();
         type = string->map()->instance_type();
         continue;
@@ -2572,10 +2571,10 @@

       case kConsStringTag | kOneByteStringTag:
       case kConsStringTag | kTwoByteStringTag:
-        string = consOp.Operate(ConsString::cast(string), &offset, &type,
+        string = cons_op.Operate(ConsString::cast(string), &offset, &type,
             &length);
         if (string == NULL) return;
-        sliceOffset = offset;
+        slice_offset = offset;
         ASSERT(length == static_cast<unsigned>(string->length()));
         continue;

@@ -2768,21 +2767,6 @@
 unsigned ConsStringIteratorOp::OffsetForDepth(unsigned depth) {
   return depth & kDepthMask;
 }
-
-
-uint32_t ConsStringIteratorOp::MaskForDepth(unsigned depth) {
-  return 1 << OffsetForDepth(depth);
-}
-
-
-void ConsStringIteratorOp::SetRightDescent() {
-  trace_ |= MaskForDepth(depth_ - 1);
-}
-
-
-void ConsStringIteratorOp::ClearRightDescent() {
-  trace_ &= ~MaskForDepth(depth_ - 1);
-}


 void ConsStringIteratorOp::PushLeft(ConsString* string) {
@@ -2790,14 +2774,9 @@
 }


-void ConsStringIteratorOp::PushRight(ConsString* string, int32_t type) {
-  // Inplace update
+void ConsStringIteratorOp::PushRight(ConsString* string) {
+  // Inplace update.
   frames_[(depth_-1) & kDepthMask] = string;
-  if (depth_ != 1) return;
-  // Optimization: can replace root in this case.
-  root_ = string;
-  root_type_ = type;
-  root_length_ = string->length();
 }


@@ -2814,29 +2793,23 @@


 void ConsStringIteratorOp::Reset() {
-  consumed_ = 0;
-  ResetStack();
+  depth_ = 0;
+  maximum_depth_ = 0;
 }


 bool ConsStringIteratorOp::HasMore() {
   return depth_ != 0;
 }
-
-
-void ConsStringIteratorOp::ResetStack() {
-  depth_ = 0;
-  maximum_depth_ = 0;
-}


 bool ConsStringIteratorOp::ContinueOperation(ContinueResponse* response) {
-  bool blewStack;
+  bool blew_stack;
   int32_t type;
-  String* string = NextLeaf(&blewStack, &type);
+  unsigned length;
+  String* string = NextLeaf(&blew_stack, &type, &length);
   // String found.
   if (string != NULL) {
-    unsigned length = string->length();
     consumed_ += length;
     response->string_ = string;
     response->offset_ = 0;
@@ -2845,9 +2818,11 @@
     return true;
   }
   // Traversal complete.
-  if (!blewStack) return false;
+  if (!blew_stack) return false;
   // Restart search.
-  ResetStack();
+  Reset();
+  // TODO(dcarney) This is unnecessary.
+  // After a reset, we don't need a String::Visit
   response->string_ = root_;
   response->offset_ = consumed_;
   response->length_ = root_length_;
@@ -2857,14 +2832,14 @@


 uint16_t StringCharacterStream::GetNext() {
-  ASSERT(buffer8_ != NULL);
+  ASSERT((buffer8_ == NULL && end_ == NULL) || buffer8_ < end_);
   return is_one_byte_ ? *buffer8_++ : *buffer16_++;
 }


 StringCharacterStream::StringCharacterStream(
     String* string, unsigned offset, ConsStringIteratorOp* op)
-  : is_one_byte_(true),
+  : is_one_byte_(false),
     buffer8_(NULL),
     end_(NULL),
     op_(op) {
@@ -2878,11 +2853,7 @@
   if (buffer8_ != end_) return true;
   if (!op_->HasMore()) return false;
   ConsStringIteratorOp::ContinueResponse response;
-  // This has been checked above
-  if (!op_->ContinueOperation(&response)) {
-    UNREACHABLE();
-    return false;
-  }
+  if (!op_->ContinueOperation(&response)) return false;
   String::Visit(response.string_,
       response.offset_, *this, *op_, response.type_, response.length_);
   return true;
=======================================
--- /branches/bleeding_edge/src/objects.cc      Mon Dec 10 03:09:12 2012
+++ /branches/bleeding_edge/src/objects.cc      Tue Dec 11 02:22:15 2012
@@ -7026,71 +7026,67 @@
 }


-String* ConsStringIteratorOp::Operate(ConsString* consString,
-    unsigned* outerOffset, int32_t* typeOut, unsigned* lengthOut) {
-  ASSERT(*lengthOut == (unsigned)consString->length());
+String* ConsStringIteratorOp::Operate(ConsString* cons_string,
+    unsigned* offset_out, int32_t* type_out, unsigned* length_out) {
+  ASSERT(*length_out == (unsigned)cons_string->length());
+  ASSERT(depth_ == 0);
   // Push the root string.
-  PushLeft(consString);
-  root_ = consString;
-  root_type_ = *typeOut;
-  root_length_ = *lengthOut;
-  unsigned targetOffset = *outerOffset;
+  PushLeft(cons_string);
+  root_ = cons_string;
+  root_type_ = *type_out;
+  root_length_ = *length_out;
+  consumed_ = *offset_out;
+  unsigned targetOffset = *offset_out;
   unsigned offset = 0;
   while (true) {
     // Loop until the string is found which contains the target offset.
-    String* string = consString->first();
+    String* string = cons_string->first();
     unsigned length = string->length();
     int32_t type;
     if (targetOffset < offset + length) {
       // Target offset is in the left branch.
-      // Mark the descent.
-      ClearRightDescent();
       // Keep going if we're still in a ConString.
       type = string->map()->instance_type();
       if ((type & kStringRepresentationMask) == kConsStringTag) {
-        consString = ConsString::cast(string);
-        PushLeft(consString);
+        cons_string = ConsString::cast(string);
+        PushLeft(cons_string);
         continue;
       }
+      // Tell the stack we're done decending.
+      AdjustMaximumDepth();
     } else {
       // Descend right.
       // Update progress through the string.
       offset += length;
       // Keep going if we're still in a ConString.
-      string = consString->second();
+      string = cons_string->second();
       type = string->map()->instance_type();
       if ((type & kStringRepresentationMask) == kConsStringTag) {
-        consString = ConsString::cast(string);
-        PushRight(consString, type);
+        cons_string = ConsString::cast(string);
+        PushRight(cons_string);
+        // TODO(dcarney) Add back root optimization.
         continue;
       }
-      // Mark the descent.
-      SetRightDescent();
       // Need this to be updated for the current string.
       length = string->length();
       // Account for the possibility of an empty right leaf.
-      while (length == 0) {
-        bool blewStack;
-        // Need to adjust maximum depth for NextLeaf to work.
-        AdjustMaximumDepth();
-        string = NextLeaf(&blewStack, &type);
-        if (string == NULL) {
-          // Luckily, this case is impossible.
-          ASSERT(!blewStack);
-          return NULL;
-        }
-        length = string->length();
+      // This happens only if we have asked for an offset outside the string.
+      if (length == 0) {
+        Reset();
+        return NULL;
       }
+      // Tell the stack we're done decending.
+      AdjustMaximumDepth();
+      // Pop stack so next iteration is in correct place.
+      Pop();
     }
-    // Tell the stack we're done decending.
-    AdjustMaximumDepth();
     ASSERT(length != 0);
     // Adjust return values and exit.
     unsigned innerOffset = targetOffset - offset;
     consumed_ += length - innerOffset;
-    *outerOffset = innerOffset;
-    *typeOut = type;
-    *lengthOut = length;
+    *offset_out = innerOffset;
+    *type_out = type;
+    *length_out = length;
     return string;
   }
   UNREACHABLE();
@@ -7098,52 +7094,49 @@
 }


-String* ConsStringIteratorOp::NextLeaf(bool* blewStack, int32_t* typeOut) {
+String* ConsStringIteratorOp::NextLeaf(
+    bool* blew_stack, int32_t* type_out, unsigned* length_out) {
   while (true) {
     // Tree traversal complete.
     if (depth_ == 0) {
-      *blewStack = false;
+      *blew_stack = false;
       return NULL;
     }
     // We've lost track of higher nodes.
     if (maximum_depth_ - depth_ == kStackSize) {
-      *blewStack = true;
+      *blew_stack = true;
       return NULL;
     }
-    // Check if we're done with this level.
-    bool haveAlreadyReadRight = trace_ & MaskForDepth(depth_ - 1);
-    if (haveAlreadyReadRight) {
-      Pop();
-      continue;
-    }
     // Go right.
-    ConsString* consString = frames_[OffsetForDepth(depth_ - 1)];
-    String* string = consString->second();
+    ConsString* cons_string = frames_[OffsetForDepth(depth_ - 1)];
+    String* string = cons_string->second();
     int32_t type = string->map()->instance_type();
     if ((type & kStringRepresentationMask) != kConsStringTag) {
-      // Don't need to mark the descent here.
       // Pop stack so next iteration is in correct place.
       Pop();
-      *typeOut = type;
+      unsigned length = (unsigned) string->length();
+      // Could be a flattened ConsString.
+      if (length == 0) continue;
+      *length_out = length;
+      *type_out = type;
       return string;
     }
-    // No need to mark the descent.
-    consString = ConsString::cast(string);
-    PushRight(consString, type);
+    cons_string = ConsString::cast(string);
+    // TODO(dcarney) Add back root optimization.
+    PushRight(cons_string);
     // Need to traverse all the way left.
     while (true) {
       // Continue left.
-      // Update marker.
-      ClearRightDescent();
-      string = consString->first();
+      string = cons_string->first();
       type = string->map()->instance_type();
       if ((type & kStringRepresentationMask) != kConsStringTag) {
         AdjustMaximumDepth();
-        *typeOut = type;
+        *type_out = type;
+        *length_out = string->length();
         return string;
       }
-      consString = ConsString::cast(string);
-      PushLeft(consString);
+      cons_string = ConsString::cast(string);
+      PushLeft(cons_string);
     }
   }
   UNREACHABLE();
=======================================
--- /branches/bleeding_edge/src/objects.h       Mon Dec 10 04:18:54 2012
+++ /branches/bleeding_edge/src/objects.h       Tue Dec 11 02:22:15 2012
@@ -7514,7 +7514,7 @@
   static inline void Visit(String* string,
                            unsigned offset,
                            Visitor& visitor,
-                           ConsOp& consOp,
+                           ConsOp& cons_op,
                            int32_t type,
                            unsigned length);

@@ -7985,8 +7985,8 @@
     int32_t type_;
   };
   inline ConsStringIteratorOp() {}
-  String* Operate(ConsString* consString, unsigned* outerOffset,
-      int32_t* typeOut, unsigned* lengthOut);
+  String* Operate(ConsString* cons_string, unsigned* offset_out,
+      int32_t* type_out, unsigned* length_out);
   inline bool ContinueOperation(ContinueResponse* response);
   inline void Reset();
   inline bool HasMore();
@@ -7998,20 +7998,17 @@
   static const unsigned kDepthMask = kStackSize-1;
   STATIC_ASSERT(IS_POWER_OF_TWO(kStackSize));
   static inline unsigned OffsetForDepth(unsigned depth);
-  static inline uint32_t MaskForDepth(unsigned depth);

-  inline void ClearRightDescent();
-  inline void SetRightDescent();
   inline void PushLeft(ConsString* string);
-  inline void PushRight(ConsString* string, int32_t type);
+  inline void PushRight(ConsString* string);
   inline void AdjustMaximumDepth();
   inline void Pop();
-  inline void ResetStack();
-  String* NextLeaf(bool* blewStack, int32_t* typeOut);
+  String* NextLeaf(bool* blew_stack, int32_t* type_out, unsigned* length_out);

   unsigned depth_;
   unsigned maximum_depth_;
-  uint32_t trace_;
+  // Stack must always contain only frames for which right traversal
+  // has not yet been performed.
   ConsString* frames_[kStackSize];
   unsigned consumed_;
   ConsString* root_;
=======================================
--- /branches/bleeding_edge/test/cctest/test-strings.cc Tue Sep 11 07:16:56 2012
+++ /branches/bleeding_edge/test/cctest/test-strings.cc Tue Dec 11 02:22:15 2012
@@ -15,17 +15,58 @@
 #include "cctest.h"
 #include "zone-inl.h"

-unsigned int seed = 123;
+// Adapted from http://en.wikipedia.org/wiki/Multiply-with-carry
+class RandomNumberGenerator {
+ public:
+  RandomNumberGenerator() {
+    init();
+  }

-static uint32_t gen() {
-        uint64_t z;
-        z = seed;
-        z *= 279470273;
-        z %= 4294967291U;
-        seed = static_cast<unsigned int>(z);
-        return static_cast<uint32_t>(seed >> 16);
-}
+  void init(uint32_t seed = 0x5688c73e) {
+    static const uint32_t phi = 0x9e3779b9;
+    c = 362436;
+    i = kQSize-1;
+    Q[0] = seed;
+    Q[1] = seed + phi;
+    Q[2] = seed + phi + phi;
+    for (unsigned j = 3; j < kQSize; j++) {
+      Q[j] = Q[j - 3] ^ Q[j - 2] ^ phi ^ j;
+    }
+  }

+  uint32_t next() {
+    uint64_t a = 18782;
+    uint32_t r = 0xfffffffe;
+    i = (i + 1) & (kQSize-1);
+    uint64_t t = a * Q[i] + c;
+    c = (t >> 32);
+    uint32_t x = t + c;
+    if (x < c) {
+      x++;
+      c++;
+    }
+    return (Q[i] = r - x);
+  }
+
+  uint32_t next(int max) {
+    return next() % max;
+  }
+
+  bool next(double threshold) {
+    ASSERT(threshold >= 0.0 && threshold <= 1.0);
+    if (threshold == 1.0) return true;
+    if (threshold == 0.0) return false;
+    uint32_t value = next() % 100000;
+    return threshold > static_cast<double>(value)/100000.0;
+  }
+
+ private:
+  static const uint32_t kQSize = 4096;
+  uint32_t Q[kQSize];
+  uint32_t c;
+  uint32_t i;
+};
+

 using namespace v8::internal;

@@ -44,7 +85,7 @@
 }


-static const int NUMBER_OF_BUILDING_BLOCKS = 128;
+static const int NUMBER_OF_BUILDING_BLOCKS = 256;
 static const int DEEP_DEPTH = 8 * 1024;
 static const int SUPER_DEEP_DEPTH = 80 * 1024;

@@ -79,21 +120,42 @@
 };


-static void InitializeBuildingBlocks(
-    Handle<String> building_blocks[NUMBER_OF_BUILDING_BLOCKS]) {
+static void InitializeBuildingBlocks(Handle<String>* building_blocks,
+                                     int bb_length,
+                                     bool long_blocks,
+                                     RandomNumberGenerator* rng) {
   // A list of pointers that we don't have any interest in cleaning up.
   // If they are reachable from a root then leak detection won't complain.
   Zone* zone = Isolate::Current()->runtime_zone();
-  for (int i = 0; i < NUMBER_OF_BUILDING_BLOCKS; i++) {
-    int len = gen() % 16;
-    if (len > 14) {
+  for (int i = 0; i < bb_length; i++) {
+    int len = rng->next(16);
+    int slice_head_chars = 0;
+    int slice_tail_chars = 0;
+    int slice_depth = 0;
+    for (int j = 0; j < 3; j++) {
+      if (rng->next(0.35)) slice_depth++;
+    }
+    // Must truncate something for a slice string. Loop until
+    // at least one end will be sliced.
+    while (slice_head_chars == 0 && slice_tail_chars == 0) {
+      slice_head_chars = rng->next(15);
+      slice_tail_chars = rng->next(12);
+    }
+    if (long_blocks) {
+      // Generate building blocks which will never be merged
+      len += ConsString::kMinLength + 1;
+    } else if (len > 14) {
       len += 1234;
     }
-    switch (gen() % 4) {
+    // Don't slice 0 length strings.
+    if (len == 0) slice_depth = 0;
+    int slice_length = slice_depth*(slice_head_chars + slice_tail_chars);
+    len += slice_length;
+    switch (rng->next(4)) {
       case 0: {
         uc16 buf[2000];
         for (int j = 0; j < len; j++) {
-          buf[j] = gen() % 65536;
+          buf[j] = rng->next(0x10000);
         }
         building_blocks[i] =
             FACTORY->NewStringFromTwoByte(Vector<const uc16>(buf, len));
@@ -105,7 +167,7 @@
       case 1: {
         char buf[2000];
         for (int j = 0; j < len; j++) {
-          buf[j] = gen() % 128;
+          buf[j] = rng->next(0x80);
         }
         building_blocks[i] =
             FACTORY->NewStringFromAscii(Vector<const char>(buf, len));
@@ -117,7 +179,7 @@
       case 2: {
         uc16* buf = zone->NewArray<uc16>(len);
         for (int j = 0; j < len; j++) {
-          buf[j] = gen() % 65536;
+          buf[j] = rng->next(0x10000);
         }
         Resource* resource = new(zone) Resource(Vector<const uc16>(buf, len));
         building_blocks[i] = FACTORY->NewExternalStringFromTwoByte(resource);
@@ -127,19 +189,26 @@
         break;
       }
       case 3: {
-        char* buf = NewArray<char>(len);
+        char* buf = zone->NewArray<char>(len);
         for (int j = 0; j < len; j++) {
-          buf[j] = gen() % 128;
+          buf[j] = rng->next(128);
         }
-        building_blocks[i] =
-            FACTORY->NewStringFromAscii(Vector<const char>(buf, len));
+        AsciiResource* resource =
+            new(zone) AsciiResource(Vector<const char>(buf, len));
+        building_blocks[i] = FACTORY->NewExternalStringFromAscii(resource);
         for (int j = 0; j < len; j++) {
           CHECK_EQ(buf[j], building_blocks[i]->Get(j));
         }
-        DeleteArray<char>(buf);
         break;
       }
     }
+    for (int j = slice_depth; j > 0; j--) {
+      building_blocks[i] = FACTORY->NewSubString(
+          building_blocks[i],
+          slice_head_chars,
+          building_blocks[i]->length() - slice_tail_chars);
+    }
+    CHECK(len == building_blocks[i]->length() + slice_length);
   }
 }

@@ -198,18 +267,27 @@


 static StringInputBuffer buffer;
-
+static ConsStringIteratorOp cons_string_iterator_op_1;
+static ConsStringIteratorOp cons_string_iterator_op_2;

 static void Traverse(Handle<String> s1, Handle<String> s2) {
   int i = 0;
   buffer.Reset(*s1);
+  StringCharacterStream character_stream_1(*s1, 0, &cons_string_iterator_op_1);
+  StringCharacterStream character_stream_2(*s2, 0, &cons_string_iterator_op_2);
   StringInputBuffer buffer2(*s2);
   while (buffer.has_more()) {
     CHECK(buffer2.has_more());
+    CHECK(character_stream_1.HasMore());
+    CHECK(character_stream_2.HasMore());
     uint16_t c = buffer.GetNext();
     CHECK_EQ(c, buffer2.GetNext());
+    CHECK_EQ(c, character_stream_1.GetNext());
+    CHECK_EQ(c, character_stream_2.GetNext());
     i++;
   }
+  CHECK(!character_stream_1.HasMore());
+  CHECK(!character_stream_2.HasMore());
   CHECK_EQ(s1->length(), i);
   CHECK_EQ(s2->length(), i);
 }
@@ -219,10 +297,16 @@
   int i = 0;
   buffer.Reset(*s1);
   StringInputBuffer buffer2(*s2);
+  StringCharacterStream character_stream_1(*s1, 0, &cons_string_iterator_op_1);
+  StringCharacterStream character_stream_2(*s2, 0, &cons_string_iterator_op_2);
   while (buffer.has_more() && i < chars) {
     CHECK(buffer2.has_more());
+    CHECK(character_stream_1.HasMore());
+    CHECK(character_stream_2.HasMore());
     uint16_t c = buffer.GetNext();
     CHECK_EQ(c, buffer2.GetNext());
+    CHECK_EQ(c, character_stream_1.GetNext());
+    CHECK_EQ(c, character_stream_2.GetNext());
     i++;
   }
   s1->Get(s1->length() - 1);
@@ -236,7 +320,10 @@
   v8::HandleScope scope;
   Handle<String> building_blocks[NUMBER_OF_BUILDING_BLOCKS];
   ZoneScope zone(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
-  InitializeBuildingBlocks(building_blocks);
+  RandomNumberGenerator rng;
+  rng.init();
+  InitializeBuildingBlocks(
+      building_blocks, NUMBER_OF_BUILDING_BLOCKS, false, &rng);
   Handle<String> flat = ConstructBalanced(building_blocks);
   FlattenString(flat);
   Handle<String> left_asymmetric = ConstructLeft(building_blocks, DEEP_DEPTH);
@@ -273,6 +360,324 @@
   FlattenString(left_deep_asymmetric);
   printf("18\n");
 }
+
+
+class ConsStringStats {
+ public:
+  ConsStringStats() {
+    Reset();
+  }
+  void Reset();
+  void VerifyEqual(const ConsStringStats& that) const;
+  unsigned leaves_;
+  unsigned empty_leaves_;
+  unsigned chars_;
+  unsigned left_traversals_;
+  unsigned right_traversals_;
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ConsStringStats);
+};
+
+
+void ConsStringStats::Reset() {
+  leaves_ = 0;
+  empty_leaves_ = 0;
+  chars_ = 0;
+  left_traversals_ = 0;
+  right_traversals_ = 0;
+}
+
+
+void ConsStringStats::VerifyEqual(const ConsStringStats& that) const {
+  CHECK(this->leaves_ == that.leaves_);
+  CHECK(this->empty_leaves_ == that.empty_leaves_);
+  CHECK(this->chars_ == that.chars_);
+  CHECK(this->left_traversals_ == that.left_traversals_);
+  CHECK(this->right_traversals_ == that.right_traversals_);
+}
+
+
+class ConsStringGenerationData {
+ public:
+  ConsStringGenerationData();
+  void Reset();
+  // Input variables.
+  double early_termination_threshold_;
+  double leftness_;
+  double rightness_;
+  double empty_leaf_threshold_;
+  unsigned max_leaves_;
+  // Cached data.
+  Handle<String> building_blocks_[NUMBER_OF_BUILDING_BLOCKS];
+  String* empty_string_;
+  RandomNumberGenerator rng_;
+  // Stats.
+  ConsStringStats stats_;
+  unsigned early_terminations_;
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ConsStringGenerationData);
+};
+
+
+ConsStringGenerationData::ConsStringGenerationData() {
+  rng_.init();
+  InitializeBuildingBlocks(
+      building_blocks_, NUMBER_OF_BUILDING_BLOCKS, true, &rng_);
+  empty_string_ = Isolate::Current()->heap()->empty_string();
+  Reset();
+}
+
+
+void ConsStringGenerationData::Reset() {
+  early_termination_threshold_ = 0.01;
+  leftness_ = 0.75;
+  rightness_ = 0.75;
+  empty_leaf_threshold_ = 0.02;
+  max_leaves_ = 1000;
+  stats_.Reset();
+  early_terminations_ = 0;
+}
+
+
+void VerifyConsString(ConsString* cons_string, ConsStringStats* stats) {
+  int left_length = cons_string->first()->length();
+  int right_length = cons_string->second()->length();
+  CHECK(cons_string->length() == left_length + right_length);
+  // Check left side.
+  if (cons_string->first()->IsConsString()) {
+    stats->left_traversals_++;
+    VerifyConsString(ConsString::cast(cons_string->first()), stats);
+  } else {
+    CHECK_NE(left_length, 0);
+    stats->leaves_++;
+    stats->chars_ += left_length;
+  }
+  // Check right side.
+  if (cons_string->second()->IsConsString()) {
+    stats->right_traversals_++;
+    VerifyConsString(ConsString::cast(cons_string->second()), stats);
+  } else {
+    if (right_length == 0) stats->empty_leaves_++;
+    stats->leaves_++;
+    stats->chars_ += right_length;
+  }
+}
+
+
+void VerifyConsStringWithOperator(
+    ConsString* cons_string, ConsStringStats* stats) {
+  // Init op.
+  ConsStringIteratorOp op;
+  op.Reset();
+  // Use response for initial search and on blown stack.
+  ConsStringIteratorOp::ContinueResponse response;
+  response.string_ = cons_string;
+  response.offset_ = 0;
+  response.type_ = cons_string->map()->instance_type();
+  response.length_ = (uint32_t) cons_string->length();
+  while (true) {
+    String* string = op.Operate(ConsString::cast(response.string_),
+                                &response.offset_,
+                                &response.type_,
+                                &response.length_);
+    CHECK(string != NULL);
+    while (true) {
+      // Accumulate stats.
+      stats->leaves_++;
+      stats->chars_ += string->length();
+      // Check for completion.
+      bool keep_going_fast_check = op.HasMore();
+      bool keep_going = op.ContinueOperation(&response);
+      if (!keep_going) return;
+      // Verify no false positives for fast check.
+      CHECK(keep_going_fast_check);
+      CHECK(response.string_ != NULL);
+      // Blew stack. Restart outer loop.
+      if (response.string_->IsConsString()) break;
+      string = response.string_;
+    }
+  };
+}
+
+
+void VerifyConsString(Handle<String> root, ConsStringGenerationData* data) {
+  // Verify basic data.
+  CHECK(root->IsConsString());
+  CHECK((unsigned)root->length() == data->stats_.chars_);
+  // Recursive verify.
+  ConsStringStats stats;
+  VerifyConsString(ConsString::cast(*root), &stats);
+  stats.VerifyEqual(data->stats_);
+  // Iteratively verify.
+  stats.Reset();
+  VerifyConsStringWithOperator(ConsString::cast(*root), &stats);
+  // Don't see these. Must copy over.
+  stats.empty_leaves_ = data->stats_.empty_leaves_;
+  stats.left_traversals_ = data->stats_.left_traversals_;
+  stats.right_traversals_ = data->stats_.right_traversals_;
+  // Adjust total leaves to compensate.
+  stats.leaves_ += stats.empty_leaves_;
+  stats.VerifyEqual(data->stats_);
+}
+
+
+static Handle<String> ConstructRandomString(ConsStringGenerationData* data,
+                                            unsigned max_recursion) {
+  // Compute termination characteristics.
+  bool terminate = false;
+  bool flat = data->rng_.next(data->empty_leaf_threshold_);
+  bool terminate_early = data->rng_.next(data->early_termination_threshold_);
+  if (terminate_early) data->early_terminations_++;
+  // The obvious condition.
+  terminate |= max_recursion == 0;
+  // Flat cons string terminate by definition.
+  terminate |= flat;
+  // Cap for max leaves.
+  terminate |= data->stats_.leaves_ >= data->max_leaves_;
+  // Roll the dice.
+  terminate |= terminate_early;
+  // Compute termination characteristics for each side.
+  bool terminate_left = terminate || !data->rng_.next(data->leftness_);
+  bool terminate_right = terminate || !data->rng_.next(data->rightness_);
+  // Generate left string.
+  Handle<String> left;
+  if (terminate_left) {
+    left = data->building_blocks_[data->rng_.next(NUMBER_OF_BUILDING_BLOCKS)];
+    data->stats_.leaves_++;
+    data->stats_.chars_ += left->length();
+  } else {
+    left = ConstructRandomString(data, max_recursion - 1);
+    data->stats_.left_traversals_++;
+  }
+  // Generate right string.
+  Handle<String> right;
+  if (terminate_right) {
+    right = data->building_blocks_[data->rng_.next(NUMBER_OF_BUILDING_BLOCKS)];
+    data->stats_.leaves_++;
+    data->stats_.chars_ += right->length();
+  } else {
+    right = ConstructRandomString(data, max_recursion - 1);
+    data->stats_.right_traversals_++;
+  }
+  // Build the cons string.
+  Handle<String> root = FACTORY->NewConsString(left, right);
+  CHECK(root->IsConsString() && !root->IsFlat());
+  // Special work needed for flat string.
+  if (flat) {
+    data->stats_.empty_leaves_++;
+    FlattenString(root);
+    CHECK(root->IsConsString() && root->IsFlat());
+  }
+  return root;
+}
+
+
+static const int kCharacterStreamRandomCases = 150;
+static const int kCharacterStreamEdgeCases =
+    kCharacterStreamRandomCases + 5;
+
+
+static Handle<String> BuildConsStrings(int testCase,
+                                       ConsStringGenerationData* data) {
+  // For random constructions, need to reset the generator.
+  data->rng_.init();
+  for (int j = 0; j < testCase * 50; j++) {
+    data->rng_.next();
+  }
+  Handle<String> string;
+  switch (testCase) {
+    case 0:
+      return ConstructBalanced(data->building_blocks_);
+    case 1:
+      return ConstructLeft(data->building_blocks_, DEEP_DEPTH);
+    case 2:
+      return ConstructRight(data->building_blocks_, DEEP_DEPTH);
+    case 3:
+      return ConstructLeft(data->building_blocks_, 10);
+    case 4:
+      return ConstructRight(data->building_blocks_, 10);
+    case 5:
+      return FACTORY->NewConsString(
+          data->building_blocks_[0], data->building_blocks_[1]);
+    default:
+      if (testCase >= kCharacterStreamEdgeCases) {
+        CHECK(false);
+        return string;
+      }
+      // Random test case.
+      data->Reset();
+      string = ConstructRandomString(data, 200);
+      AssertNoAllocation no_alloc;
+      VerifyConsString(string, data);
+#ifdef DEBUG
+      printf(
+          "%s: [%d], %s: [%d], %s: [%d], %s: [%d], %s: [%d], %s: [%d]\n",
+          "leaves", data->stats_.leaves_,
+          "empty", data->stats_.empty_leaves_,
+          "chars", data->stats_.chars_,
+          "lefts", data->stats_.left_traversals_,
+          "rights", data->stats_.right_traversals_,
+          "early_terminations", data->early_terminations_);
+#endif
+      return string;
+    }
+}
+
+
+static void VerifyCharacterStream(
+    String* flat_string, String* cons_string) {
+  // Do not want to test ConString traversal on flat string.
+  CHECK(flat_string->IsFlat());
+  CHECK(!flat_string->IsConsString());
+  CHECK(cons_string->IsConsString());
+  // TODO(dcarney) Test stream reset as well.
+  int length = flat_string->length();
+  // Iterate start search in multiple places in the string.
+  int outer_iterations = length > 20 ? 20 : length;
+  for (int j = 0; j <= outer_iterations; j++) {
+    int offset = static_cast<double>(length)*j/outer_iterations;
+    if (offset < 0) offset = 0;
+    // Want to test the offset == length case.
+    if (offset > length) offset = length;
+    StringCharacterStream flat_stream(
+        flat_string, (unsigned) offset, &cons_string_iterator_op_1);
+    StringCharacterStream cons_stream(
+        cons_string, (unsigned) offset, &cons_string_iterator_op_2);
+    for (int i = offset; i < length; i++) {
+      uint16_t c = flat_string->Get(i);
+      CHECK(flat_stream.HasMore());
+      CHECK(cons_stream.HasMore());
+      CHECK_EQ(c, flat_stream.GetNext());
+      CHECK_EQ(c, cons_stream.GetNext());
+    }
+    CHECK(!flat_stream.HasMore());
+    CHECK(!cons_stream.HasMore());
+  }
+}
+
+
+TEST(StringCharacterStreamEdgeCases) {
+  printf("TestStringCharacterStreamEdgeCases\n");
+  InitializeVM();
+  Isolate* isolate = Isolate::Current();
+  HandleScope outer_scope(isolate);
+  ZoneScope zone(Isolate::Current()->runtime_zone(), DELETE_ON_EXIT);
+  ConsStringGenerationData data;
+  for (int i = 0; i < kCharacterStreamEdgeCases; i++) {
+    printf("%d\n", i);
+    isolate->heap()->CollectAllGarbage(
+        Heap::kNoGCFlags, "must not allocate in loop");
+    AlwaysAllocateScope always_allocate;
+    HandleScope inner_scope(isolate);
+    Handle<String> cons_string = BuildConsStrings(i, &data);
+    Handle<String> flat_string = BuildConsStrings(i, &data);
+    FlattenString(flat_string);
+    AssertNoAllocation no_alloc;
+    CHECK(flat_string->IsConsString() && flat_string->IsFlat());
+    VerifyCharacterStream(ConsString::cast(*flat_string)->first(),
+        *cons_string);
+  }
+}


 static const int DEEP_ASCII_DEPTH = 100000;

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to