Revision: 7716
Author:   [email protected]
Date:     Fri Apr 29 01:53:36 2011
Log: Handle join of sparse arrays with non-empty separator more efficiently.

BUG=v8:1028

Review URL: http://codereview.chromium.org/6902144
http://code.google.com/p/v8/source/detail?r=7716

Modified:
 /branches/bleeding_edge/src/array.js
 /branches/bleeding_edge/src/runtime.cc
 /branches/bleeding_edge/src/runtime.h
 /branches/bleeding_edge/test/mjsunit/array-join.js

=======================================
--- /branches/bleeding_edge/src/array.js        Thu Mar  3 04:56:14 2011
+++ /branches/bleeding_edge/src/array.js        Fri Apr 29 01:53:36 2011
@@ -65,6 +65,25 @@
   keys.sort(function(a, b) { return a - b; });
   return keys;
 }
+
+
+function SparseJoinWithSeparator(array, len, convert, separator) {
+  var keys = GetSortedArrayKeys(array, %GetArrayKeys(array, len));
+  var totalLength = 0;
+  var elements = new InternalArray(keys.length * 2);
+  var previousKey = -1;
+  for (var i = 0; i < keys.length; i++) {
+    var key = keys[i];
+    if (key != previousKey) {  // keys may contain duplicates.
+      var e = array[key];
+      if (!IS_STRING(e)) e = convert(e);
+      elements[i * 2] = key;
+      elements[i * 2 + 1] = e;
+      previousKey = key;
+    }
+  }
+  return %SparseJoinWithSeparator(elements, len, separator);
+}


 // Optimized for sparse arrays if separator is ''.
@@ -110,8 +129,12 @@

   // Attempt to convert the elements.
   try {
- if (UseSparseVariant(array, length, is_array) && (separator.length == 0)) {
-      return SparseJoin(array, length, convert);
+    if (UseSparseVariant(array, length, is_array)) {
+      if (separator.length == 0) {
+        return SparseJoin(array, length, convert);
+      } else {
+        return SparseJoinWithSeparator(array, length, convert, separator);
+      }
     }

     // Fast case for one-element arrays.
@@ -129,10 +152,8 @@
       var elements_length = 0;
       for (var i = 0; i < length; i++) {
         var e = array[i];
-        if (!IS_UNDEFINED(e)) {
-          if (!IS_STRING(e)) e = convert(e);
-          elements[elements_length++] = e;
-        }
+        if (!IS_STRING(e)) e = convert(e);
+        elements[elements_length++] = e;
       }
       elements.length = elements_length;
       var result = %_FastAsciiArrayJoin(elements, '');
@@ -151,11 +172,12 @@
     } else {
       for (var i = 0; i < length; i++) {
         var e = array[i];
-        if (IS_NUMBER(e)) elements[i] = %_NumberToString(e);
-        else {
-          if (!IS_STRING(e)) e = convert(e);
+          if (IS_NUMBER(e)) {
+            e = %_NumberToString(e);
+          } else if (!IS_STRING(e)) {
+            e = convert(e);
+          }
           elements[i] = e;
-        }
       }
     }
     var result = %_FastAsciiArrayJoin(elements, separator);
=======================================
--- /branches/bleeding_edge/src/runtime.cc      Wed Apr 27 22:04:48 2011
+++ /branches/bleeding_edge/src/runtime.cc      Fri Apr 29 01:53:36 2011
@@ -6158,6 +6158,135 @@
ASSERT(!answer->HasOnlyAsciiChars()); // Use %_FastAsciiArrayJoin instead.
   return answer;
 }
+
+template <typename Char>
+static void JoinSparseArrayWithSeparator(FixedArray* elements,
+                                         int elements_length,
+                                         uint32_t array_length,
+                                         String* separator,
+                                         Vector<Char> buffer) {
+  int previous_separator_position = 0;
+  int separator_length = separator->length();
+  int cursor = 0;
+  for (int i = 0; i < elements_length; i += 2) {
+    int position = NumberToInt32(elements->get(i));
+    String* string = String::cast(elements->get(i + 1));
+    int string_length = string->length();
+    if (string->length() > 0) {
+      while (previous_separator_position < position) {
+        String::WriteToFlat<Char>(separator, &buffer[cursor],
+                                  0, separator_length);
+        cursor += separator_length;
+        previous_separator_position++;
+      }
+      String::WriteToFlat<Char>(string, &buffer[cursor],
+                                0, string_length);
+      cursor += string->length();
+    }
+  }
+  if (separator_length > 0) {
+    // Array length must be representable as a signed 32-bit number,
+    // otherwise the total string length would have been too large.
+    ASSERT(array_length <= 0x7fffffff);  // Is int32_t.
+    int last_array_index = static_cast<int>(array_length - 1);
+    while (previous_separator_position < last_array_index) {
+      String::WriteToFlat<Char>(separator, &buffer[cursor],
+                                0, separator_length);
+      cursor += separator_length;
+      previous_separator_position++;
+    }
+  }
+  ASSERT(cursor <= buffer.length());
+}
+
+
+RUNTIME_FUNCTION(MaybeObject*, Runtime_SparseJoinWithSeparator) {
+  NoHandleAllocation ha;
+  ASSERT(args.length() == 3);
+  CONVERT_CHECKED(JSArray, elements_array, args[0]);
+  RUNTIME_ASSERT(elements_array->HasFastElements());
+  CONVERT_NUMBER_CHECKED(uint32_t, array_length, Uint32, args[1]);
+  CONVERT_CHECKED(String, separator, args[2]);
+  // elements_array is fast-mode JSarray of alternating positions
+  // (increasing order) and strings.
+  // array_length is length of original array (used to add separators);
+  // separator is string to put between elements. Assumed to be non-empty.
+
+  // Find total length of join result.
+  int string_length = 0;
+  bool is_ascii = true;
+  int max_string_length = SeqAsciiString::kMaxLength;
+  bool overflow = false;
+  CONVERT_NUMBER_CHECKED(int, elements_length,
+                         Int32, elements_array->length());
+  RUNTIME_ASSERT((elements_length & 1) == 0);  // Even length.
+  FixedArray* elements = FixedArray::cast(elements_array->elements());
+  for (int i = 0; i < elements_length; i += 2) {
+    RUNTIME_ASSERT(elements->get(i)->IsNumber());
+    CONVERT_CHECKED(String, string, elements->get(i + 1));
+    int length = string->length();
+    if (is_ascii && !string->IsAsciiRepresentation()) {
+      is_ascii = false;
+      max_string_length = SeqTwoByteString::kMaxLength;
+    }
+    if (length > max_string_length ||
+        max_string_length - length < string_length) {
+      overflow = true;
+      break;
+    }
+    string_length += length;
+  }
+  int separator_length = separator->length();
+  if (!overflow && separator_length > 0) {
+    if (array_length <= 0x7fffffffu) {
+      int separator_count = static_cast<int>(array_length) - 1;
+      int remaining_length = max_string_length - string_length;
+      if ((remaining_length / separator_length) >= separator_count) {
+        string_length += separator_length * (array_length - 1);
+      } else {
+        // Not room for the separators within the maximal string length.
+        overflow = true;
+      }
+    } else {
+      // Nonempty separator and at least 2^31-1 separators necessary
+      // means that the string is too large to create.
+      STATIC_ASSERT(String::kMaxLength < 0x7fffffff);
+      overflow = true;
+    }
+  }
+  if (overflow) {
+    // Throw OutOfMemory exception for creating too large a string.
+    V8::FatalProcessOutOfMemory("Array join result too large.");
+  }
+
+  if (is_ascii) {
+    MaybeObject* result_allocation =
+        isolate->heap()->AllocateRawAsciiString(string_length);
+    if (result_allocation->IsFailure()) return result_allocation;
+    SeqAsciiString* result_string =
+        SeqAsciiString::cast(result_allocation->ToObjectUnchecked());
+    JoinSparseArrayWithSeparator<char>(elements,
+                                       elements_length,
+                                       array_length,
+                                       separator,
+ Vector<char>(result_string->GetChars(),
+                                                    string_length));
+    return result_string;
+  } else {
+    MaybeObject* result_allocation =
+        isolate->heap()->AllocateRawTwoByteString(string_length);
+    if (result_allocation->IsFailure()) return result_allocation;
+    SeqTwoByteString* result_string =
+        SeqTwoByteString::cast(result_allocation->ToObjectUnchecked());
+    JoinSparseArrayWithSeparator<uc16>(elements,
+                                       elements_length,
+                                       array_length,
+                                       separator,
+ Vector<uc16>(result_string->GetChars(),
+                                                    string_length));
+    return result_string;
+  }
+}


 RUNTIME_FUNCTION(MaybeObject*, Runtime_NumberOr) {
=======================================
--- /branches/bleeding_edge/src/runtime.h       Mon Apr 11 06:24:50 2011
+++ /branches/bleeding_edge/src/runtime.h       Fri Apr 29 01:53:36 2011
@@ -132,6 +132,7 @@
   F(StringAdd, 2, 1) \
   F(StringBuilderConcat, 3, 1) \
   F(StringBuilderJoin, 3, 1) \
+  F(SparseJoinWithSeparator, 3, 1)            \
   \
   /* Bit operations */ \
   F(NumberOr, 2, 1) \
=======================================
--- /branches/bleeding_edge/test/mjsunit/array-join.js Tue Mar 1 06:09:23 2011 +++ /branches/bleeding_edge/test/mjsunit/array-join.js Fri Apr 29 01:53:36 2011
@@ -44,7 +44,8 @@
assertEquals('1,2**********3**********4**********5,6**********', a.join('**********'));

 // Replace array.prototype.toString.
-Array.prototype.toString = function() { return "array"; }
+var oldToString = Array.prototype.toString;
+Array.prototype.toString = function() { return "array"; };
 assertEquals('array34arrayarray', a.join(''));
 assertEquals('array*3*4*array*array', a.join('*'));
 assertEquals('array**3**4**array**array', a.join('**'));
@@ -52,7 +53,7 @@
assertEquals('array********3********4********array********array', a.join('********')); assertEquals('array**********3**********4**********array**********array', a.join('**********'));

-Array.prototype.toString = function() { throw 42; }
+Array.prototype.toString = function() { throw 42; };
 assertThrows("a.join('')");
 assertThrows("a.join('*')");
 assertThrows("a.join('**')");
@@ -60,7 +61,7 @@
 assertThrows("a.join('********')");
 assertThrows("a.join('**********')");

-Array.prototype.toString = function() { return "array"; }
+Array.prototype.toString = function() { return "array"; };
 assertEquals('array34arrayarray', a.join(''));
 assertEquals('array*3*4*array*array', a.join('*'));
 assertEquals('array**3**4**array**array', a.join('**'));
@@ -68,3 +69,25 @@
assertEquals('array********3********4********array********array', a.join('********')); assertEquals('array**********3**********4**********array**********array', a.join('**********'));

+// Restore original toString.
+delete Array.prototype.toString;
+if (Array.prototype.toString != oldToString) {
+  Array.prototype.toString = oldToString;
+}
+
+var a = new Array(123123123);
+assertEquals(123123122, String(a).length);
+assertEquals(123123122, a.join(",").length);
+assertEquals(246246244, a.join("oo").length);
+
+a = new Array(Math.pow(2,32) - 1);  // Max length.
+assertEquals("", a.join(""));
+a[123123123] = "o";
+a[1255215215] = "p";
+assertEquals("op", a.join(""));
+
+a = new Array(100001);
+for (var i = 0; i < a.length; i++) a[i] = undefined;
+a[5] = "ab";
+a[90000] = "cd";
+assertEquals("abcd", a.join(""));  // Must not throw.

--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to