Revision: 11104
Author:   [email protected]
Date:     Wed Mar 21 06:48:29 2012
Log: Speed up WriteUtf8 in the case where the output buffer is large enough.
Review URL: https://chromiumcodereview.appspot.com/9696032
http://code.google.com/p/v8/source/detail?r=11104

Modified:
 /branches/bleeding_edge/src/api.cc
 /branches/bleeding_edge/test/cctest/test-api.cc

=======================================
--- /branches/bleeding_edge/src/api.cc  Mon Mar 19 02:51:36 2012
+++ /branches/bleeding_edge/src/api.cc  Wed Mar 21 06:48:29 2012
@@ -3692,6 +3692,94 @@
   if (IsDeadCheck(str->GetIsolate(), "v8::String::Utf8Length()")) return 0;
   return i::Utf8Length(str);
 }
+
+
+// Will fail with a negative answer if the recursion depth is too high.
+static int RecursivelySerializeToUtf8(i::String* string,
+                                      char* buffer,
+                                      int start,
+                                      int end,
+                                      int recursion_budget,
+                                      int32_t previous_character,
+                                      int32_t* last_character) {
+  int utf8_bytes = 0;
+  while (true) {
+    if (string->IsAsciiRepresentation()) {
+      i::String::WriteToFlat(string, buffer, start, end);
+      *last_character = unibrow::Utf16::kNoPreviousCharacter;
+      return utf8_bytes + end - start;
+    }
+    switch (i::StringShape(string).representation_tag()) {
+      case i::kExternalStringTag: {
+        const uint16_t* data = i::ExternalTwoByteString::cast(string)->
+          ExternalTwoByteStringGetData(0);
+        char* current = buffer;
+        for (int i = start; i < end; i++) {
+          uint16_t character = data[i];
+          current +=
+ unibrow::Utf8::Encode(current, character, previous_character);
+          previous_character = character;
+        }
+        *last_character = previous_character;
+        return utf8_bytes + current - buffer;
+      }
+      case i::kSeqStringTag: {
+        const uint16_t* data =
+            i::SeqTwoByteString::cast(string)->SeqTwoByteStringGetData(0);
+        char* current = buffer;
+        for (int i = start; i < end; i++) {
+          uint16_t character = data[i];
+          current +=
+ unibrow::Utf8::Encode(current, character, previous_character);
+          previous_character = character;
+        }
+        *last_character = previous_character;
+        return utf8_bytes + current - buffer;
+      }
+      case i::kSlicedStringTag: {
+        i::SlicedString* slice = i::SlicedString::cast(string);
+        unsigned offset = slice->offset();
+        string = slice->parent();
+        start += offset;
+        end += offset;
+        continue;
+      }
+      case i::kConsStringTag: {
+        i::ConsString* cons_string = i::ConsString::cast(string);
+        i::String* first = cons_string->first();
+        int boundary = first->length();
+        if (start >= boundary) {
+          // Only need RHS.
+          string = cons_string->second();
+          start -= boundary;
+          end -= boundary;
+          continue;
+        } else if (end <= boundary) {
+          // Only need LHS.
+          string = first;
+        } else {
+          if (recursion_budget == 0) return -1;
+          int extra_utf8_bytes =
+              RecursivelySerializeToUtf8(first,
+                                         buffer,
+                                         start,
+                                         boundary,
+                                         recursion_budget - 1,
+                                         previous_character,
+                                         &previous_character);
+          if (extra_utf8_bytes < 0) return extra_utf8_bytes;
+          buffer += extra_utf8_bytes;
+          utf8_bytes += extra_utf8_bytes;
+          string = cons_string->second();
+          start = 0;
+          end -= boundary;
+        }
+      }
+    }
+  }
+  UNREACHABLE();
+  return 0;
+}


 bool String::MayContainNonAscii() const {
@@ -3712,11 +3800,12 @@
   LOG_API(isolate, "String::WriteUtf8");
   ENTER_V8(isolate);
   i::Handle<i::String> str = Utils::OpenHandle(this);
+  int string_length = str->length();
   if (str->IsAsciiRepresentation()) {
     int len;
     if (capacity == -1) {
       capacity = str->length() + 1;
-      len = str->length();
+      len = string_length;
     } else {
       len = i::Min(capacity, str->length());
     }
@@ -3729,6 +3818,42 @@
     return len;
   }

+  if (capacity == -1 || capacity >= string_length * 3) {
+    int32_t previous = unibrow::Utf16::kNoPreviousCharacter;
+    const int kMaxRecursion = 100;
+    int utf8_bytes =
+        RecursivelySerializeToUtf8(*str,
+                                   buffer,
+                                   0,
+                                   string_length,
+                                   kMaxRecursion,
+                                   previous,
+                                   &previous);
+    if (utf8_bytes >= 0) {
+      // Success serializing with recursion.
+      if ((options & NO_NULL_TERMINATION) == 0 &&
+          (capacity > utf8_bytes || capacity == -1)) {
+        buffer[utf8_bytes++] = '\0';
+      }
+      if (nchars_ref != NULL) *nchars_ref = string_length;
+      return utf8_bytes;
+    }
+    FlattenString(str);
+ // Recurse once. This time around the string is flat and the serializing
+    // with recursion will certainly succeed.
+    return WriteUtf8(buffer, capacity, nchars_ref, options);
+  } else if (capacity >= string_length) {
+    // First check that the buffer is large enough.  If it is, then recurse
+ // once without a capacity limit, which will get into the other branch of
+    // this 'if'.
+    int utf8_bytes = i::Utf8Length(str);
+    if ((options & NO_NULL_TERMINATION) == 0) utf8_bytes++;
+    if (utf8_bytes <= capacity) {
+      return WriteUtf8(buffer, -1, nchars_ref, options);
+    }
+  }
+
+  // Slow case.
i::StringInputBuffer& write_input_buffer = *isolate->write_input_buffer();
   isolate->string_tracker()->RecordWrite(str);
   if (options & HINT_MANY_WRITES_EXPECTED) {
=======================================
--- /branches/bleeding_edge/test/cctest/test-api.cc     Mon Mar 19 02:51:36 2012
+++ /branches/bleeding_edge/test/cctest/test-api.cc     Wed Mar 21 06:48:29 2012
@@ -5870,6 +5870,7 @@
       "p.push(String.fromCharCode(0xdc00));"
       "var a = [];"
       "var b = [];"
+      "var c = [];"
       "var alens = [];"
       "for (var i = 0; i < 3; i++) {"
       "  p[1] = String.fromCharCode(lead++);"
@@ -5877,17 +5878,21 @@
       "    p[2] = String.fromCharCode(trail++);"
       "    a.push(p[i] + p[j]);"
       "    b.push(p[i] + p[j]);"
+      "    c.push(p[i] + p[j]);"
       "    alens.push(plens[i] + plens[j]);"
       "  }"
       "}"
       "alens[5] -= 2;"  // Here the surrogate pairs match up.
       "var a2 = [];"
       "var b2 = [];"
+      "var c2 = [];"
       "var a2lens = [];"
       "for (var m = 0; m < 9; m++) {"
       "  for (var n = 0; n < 9; n++) {"
       "    a2.push(a[m] + a[n]);"
       "    b2.push(b[m] + b[n]);"
+      "    var newc = 'x' + c[m] + c[n] + 'y';"
+      "    c2.push(newc.substring(1, newc.length - 1));"
       "    var utf = alens[m] + alens[n];"  // And here.
            // The 'n's that start with 0xdc.. are 6-8
            // The 'm's that end with 0xd8.. are 1, 4 and 7
@@ -5899,6 +5904,7 @@
   Utf16Helper(context, "a2", "a2lens", 81);
   WriteUtf8Helper(context, "b", "alens", 9);
   WriteUtf8Helper(context, "b2", "a2lens", 81);
+  WriteUtf8Helper(context, "c2", "a2lens", 81);
 }


--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to