Reviewers: piscisaureus,

Message:
On 2012/02/29 18:04:46, piscisaureus wrote:
> It seems to break String::Utf8Value so I was unable to test it.

All tests run for me. Do you have a failing test and configuration to share?

Description:
Faster WriteUtf8 for medium size strings?

Please review this at https://chromiumcodereview.appspot.com/9536018/

SVN Base: http://v8.googlecode.com/svn/branches/bleeding_edge/

Affected files:
  M     src/api.cc
  M     src/objects.h
  M     src/objects.cc


Index: src/api.cc
===================================================================
--- src/api.cc  (revision 10873)
+++ src/api.cc  (working copy)
@@ -3702,11 +3702,12 @@
   LOG_API(isolate, "String::WriteUtf8");
   ENTER_V8(isolate);
   i::Handle<i::String> str = Utils::OpenHandle(this);
+  int string_length = str->length();
   if (str->IsAsciiRepresentation()) {
     int len;
     if (capacity == -1) {
       capacity = str->length() + 1;
-      len = str->length();
+      len = string_length;
     } else {
       len = i::Min(capacity, str->length());
     }
@@ -3719,6 +3720,19 @@
     return len;
   }

+  if (capacity == -1 || capacity >= string_length * 3) {
+    if (string_length < 100) {
+      int utf8_bytes =
+          str->RecursivelySerializeToUtf8(buffer, 0, string_length);
+      if ((options & NO_NULL_TERMINATION) == 0 &&
+          (capacity > utf8_bytes || capacity == -1)) {
+        buffer[utf8_bytes++] = '\0';
+      }
+      if (nchars_ref != NULL) *nchars_ref = string_length;
+      return utf8_bytes;
+    }
+  }
+
   i::StringInputBuffer& write_input_buffer = *isolate->write_input_buffer();
   isolate->string_tracker()->RecordWrite(str);
   if (options & HINT_MANY_WRITES_EXPECTED) {
Index: src/objects.cc
===================================================================
--- src/objects.cc      (revision 10873)
+++ src/objects.cc      (working copy)
@@ -6085,6 +6085,66 @@
 }


+// Serializes characters [start, end) of this string into |buffer| as UTF-8
+// and returns the number of bytes written.  The buffer size is not checked
+// and no '\0' is appended here; the caller is responsible for both.
+// Cons and sliced strings recurse into their parts with a rebased range.
+int String::RecursivelySerializeToUtf8(char* buffer, int start, int end) {
+  if (IsAsciiRepresentation()) {
+    // ASCII path: one output byte per character.
+    WriteToFlat(this, buffer, start, end);
+    return end - start;
+  }
+  switch (StringShape(this).representation_tag()) {
+    case kExternalStringTag: {
+      const uc16* data = ExternalTwoByteString::cast(this)->GetChars();
+      char* current = buffer;
+      for (int i = start; i < end; i++) {
+        current += unibrow::Utf8::Encode(current, data[i]);
+      }
+      return static_cast<int>(current - buffer);
+    }
+    case kSeqStringTag: {
+      const uc16* data = SeqTwoByteString::cast(this)->GetChars();
+      char* current = buffer;
+      for (int i = start; i < end; i++) {
+        current += unibrow::Utf8::Encode(current, data[i]);
+      }
+      return static_cast<int>(current - buffer);
+    }
+    case kConsStringTag: {
+      ConsString* cons_string = ConsString::cast(this);
+      String* first = cons_string->first();
+      int boundary = first->length();
+      if (start >= boundary) {
+        // Only need RHS; rebase the range onto the second part.
+        return cons_string->second()->RecursivelySerializeToUtf8(
+            buffer, start - boundary, end - boundary);
+      } else if (end <= boundary) {
+        // Only need LHS; it shares our index origin, so do not rebase.
+        return first->RecursivelySerializeToUtf8(buffer, start, end);
+      } else {
+        // Range straddles the boundary: LHS tail first, RHS head after it.
+        int utf8_bytes = first->RecursivelySerializeToUtf8(
+            buffer, start, boundary);
+        return utf8_bytes +
+            cons_string->second()->RecursivelySerializeToUtf8(
+            buffer + utf8_bytes, 0, end - boundary);
+      }
+    }
+    case kSlicedStringTag: {
+      // Shift the range by the slice's offset into its parent string.
+      SlicedString* slice = SlicedString::cast(this);
+      unsigned offset = slice->offset();
+      return slice->parent()->RecursivelySerializeToUtf8(
+          buffer, start + offset, end + offset);
+    }
+  }
+  UNREACHABLE();
+  return 0;
+}
+
+
 SmartArrayPointer<char> String::ToCString(AllowNullsFlag allow_nulls,
                                           RobustnessFlag robust_flag,
                                           int offset,
Index: src/objects.h
===================================================================
--- src/objects.h       (revision 10873)
+++ src/objects.h       (working copy)
@@ -6746,6 +6746,7 @@

   inline int Utf8Length() { return Utf8Length(this, 0, length()); }
   static int Utf8Length(String* input, int from, int to);
+  int RecursivelySerializeToUtf8(char* buffer, int start, int end);

   // Return a 16 bit Unicode representation of the string.
   // The string should be nearly flat, otherwise the performance of


--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to