Reviewers: Vitaly Repeshko, Lasse Reichstein,

Message:
The problem with slice having a different encoding that the backing string (due to externalizing) is not really a problem anymore since that - as far as I could
see - only affected  ToAsciiVector and ToUC16Vector that have been replaced
recently by GetFlatContent.

Description:
Enable slices of external strings (in the tentative implementation).
TEST=cctest test-strings/SliceFromExternal, mjsunit/string-slices.js


Please review this at http://codereview.chromium.org/7832002/

SVN Base: https://v8.googlecode.com/svn/branches/bleeding_edge

Affected files:
  M src/heap.cc
  M src/runtime.cc
  M test/cctest/test-strings.cc
  M test/mjsunit/string-slices.js


Index: src/heap.cc
diff --git a/src/heap.cc b/src/heap.cc
index 84909d52a5970164098dd4ac6be3f659501a6ffb..fdb010edb2b71df05664ca521ccd3547e43909ce 100644
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -2661,25 +2661,24 @@ MaybeObject* Heap::AllocateSubString(String* buffer,
   // Make an attempt to flatten the buffer to reduce access time.
   buffer = buffer->TryFlattenGetString();

- // TODO(1626): For now slicing external strings is not supported. However, - // a flat cons string can have an external string as first part in some cases.
-  // Therefore we have to single out this case as well.
   if (!FLAG_string_slices ||
-      (buffer->IsConsString() &&
-        (!buffer->IsFlat() ||
-         !ConsString::cast(buffer)->first()->IsSeqString())) ||
-      buffer->IsExternalString() ||
+      !buffer->IsFlat() ||
       length < SlicedString::kMinLength ||
       pretenure == TENURED) {
     Object* result;
-    { MaybeObject* maybe_result = buffer->IsAsciiRepresentation()
-                     ? AllocateRawAsciiString(length, pretenure)
-                     : AllocateRawTwoByteString(length, pretenure);
+ // The string encoding of the string might be ascii, while the underlying
+    // string is actually a twobyte.  The content must be ascii though,
+    // therefore this is a good opportunity to go back to ascii encoding.
+    // WriteToFlat takes care of twobyte to ascii conversion.
+    bool is_ascii = buffer->IsAsciiRepresentation();
+    { MaybeObject* maybe_result = is_ascii
+ ? AllocateRawAsciiString(length, pretenure) + : AllocateRawTwoByteString(length, pretenure);
       if (!maybe_result->ToObject(&result)) return maybe_result;
     }
     String* string_result = String::cast(result);
     // Copy the characters into the new object.
-    if (buffer->IsAsciiRepresentation()) {
+    if (is_ascii) {
       ASSERT(string_result->IsAsciiRepresentation());
       char* dest = SeqAsciiString::cast(string_result)->GetChars();
       String::WriteToFlat(buffer, dest, start, end);
@@ -2692,12 +2691,13 @@ MaybeObject* Heap::AllocateSubString(String* buffer,
   }

   ASSERT(buffer->IsFlat());
-  ASSERT(!buffer->IsExternalString());
 #if DEBUG
   buffer->StringVerify();
 #endif

   Object* result;
+ // The string encoding tag in the newly created slice does not really matter. + // Therefore we do not further check the encoding of the underlying string.
   { Map* map = buffer->IsAsciiRepresentation()
                  ? sliced_ascii_string_map()
                  : sliced_string_map();
@@ -2723,7 +2723,8 @@ MaybeObject* Heap::AllocateSubString(String* buffer,
     sliced_string->set_parent(buffer);
     sliced_string->set_offset(start);
   }
-  ASSERT(sliced_string->parent()->IsSeqString());
+  ASSERT(sliced_string->parent()->IsSeqString() ||
+         sliced_string->parent()->IsExternalString());
   return result;
 }

Index: src/runtime.cc
diff --git a/src/runtime.cc b/src/runtime.cc
index d1fe1c7c1675377ead4e8d7bdc8d7717630abe07..4ddee7c38e5390a9e3970b45fbc996606b5816e7 100644
--- a/src/runtime.cc
+++ b/src/runtime.cc
@@ -6029,7 +6029,7 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) {
       }
     } else {
       Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
-      if (pattern->IsAsciiRepresentation()) {
+      if (pattern_content.IsAscii()) {
         FindStringIndices(isolate,
                           subject_vector,
                           pattern_content.ToAsciiVector(),
Index: test/cctest/test-strings.cc
diff --git a/test/cctest/test-strings.cc b/test/cctest/test-strings.cc
index 55c21417d0edeb5df29090f3280ac9580cc003bd..bc69d63d2f06d7ad7795e2cee73e0c24f6e71cbd 100644
--- a/test/cctest/test-strings.cc
+++ b/test/cctest/test-strings.cc
@@ -502,6 +502,36 @@ TEST(SliceFromCons) {
 }


+class AsciiVectorResource : public v8::String::ExternalAsciiStringResource {
+ public:
+  explicit AsciiVectorResource(i::Vector<const char> vector)
+      : data_(vector) {}
+  virtual ~AsciiVectorResource() {}
+  virtual size_t length() const { return data_.length(); }
+  virtual const char* data() const { return data_.start(); }
+ private:
+  i::Vector<const char> data_;
+};
+
+
+TEST(SliceFromExternal) {
+  FLAG_string_slices = true;
+  InitializeVM();
+  v8::HandleScope scope;
+  AsciiVectorResource resource(
+      i::Vector<const char>("abcdefghijklmnopqrstuvwxyz", 26));
+  Handle<String> string = FACTORY->NewExternalStringFromAscii(&resource);
+  CHECK(string->IsExternalString());
+  Handle<String> slice = FACTORY->NewSubString(string, 1, 25);
+  // After slicing, the original string becomes a flat cons.
+  CHECK(slice->IsSlicedString());
+  CHECK(string->IsExternalString());
+  CHECK_EQ(SlicedString::cast(*slice)->parent(), *string);
+  CHECK(SlicedString::cast(*slice)->parent()->IsExternalString());
+  CHECK(slice->IsFlat());
+}
+
+
 TEST(TrivialSlice) {
   // This tests whether a slice that contains the entire parent string
   // actually creates a new string (it should not).
Index: test/mjsunit/string-slices.js
diff --git a/test/mjsunit/string-slices.js b/test/mjsunit/string-slices.js
index 8cc1f81e774fe6b8449555b90a3d5bba868f4db2..40b9a20d1cdb132390d86863e25ad22e0c3d1842 100755
--- a/test/mjsunit/string-slices.js
+++ b/test/mjsunit/string-slices.js
@@ -189,11 +189,16 @@ assertEquals("\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9abcdefghijklmnop",
 assertEquals("\u03B2\u03B3\u03B4\u03B5\u03B4\u03B5\u03B6\u03B7",
     utf.substring(5,1) + utf.substring(3,7));

-/*
 // Externalizing strings.
-var a = "123456789qwertyuiopasdfghjklzxcvbnm";
-var b = a.slice(1,-1);
+var a = "123456789" + "qwertyuiopasdfghjklzxcvbnm";
+var b = "23456789qwertyuiopasdfghjklzxcvbn"
 assertEquals(a.slice(1,-1), b);
-externalizeString(a);
+
+assertTrue(isAsciiString(a));
+externalizeString(a, true);
+assertFalse(isAsciiString(a));
+
 assertEquals(a.slice(1,-1), b);
-*/
+assertTrue(/3456789qwe/.test(a));
+assertEquals(5, a.indexOf("678"));
+assertEquals("12345", a.split("6")[0]);


--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

Reply via email to