Revision: 4894
Author: [email protected]
Date: Thu Jun 17 09:19:28 2010
Log: Track ascii-ness of data in externalized strings.
If a two-byte string only contains ascii characters, then we can save
memory when flattening a cons string containing it. Similarly we can
use this in Array.prototype.join implementation. To track this a new
bit is added to instance type. This bit is used as a hint in generated
code and in runtime functions.
To enable testing a new V8 extension is added controlled by
--expose-externalize-string flag.
Review URL: http://codereview.chromium.org/2762008
http://code.google.com/p/v8/source/detail?r=4894
Added:
/branches/bleeding_edge/test/mjsunit/string-externalize.js
Modified:
/branches/bleeding_edge/src/arm/codegen-arm.cc
/branches/bleeding_edge/src/bootstrapper.cc
/branches/bleeding_edge/src/execution.cc
/branches/bleeding_edge/src/execution.h
/branches/bleeding_edge/src/flag-definitions.h
/branches/bleeding_edge/src/heap.cc
/branches/bleeding_edge/src/heap.h
/branches/bleeding_edge/src/ia32/codegen-ia32.cc
/branches/bleeding_edge/src/objects-debug.cc
/branches/bleeding_edge/src/objects-inl.h
/branches/bleeding_edge/src/objects.cc
/branches/bleeding_edge/src/objects.h
/branches/bleeding_edge/src/runtime.cc
/branches/bleeding_edge/src/x64/codegen-x64.cc
=======================================
--- /dev/null
+++ /branches/bleeding_edge/test/mjsunit/string-externalize.js Thu Jun 17
09:19:28 2010
@@ -0,0 +1,95 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Flags: --expose-externalize-string
+
+var size = 1024;
+
+function test() {
+ var str = "";
+
+ // Build an ascii cons string.
+ for (var i = 0; i < size; i++) {
+ str += String.fromCharCode(i & 0x7f);
+ }
+ assertTrue(isAsciiString(str));
+
+ var twoByteExternalWithAsciiData =
+ "AA" + (function() { return "A"; })();
+ externalizeString(twoByteExternalWithAsciiData, true /* force two-byte
*/);
+ assertFalse(isAsciiString(twoByteExternalWithAsciiData));
+
+ var realTwoByteExternalString =
+ "\u1234\u1234" + (function() { return "\u1234"; })();
+ externalizeString(realTwoByteExternalString);
+ assertFalse(isAsciiString(realTwoByteExternalString));
+
+ assertTrue(isAsciiString(["a", twoByteExternalWithAsciiData].join("")));
+
+ // Appending a two-byte string that contains only ascii chars should
+ // still produce an ascii cons.
+ var str1 = str + twoByteExternalWithAsciiData;
+ assertTrue(isAsciiString(str1));
+
+ // Force flattening of the string.
+ var old_length = str1.length - twoByteExternalWithAsciiData.length;
+ for (var i = 0; i < old_length; i++) {
+ assertEquals(String.fromCharCode(i & 0x7f), str1[i]);
+ }
+ for (var i = old_length; i < str1.length; i++) {
+ assertEquals("A", str1[i]);
+ }
+
+ // Flattened string should still be ascii.
+ assertTrue(isAsciiString(str1));
+
+ // Lower-casing an ascii string should produce ascii.
+ assertTrue(isAsciiString(str1.toLowerCase()));
+
+ assertFalse(isAsciiString(["a", realTwoByteExternalString].join("")));
+
+ // Appending a real two-byte string should produce a two-byte cons.
+ var str2 = str + realTwoByteExternalString;
+ assertFalse(isAsciiString(str2));
+
+ // Force flattening of the string.
+ old_length = str2.length - realTwoByteExternalString.length;
+ for (var i = 0; i < old_length; i++) {
+ assertEquals(String.fromCharCode(i & 0x7f), str2[i]);
+ }
+ for (var i = old_length; i < str.length; i++) {
+ assertEquals("\u1234", str2[i]);
+ }
+
+ // Flattened string should still be two-byte.
+ assertFalse(isAsciiString(str2));
+}
+
+// Run the test many times to ensure IC-s don't break things.
+for (var i = 0; i < 10; i++) {
+ test();
+}
=======================================
--- /branches/bleeding_edge/src/arm/codegen-arm.cc Thu Jun 17 03:45:37 2010
+++ /branches/bleeding_edge/src/arm/codegen-arm.cc Thu Jun 17 09:19:28 2010
@@ -10559,13 +10559,14 @@
__ ldrb(r4, FieldMemOperand(r4, Map::kInstanceTypeOffset));
__ ldrb(r5, FieldMemOperand(r5, Map::kInstanceTypeOffset));
}
- Label non_ascii, allocated;
+ Label non_ascii, allocated, ascii_data;
ASSERT_EQ(0, kTwoByteStringTag);
__ tst(r4, Operand(kStringEncodingMask));
__ tst(r5, Operand(kStringEncodingMask), ne);
__ b(eq, &non_ascii);
// Allocate an ASCII cons string.
+ __ bind(&ascii_data);
__ AllocateAsciiConsString(r7, r6, r4, r5, &string_add_runtime);
__ bind(&allocated);
// Fill the fields of the cons string.
@@ -10577,6 +10578,19 @@
__ Ret();
__ bind(&non_ascii);
+ // At least one of the strings is two-byte. Check whether it happens
+ // to contain only ascii characters.
+ // r4: first instance type.
+ // r5: second instance type.
+ __ tst(r4, Operand(kAsciiDataHintMask));
+ __ tst(r5, Operand(kAsciiDataHintMask), ne);
+ __ b(ne, &ascii_data);
+ __ eor(r4, r4, Operand(r5));
+ ASSERT(kAsciiStringTag != 0 && kAsciiDataHintTag != 0);
+ __ and_(r4, r4, Operand(kAsciiStringTag | kAsciiDataHintTag));
+ __ cmp(r4, Operand(kAsciiStringTag | kAsciiDataHintTag));
+ __ b(eq, &ascii_data);
+
// Allocate a two byte cons string.
__ AllocateTwoByteConsString(r7, r6, r4, r5, &string_add_runtime);
__ jmp(&allocated);
=======================================
--- /branches/bleeding_edge/src/bootstrapper.cc Wed May 12 05:44:00 2010
+++ /branches/bleeding_edge/src/bootstrapper.cc Thu Jun 17 09:19:28 2010
@@ -1462,6 +1462,7 @@
}
if (FLAG_expose_gc) InstallExtension("v8/gc");
+ if (FLAG_expose_externalize_string) InstallExtension("v8/externalize");
if (extensions == NULL) return true;
// Install required extensions
=======================================
--- /branches/bleeding_edge/src/execution.cc Mon Apr 19 05:39:07 2010
+++ /branches/bleeding_edge/src/execution.cc Thu Jun 17 09:19:28 2010
@@ -679,7 +679,7 @@
// --- G C E x t e n s i o n ---
-const char* GCExtension::kSource = "native function gc();";
+const char* const GCExtension::kSource = "native function gc();";
v8::Handle<v8::FunctionTemplate> GCExtension::GetNativeFunction(
@@ -695,7 +695,115 @@
}
-static GCExtension kGCExtension;
-v8::DeclareExtension kGCExtensionDeclaration(&kGCExtension);
+static GCExtension gc_extension;
+static v8::DeclareExtension gc_extension_declaration(&gc_extension);
+
+
+// --- E x t e r n a l i z e S t r i n g E x t e n s i o n ---
+
+
+// Minimal external string resource that owns a heap-allocated character
+// buffer. Char is the character type stored in the buffer and Base is the
+// external string resource interface being implemented (see the typedefs
+// that follow this class).
+template <typename Char, typename Base>
+class SimpleStringResource : public Base {
+ public:
+  // Takes ownership of |data|, which must have been allocated with new[].
+  SimpleStringResource(Char* data, size_t length)
+      : data_(data),
+        length_(length) {}
+
+  // The buffer is an array allocation, so it has to be released with
+  // delete[]; scalar delete on a new[] pointer is undefined behavior.
+  virtual ~SimpleStringResource() { delete[] data_; }
+
+  // Pointer to the owned character buffer.
+  virtual const Char* data() const { return data_; }
+
+  // Number of characters in the buffer, as passed to the constructor.
+  virtual size_t length() const { return length_; }
+
+ private:
+  Char* const data_;
+  const size_t length_;
+};
+
+
+typedef SimpleStringResource<char, v8::String::ExternalAsciiStringResource>
+ SimpleAsciiStringResource;
+typedef SimpleStringResource<uc16, v8::String::ExternalStringResource>
+ SimpleTwoByteStringResource;
+
+
+const char* const ExternalizeStringExtension::kSource =
+ "native function externalizeString();"
+ "native function isAsciiString();";
+
+
+v8::Handle<v8::FunctionTemplate>
ExternalizeStringExtension::GetNativeFunction(
+ v8::Handle<v8::String> str) {  // str: name of the native being resolved.
+ if (strcmp(*v8::String::AsciiValue(str), "externalizeString") == 0) {
+ return
v8::FunctionTemplate::New(ExternalizeStringExtension::Externalize);
+ } else {  // kSource declares only one other native: isAsciiString.
+ ASSERT(strcmp(*v8::String::AsciiValue(str), "isAsciiString") == 0);
+ return v8::FunctionTemplate::New(ExternalizeStringExtension::IsAscii);
+ }
+}
+
+
+// Native implementation of externalizeString(string[, forceTwoByte]).
+// Copies the string's characters into a freshly allocated buffer, wraps
+// the buffer in a SimpleStringResource and asks the string to become an
+// external string backed by that resource. An ascii resource is used only
+// when the string has an ascii representation and forceTwoByte is not set;
+// otherwise a two-byte resource is used.
+//
+// Returns undefined on success. Throws a JS exception when the arguments
+// have the wrong type, when the string is already external, or when
+// MakeExternal() reports failure.
+v8::Handle<v8::Value> ExternalizeStringExtension::Externalize(
+    const v8::Arguments& args) {
+  if (args.Length() < 1 || !args[0]->IsString()) {
+    return v8::ThrowException(v8::String::New(
+        "First parameter to externalizeString() must be a string."));
+  }
+  bool force_two_byte = false;
+  if (args.Length() >= 2) {
+    if (args[1]->IsBoolean()) {
+      force_two_byte = args[1]->BooleanValue();
+    } else {
+      return v8::ThrowException(v8::String::New(
+          "Second parameter to externalizeString() must be a boolean."));
+    }
+  }
+  bool result = false;
+  Handle<String> string = Utils::OpenHandle(*args[0].As<v8::String>());
+  if (string->IsExternalString()) {
+    return v8::ThrowException(v8::String::New(
+        "externalizeString() can't externalize twice."));
+  }
+  if (string->IsAsciiRepresentation() && !force_two_byte) {
+    char* data = new char[string->length()];
+    String::WriteToFlat(*string, data, 0, string->length());
+    SimpleAsciiStringResource* resource = new SimpleAsciiStringResource(
+        data, string->length());
+    result = string->MakeExternal(resource);
+    if (result && !string->IsSymbol()) {
+      i::ExternalStringTable::AddString(*string);
+    }
+    // A false result means the string was not turned into an external
+    // string, so nothing else references the resource. Delete it (which
+    // also frees the buffer it owns) instead of leaking both.
+    // NOTE(review): assumes the ascii MakeExternal() overload, like the
+    // two-byte one, bails out before storing the resource - confirm.
+    if (!result) delete resource;
+  } else {
+    uc16* data = new uc16[string->length()];
+    String::WriteToFlat(*string, data, 0, string->length());
+    SimpleTwoByteStringResource* resource = new SimpleTwoByteStringResource(
+        data, string->length());
+    result = string->MakeExternal(resource);
+    if (result && !string->IsSymbol()) {
+      i::ExternalStringTable::AddString(*string);
+    }
+    // See above: on failure the resource is still ours to release.
+    if (!result) delete resource;
+  }
+  if (!result) {
+    return v8::ThrowException(v8::String::New("externalizeString() failed."));
+  }
+  return v8::Undefined();
+}
+
+
+v8::Handle<v8::Value> ExternalizeStringExtension::IsAscii(
+ const v8::Arguments& args) {  // Native implementation of isAsciiString(string).
+ if (args.Length() != 1 || !args[0]->IsString()) {
+ return v8::ThrowException(v8::String::New(
+ "isAsciiString() requires a single string argument."));
+ }
+ return
Utils::OpenHandle(*args[0].As<v8::String>())->IsAsciiRepresentation() ?
+ v8::True() : v8::False();  // Reports the string's representation, not whether its chars fit in ascii.
+}
+
+
+static ExternalizeStringExtension externalize_extension;
+static v8::DeclareExtension externalize_extension_declaration(
+ &externalize_extension);
} } // namespace v8::internal
=======================================
--- /branches/bleeding_edge/src/execution.h Wed Apr 14 00:36:49 2010
+++ /branches/bleeding_edge/src/execution.h Thu Jun 17 09:19:28 2010
@@ -316,10 +316,21 @@
v8::Handle<v8::String> name);
static v8::Handle<v8::Value> GC(const v8::Arguments& args);
private:
- static const char* kSource;
+ static const char* const kSource;
};
+class ExternalizeStringExtension : public v8::Extension {  // Registered under the name "v8/externalize".
+ public:
+ ExternalizeStringExtension() : v8::Extension("v8/externalize", kSource)
{}
+ virtual v8::Handle<v8::FunctionTemplate> GetNativeFunction(
+ v8::Handle<v8::String> name);
+ static v8::Handle<v8::Value> Externalize(const v8::Arguments& args);  // Backs the native externalizeString().
+ static v8::Handle<v8::Value> IsAscii(const v8::Arguments& args);  // Backs the native isAsciiString().
+ private:
+ static const char* const kSource;  // Source text handed to v8::Extension; defined in execution.cc.
+};
+
} } // namespace v8::internal
#endif // V8_EXECUTION_H_
=======================================
--- /branches/bleeding_edge/src/flag-definitions.h Fri Jun 11 00:06:51 2010
+++ /branches/bleeding_edge/src/flag-definitions.h Thu Jun 17 09:19:28 2010
@@ -123,6 +123,8 @@
DEFINE_string(expose_natives_as, NULL, "expose natives in global object")
DEFINE_string(expose_debug_as, NULL, "expose debug in global object")
DEFINE_bool(expose_gc, false, "expose gc extension")
+DEFINE_bool(expose_externalize_string, false,
+ "expose externalize string extension")
DEFINE_int(stack_trace_limit, 10, "number of stack frames to capture")
DEFINE_bool(disable_native_files, false, "disable builtin natives files")
=======================================
--- /branches/bleeding_edge/src/heap.cc Fri Jun 11 00:06:51 2010
+++ /branches/bleeding_edge/src/heap.cc Thu Jun 17 09:19:28 2010
@@ -1928,6 +1928,18 @@
Top::context()->mark_out_of_memory();
return Failure::OutOfMemoryException();
}
+
+ bool is_ascii_data_in_two_byte_string = false;
+ if (!is_ascii) {
+ // At least one of the strings uses two-byte representation so we
+ // can't use the fast case code for short ascii strings below, but
+ // we can try to save memory if all chars actually fit in ascii.
+ is_ascii_data_in_two_byte_string =
+ first->HasOnlyAsciiChars() && second->HasOnlyAsciiChars();
+ if (is_ascii_data_in_two_byte_string) {
+ Counters::string_add_runtime_ext_to_ascii.Increment();
+ }
+ }
// If the resulting string is small make a flat string.
if (length < String::kMinNonFlatLength) {
@@ -1955,22 +1967,13 @@
for (int i = 0; i < second_length; i++) *dest++ = src[i];
return result;
} else {
- // For short external two-byte strings we check whether they can
- // be represented using ascii.
- if (!first_is_ascii) {
- first_is_ascii = first->IsExternalTwoByteStringWithAsciiChars();
- }
- if (first_is_ascii && !second_is_ascii) {
- second_is_ascii = second->IsExternalTwoByteStringWithAsciiChars();
- }
- if (first_is_ascii && second_is_ascii) {
+ if (is_ascii_data_in_two_byte_string) {
Object* result = AllocateRawAsciiString(length);
if (result->IsFailure()) return result;
// Copy the characters into the new object.
char* dest = SeqAsciiString::cast(result)->GetChars();
String::WriteToFlat(first, dest, 0, first_length);
String::WriteToFlat(second, dest + first_length, 0, second_length);
- Counters::string_add_runtime_ext_to_ascii.Increment();
return result;
}
@@ -1984,7 +1987,8 @@
}
}
- Map* map = is_ascii ? cons_ascii_string_map() : cons_string_map();
+ Map* map = (is_ascii || is_ascii_data_in_two_byte_string) ?
+ cons_ascii_string_map() : cons_string_map();
Object* result = Allocate(map, NEW_SPACE);
if (result->IsFailure()) return result;
@@ -2070,7 +2074,23 @@
return Failure::OutOfMemoryException();
}
- Map* map = Heap::external_string_map();
+ // For small strings we check whether the resource contains only
+ // ascii characters. If yes, we use a different string map.
+ bool is_ascii = true;
+ if (length >= static_cast<size_t>(String::kMinNonFlatLength)) {
+ is_ascii = false;
+ } else {
+ const uc16* data = resource->data();
+ for (size_t i = 0; i < length; i++) {
+ if (data[i] > String::kMaxAsciiCharCode) {
+ is_ascii = false;
+ break;
+ }
+ }
+ }
+
+ Map* map = is_ascii ?
+ Heap::external_string_with_ascii_data_map() :
Heap::external_string_map();
Object* result = Allocate(map, NEW_SPACE);
if (result->IsFailure()) return result;
@@ -2853,6 +2873,9 @@
if (map == cons_ascii_string_map()) return cons_ascii_symbol_map();
if (map == external_string_map()) return external_symbol_map();
if (map == external_ascii_string_map()) return
external_ascii_symbol_map();
+ if (map == external_string_with_ascii_data_map()) {
+ return external_symbol_with_ascii_data_map();
+ }
// No match found.
return NULL;
=======================================
--- /branches/bleeding_edge/src/heap.h Wed Jun 16 01:29:25 2010
+++ /branches/bleeding_edge/src/heap.h Thu Jun 17 09:19:28 2010
@@ -69,10 +69,12 @@
V(Map, cons_symbol_map,
ConsSymbolMap) \
V(Map, cons_ascii_symbol_map,
ConsAsciiSymbolMap) \
V(Map, external_symbol_map,
ExternalSymbolMap) \
+ V(Map, external_symbol_with_ascii_data_map,
ExternalSymbolWithAsciiDataMap) \
V(Map, external_ascii_symbol_map,
ExternalAsciiSymbolMap) \
V(Map, cons_string_map,
ConsStringMap) \
V(Map, cons_ascii_string_map,
ConsAsciiStringMap) \
V(Map, external_string_map,
ExternalStringMap) \
+ V(Map, external_string_with_ascii_data_map,
ExternalStringWithAsciiDataMap) \
V(Map, external_ascii_string_map,
ExternalAsciiStringMap) \
V(Map, undetectable_string_map,
UndetectableStringMap) \
V(Map, undetectable_ascii_string_map,
UndetectableAsciiStringMap) \
=======================================
--- /branches/bleeding_edge/src/ia32/codegen-ia32.cc Wed Jun 16 05:32:34
2010
+++ /branches/bleeding_edge/src/ia32/codegen-ia32.cc Thu Jun 17 09:19:28
2010
@@ -12852,7 +12852,7 @@
// If result is not supposed to be flat allocate a cons string object.
If both
// strings are ascii the result is an ascii cons string.
- Label non_ascii, allocated;
+ Label non_ascii, allocated, ascii_data;
__ mov(edi, FieldOperand(eax, HeapObject::kMapOffset));
__ movzx_b(ecx, FieldOperand(edi, Map::kInstanceTypeOffset));
__ mov(edi, FieldOperand(edx, HeapObject::kMapOffset));
@@ -12861,6 +12861,7 @@
ASSERT(kStringEncodingMask == kAsciiStringTag);
__ test(ecx, Immediate(kAsciiStringTag));
__ j(zero, &non_ascii);
+ __ bind(&ascii_data);
// Allocate an acsii cons string.
__ AllocateAsciiConsString(ecx, edi, no_reg, &string_add_runtime);
__ bind(&allocated);
@@ -12875,6 +12876,19 @@
__ IncrementCounter(&Counters::string_add_native, 1);
__ ret(2 * kPointerSize);
__ bind(&non_ascii);
+ // At least one of the strings is two-byte. Check whether it happens
+ // to contain only ascii characters.
+ // ecx: first instance type AND second instance type.
+ // edi: second instance type.
+ __ test(ecx, Immediate(kAsciiDataHintMask));
+ __ j(not_zero, &ascii_data);
+ __ mov(ecx, FieldOperand(eax, HeapObject::kMapOffset));
+ __ movzx_b(ecx, FieldOperand(ecx, Map::kInstanceTypeOffset));
+ __ xor_(edi, Operand(ecx));
+ ASSERT(kAsciiStringTag != 0 && kAsciiDataHintTag != 0);
+ __ and_(edi, kAsciiStringTag | kAsciiDataHintTag);
+ __ cmp(edi, kAsciiStringTag | kAsciiDataHintTag);
+ __ j(equal, &ascii_data);
// Allocate a two byte cons string.
__ AllocateConsString(ecx, edi, no_reg, &string_add_runtime);
__ jmp(&allocated);
=======================================
--- /branches/bleeding_edge/src/objects-debug.cc Thu May 27 05:30:45 2010
+++ /branches/bleeding_edge/src/objects-debug.cc Thu Jun 17 09:19:28 2010
@@ -552,12 +552,14 @@
case CONS_SYMBOL_TYPE: return "CONS_SYMBOL";
case CONS_ASCII_SYMBOL_TYPE: return "CONS_ASCII_SYMBOL";
case EXTERNAL_ASCII_SYMBOL_TYPE:
+ case EXTERNAL_SYMBOL_WITH_ASCII_DATA_TYPE:
case EXTERNAL_SYMBOL_TYPE: return "EXTERNAL_SYMBOL";
case ASCII_STRING_TYPE: return "ASCII_STRING";
case STRING_TYPE: return "TWO_BYTE_STRING";
case CONS_STRING_TYPE:
case CONS_ASCII_STRING_TYPE: return "CONS_STRING";
case EXTERNAL_ASCII_STRING_TYPE:
+ case EXTERNAL_STRING_WITH_ASCII_DATA_TYPE:
case EXTERNAL_STRING_TYPE: return "EXTERNAL_STRING";
case FIXED_ARRAY_TYPE: return "FIXED_ARRAY";
case BYTE_ARRAY_TYPE: return "BYTE_ARRAY";
=======================================
--- /branches/bleeding_edge/src/objects-inl.h Mon Jun 7 08:39:10 2010
+++ /branches/bleeding_edge/src/objects-inl.h Thu Jun 17 09:19:28 2010
@@ -237,31 +237,20 @@
bool String::IsAsciiRepresentation() {
uint32_t type = map()->instance_type();
- if ((type & kStringRepresentationMask) == kConsStringTag &&
- ConsString::cast(this)->second()->length() == 0) {
- return ConsString::cast(this)->first()->IsAsciiRepresentation();
- }
return (type & kStringEncodingMask) == kAsciiStringTag;
}
bool String::IsTwoByteRepresentation() {
uint32_t type = map()->instance_type();
- if ((type & kStringRepresentationMask) == kConsStringTag &&
- ConsString::cast(this)->second()->length() == 0) {
- return ConsString::cast(this)->first()->IsTwoByteRepresentation();
- }
return (type & kStringEncodingMask) == kTwoByteStringTag;
}
-bool String::IsExternalTwoByteStringWithAsciiChars() {
- if (!IsExternalTwoByteString()) return false;
- const uc16* data = ExternalTwoByteString::cast(this)->resource()->data();
- for (int i = 0, len = length(); i < len; i++) {
- if (data[i] > kMaxAsciiCharCode) return false;
- }
- return true;
+bool String::HasOnlyAsciiChars() {  // Decided from the instance type alone; no characters are scanned.
+ uint32_t type = map()->instance_type();
+ return (type & kStringEncodingMask) == kAsciiStringTag ||  // True for an ascii-encoded string, or
+ (type & kAsciiDataHintMask) == kAsciiDataHintTag;  // when the ascii-data hint bit is set.
}
=======================================
--- /branches/bleeding_edge/src/objects.cc Tue Jun 15 10:01:02 2010
+++ /branches/bleeding_edge/src/objects.cc Thu Jun 17 09:19:28 2010
@@ -678,6 +678,9 @@
bool String::MakeExternal(v8::String::ExternalStringResource* resource) {
+ // Externalizing twice leaks the external resource, so it's
+ // prohibited by the API.
+ ASSERT(!this->IsExternalString());
#ifdef DEBUG
if (FLAG_enable_slow_asserts) {
// Assert that the resource and the string are equivalent.
@@ -697,13 +700,16 @@
return false;
}
ASSERT(size >= ExternalString::kSize);
+ bool is_ascii = this->IsAsciiRepresentation();
bool is_symbol = this->IsSymbol();
int length = this->length();
int hash_field = this->hash_field();
// Morph the object to an external string by adjusting the map and
// reinitializing the fields.
- this->set_map(Heap::external_string_map());
+ this->set_map(is_ascii ?
+ Heap::external_string_with_ascii_data_map() :
+ Heap::external_string_map());
ExternalTwoByteString* self = ExternalTwoByteString::cast(this);
self->set_length(length);
self->set_hash_field(hash_field);
@@ -713,7 +719,9 @@
if (is_symbol) {
self->Hash(); // Force regeneration of the hash value.
// Now morph this external string into a external symbol.
- this->set_map(Heap::external_symbol_map());
+ this->set_map(is_ascii ?
+ Heap::external_symbol_with_ascii_data_map() :
+ Heap::external_symbol_map());
}
// Fill the remainder of the string with dead wood.
=======================================
--- /branches/bleeding_edge/src/objects.h Mon Jun 14 06:55:38 2010
+++ /branches/bleeding_edge/src/objects.h Thu Jun 17 09:19:28 2010
@@ -320,6 +320,10 @@
ExternalTwoByteString::kSize, \
external_symbol, \
ExternalSymbol) \
+
V(EXTERNAL_SYMBOL_WITH_ASCII_DATA_TYPE,
\
+
ExternalTwoByteString::kSize, \
+
external_symbol_with_ascii_data, \
+
ExternalSymbolWithAsciiData) \
V(EXTERNAL_ASCII_SYMBOL_TYPE,
\
ExternalAsciiString::kSize, \
external_ascii_symbol, \
@@ -344,6 +348,10 @@
ExternalTwoByteString::kSize, \
external_string, \
ExternalString) \
+
V(EXTERNAL_STRING_WITH_ASCII_DATA_TYPE,
\
+
ExternalTwoByteString::kSize, \
+
external_string_with_ascii_data, \
+
ExternalStringWithAsciiData) \
V(EXTERNAL_ASCII_STRING_TYPE,
\
ExternalAsciiString::kSize, \
external_ascii_string, \
@@ -412,6 +420,11 @@
};
const uint32_t kIsConsStringMask = 0x1;
+// If bit 7 is clear, then bit 3 indicates whether this two-byte
+// string actually contains ascii data.
+const uint32_t kAsciiDataHintMask = 0x08;
+const uint32_t kAsciiDataHintTag = 0x08;
+
// A ConsString with an empty string as the right side is a candidate
// for being shortcut by the garbage collector unless it is a
@@ -427,18 +440,22 @@
enum InstanceType {
// String types.
- SYMBOL_TYPE = kSymbolTag | kSeqStringTag,
+ SYMBOL_TYPE = kTwoByteStringTag | kSymbolTag | kSeqStringTag,
ASCII_SYMBOL_TYPE = kAsciiStringTag | kSymbolTag | kSeqStringTag,
- CONS_SYMBOL_TYPE = kSymbolTag | kConsStringTag,
+ CONS_SYMBOL_TYPE = kTwoByteStringTag | kSymbolTag | kConsStringTag,
CONS_ASCII_SYMBOL_TYPE = kAsciiStringTag | kSymbolTag | kConsStringTag,
- EXTERNAL_SYMBOL_TYPE = kSymbolTag | kExternalStringTag,
+ EXTERNAL_SYMBOL_TYPE = kTwoByteStringTag | kSymbolTag |
kExternalStringTag,
+ EXTERNAL_SYMBOL_WITH_ASCII_DATA_TYPE =
+ kTwoByteStringTag | kSymbolTag | kExternalStringTag |
kAsciiDataHintTag,
EXTERNAL_ASCII_SYMBOL_TYPE =
kAsciiStringTag | kSymbolTag | kExternalStringTag,
- STRING_TYPE = kSeqStringTag,
+ STRING_TYPE = kTwoByteStringTag | kSeqStringTag,
ASCII_STRING_TYPE = kAsciiStringTag | kSeqStringTag,
- CONS_STRING_TYPE = kConsStringTag,
+ CONS_STRING_TYPE = kTwoByteStringTag | kConsStringTag,
CONS_ASCII_STRING_TYPE = kAsciiStringTag | kConsStringTag,
- EXTERNAL_STRING_TYPE = kExternalStringTag,
+ EXTERNAL_STRING_TYPE = kTwoByteStringTag | kExternalStringTag,
+ EXTERNAL_STRING_WITH_ASCII_DATA_TYPE =
+ kTwoByteStringTag | kExternalStringTag | kAsciiDataHintTag,
EXTERNAL_ASCII_STRING_TYPE = kAsciiStringTag | kExternalStringTag,
PRIVATE_EXTERNAL_ASCII_STRING_TYPE = EXTERNAL_ASCII_STRING_TYPE,
@@ -4069,12 +4086,14 @@
inline bool IsAsciiRepresentation();
inline bool IsTwoByteRepresentation();
- // Check whether this string is an external two-byte string that in
- // fact contains only ascii characters.
+ // Returns whether this string has ascii chars, i.e. all of them can
+ // be ascii encoded. This might be the case even if the string is
+ // two-byte. Such strings may appear when the embedder prefers
+ // two-byte external representations even for ascii data.
//
- // Such strings may appear when the embedder prefers two-byte
- // representations even for ascii data.
- inline bool IsExternalTwoByteStringWithAsciiChars();
+ // NOTE: this should be considered only a hint. False negatives are
+ // possible.
+ inline bool HasOnlyAsciiChars();
// Get and set individual two byte chars in the string.
inline void Set(int index, uint16_t value);
=======================================
--- /branches/bleeding_edge/src/runtime.cc Thu Jun 17 05:47:08 2010
+++ /branches/bleeding_edge/src/runtime.cc Thu Jun 17 09:19:28 2010
@@ -4944,16 +4944,6 @@
return s;
}
}
-
-
-static inline SeqAsciiString* TryGetSeqAsciiString(String* s) {
- if (!s->IsFlat() || !s->IsAsciiRepresentation()) return NULL;
- if (s->IsConsString()) {
- ASSERT(ConsString::cast(s)->second()->length() == 0);
- return SeqAsciiString::cast(ConsString::cast(s)->first());
- }
- return SeqAsciiString::cast(s);
-}
namespace {
@@ -5002,7 +4992,7 @@
unibrow::Mapping<typename ConvertTraits::UnibrowConverter, 128>*
mapping) {
NoHandleAllocation ha;
CONVERT_CHECKED(String, s, args[0]);
- s->TryFlatten();
+ s = s->TryFlattenGetString();
const int length = s->length();
// Assume that the string is not empty; we need this assumption later
@@ -5014,13 +5004,12 @@
// character is also ascii. This is currently the case, but it
// might break in the future if we implement more context and locale
// dependent upper/lower conversions.
- SeqAsciiString* seq_ascii = TryGetSeqAsciiString(s);
- if (seq_ascii != NULL) {
+ if (s->IsSeqAsciiString()) {
Object* o = Heap::AllocateRawAsciiString(length);
if (o->IsFailure()) return o;
SeqAsciiString* result = SeqAsciiString::cast(o);
bool has_changed_character = ConvertTraits::ConvertAscii(
- result->GetChars(), seq_ascii->GetChars(), length);
+ result->GetChars(), SeqAsciiString::cast(s)->GetChars(), length);
return has_changed_character ? result : s;
}
@@ -5564,7 +5553,7 @@
if (first->IsString()) return first;
}
- bool ascii = special->IsAsciiRepresentation();
+ bool ascii = special->HasOnlyAsciiChars();
int position = 0;
for (int i = 0; i < array_length; i++) {
int increment = 0;
@@ -5605,7 +5594,7 @@
String* element = String::cast(elt);
int element_length = element->length();
increment = element_length;
- if (ascii && !element->IsAsciiRepresentation()) {
+ if (ascii && !element->HasOnlyAsciiChars()) {
ascii = false;
}
} else {
=======================================
--- /branches/bleeding_edge/src/x64/codegen-x64.cc Thu Jun 17 08:48:43 2010
+++ /branches/bleeding_edge/src/x64/codegen-x64.cc Thu Jun 17 09:19:28 2010
@@ -11205,16 +11205,17 @@
// If result is not supposed to be flat, allocate a cons string object.
If
// both strings are ascii the result is an ascii cons string.
// rax: first string
- // ebx: length of resulting flat string
+ // rbx: length of resulting flat string
// rdx: second string
// r8: instance type of first string
// r9: instance type of second string
- Label non_ascii, allocated;
+ Label non_ascii, allocated, ascii_data;
__ movl(rcx, r8);
__ and_(rcx, r9);
ASSERT(kStringEncodingMask == kAsciiStringTag);
__ testl(rcx, Immediate(kAsciiStringTag));
__ j(zero, &non_ascii);
+ __ bind(&ascii_data);
// Allocate an acsii cons string.
__ AllocateAsciiConsString(rcx, rdi, no_reg, &string_add_runtime);
__ bind(&allocated);
@@ -11228,6 +11229,18 @@
__ IncrementCounter(&Counters::string_add_native, 1);
__ ret(2 * kPointerSize);
__ bind(&non_ascii);
+ // At least one of the strings is two-byte. Check whether it happens
+ // to contain only ascii characters.
+ // rcx: first instance type AND second instance type.
+ // r8: first instance type.
+ // r9: second instance type.
+ __ testb(rcx, Immediate(kAsciiDataHintMask));
+ __ j(not_zero, &ascii_data);
+ __ xor_(r8, r9);
+ ASSERT(kAsciiStringTag != 0 && kAsciiDataHintTag != 0);
+ __ andb(r8, Immediate(kAsciiStringTag | kAsciiDataHintTag));
+ __ cmpb(r8, Immediate(kAsciiStringTag | kAsciiDataHintTag));
+ __ j(equal, &ascii_data);
// Allocate a two byte cons string.
__ AllocateConsString(rcx, rdi, no_reg, &string_add_runtime);
__ jmp(&allocated);
@@ -11235,7 +11248,7 @@
// Handle creating a flat result. First check that both strings are not
// external strings.
// rax: first string
- // ebx: length of resulting flat string as smi
+ // rbx: length of resulting flat string as smi
// rdx: second string
// r8: instance type of first string
// r9: instance type of first string
@@ -11251,7 +11264,7 @@
__ j(equal, &string_add_runtime);
// Now check if both strings are ascii strings.
// rax: first string
- // ebx: length of resulting flat string
+ // rbx: length of resulting flat string
// rdx: second string
// r8: instance type of first string
// r9: instance type of second string
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev