Revision: 8999
Author: [email protected]
Date: Tue Aug 23 05:22:12 2011
Log: Replace ToAsciiVector and ToUC16Vector with a single function that
returns a tagged value.
The tag tells whether the content is ASCII or UC16, or whether the string
wasn't flat at all.
BUG: v8:1633
Review URL: http://codereview.chromium.org/7709024
http://code.google.com/p/v8/source/detail?r=8999
Modified:
/branches/bleeding_edge/src/handles.cc
/branches/bleeding_edge/src/hydrogen-instructions.cc
/branches/bleeding_edge/src/interpreter-irregexp.cc
/branches/bleeding_edge/src/jsregexp.cc
/branches/bleeding_edge/src/objects-inl.h
/branches/bleeding_edge/src/objects.cc
/branches/bleeding_edge/src/objects.h
/branches/bleeding_edge/src/runtime.cc
/branches/bleeding_edge/test/mjsunit/string-split.js
=======================================
--- /branches/bleeding_edge/src/handles.cc Thu Jul 28 10:21:22 2011
+++ /branches/bleeding_edge/src/handles.cc Tue Aug 23 05:22:12 2011
@@ -617,15 +617,17 @@
{
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid.
// Dispatch on type of strings.
- if (src->IsAsciiRepresentation()) {
+ String::FlatContent content = src->GetFlatContent(no_heap_allocation);
+ ASSERT(content.IsFlat());
+ if (content.IsAscii()) {
CalculateLineEnds(isolate,
&line_ends,
- src->ToAsciiVector(),
+ content.ToAsciiVector(),
with_last_line);
} else {
CalculateLineEnds(isolate,
&line_ends,
- src->ToUC16Vector(),
+ content.ToUC16Vector(),
with_last_line);
}
}
=======================================
--- /branches/bleeding_edge/src/hydrogen-instructions.cc Tue Aug 23
00:34:45 2011
+++ /branches/bleeding_edge/src/hydrogen-instructions.cc Tue Aug 23
05:22:12 2011
@@ -778,7 +778,8 @@
void HTypeofIsAndBranch::PrintDataTo(StringStream* stream) {
value()->PrintNameTo(stream);
stream->Add(" == ");
- stream->Add(type_literal_->ToAsciiVector());
+ AssertNoAllocation no_alloc;
+ stream->Add(type_literal_->GetFlatContent(no_alloc).ToAsciiVector());
}
=======================================
--- /branches/bleeding_edge/src/interpreter-irregexp.cc Fri Mar 18 13:35:07
2011
+++ /branches/bleeding_edge/src/interpreter-irregexp.cc Tue Aug 23 05:22:12
2011
@@ -1,4 +1,4 @@
-// Copyright 2008 the V8 project authors. All rights reserved.
+// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@@ -635,8 +635,9 @@
AssertNoAllocation a;
const byte* code_base = code_array->GetDataStartAddress();
uc16 previous_char = '\n';
- if (subject->IsAsciiRepresentation()) {
- Vector<const char> subject_vector = subject->ToAsciiVector();
+ String::FlatContent subject_content = subject->GetFlatContent(a);
+ if (subject_content.IsAscii()) {
+ Vector<const char> subject_vector = subject_content.ToAsciiVector();
if (start_position != 0) previous_char = subject_vector[start_position
- 1];
return RawMatch(isolate,
code_base,
@@ -645,7 +646,8 @@
start_position,
previous_char);
} else {
- Vector<const uc16> subject_vector = subject->ToUC16Vector();
+ ASSERT(subject_content.IsTwoByte());
+ Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (start_position != 0) previous_char = subject_vector[start_position
- 1];
return RawMatch(isolate,
code_base,
=======================================
--- /branches/bleeding_edge/src/jsregexp.cc Fri Aug 5 05:31:37 2011
+++ /branches/bleeding_edge/src/jsregexp.cc Tue Aug 23 05:22:12 2011
@@ -212,19 +212,7 @@
RegExpImpl::SetCapture(array, 1, to);
}
- /* template <typename SubjectChar>, typename PatternChar>
-static int ReStringMatch(Vector<const SubjectChar> sub_vector,
- Vector<const PatternChar> pat_vector,
- int start_index) {
-
- int pattern_length = pat_vector.length();
- if (pattern_length == 0) return start_index;
-
- int subject_length = sub_vector.length();
- if (start_index + pattern_length > subject_length) return -1;
- return SearchString(sub_vector, pat_vector, start_index);
-}
- */
+
Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
Handle<String> subject,
int index,
@@ -237,35 +225,41 @@
if (!subject->IsFlat()) FlattenString(subject);
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
// Extract flattened substrings of cons strings before determining
asciiness.
- String* seq_sub = *subject;
- if (seq_sub->IsConsString()) seq_sub =
ConsString::cast(seq_sub)->first();
String* needle = String::cast(re->DataAt(JSRegExp::kAtomPatternIndex));
int needle_len = needle->length();
+ ASSERT(needle->IsFlat());
if (needle_len != 0) {
- if (index + needle_len > subject->length())
- return isolate->factory()->null_value();
-
+ if (index + needle_len > subject->length()) {
+ return isolate->factory()->null_value();
+ }
+
+ String::FlatContent needle_content =
+ needle->GetFlatContent(no_heap_allocation);
+ String::FlatContent subject_content =
+ subject->GetFlatContent(no_heap_allocation);
+ ASSERT(needle_content.IsFlat());
+ ASSERT(subject_content.IsFlat());
// dispatch on type of strings
- index = (needle->IsAsciiRepresentation()
- ? (seq_sub->IsAsciiRepresentation()
+ index = (needle_content.IsAscii()
+ ? (subject_content.IsAscii()
? SearchString(isolate,
- seq_sub->ToAsciiVector(),
- needle->ToAsciiVector(),
+ subject_content.ToAsciiVector(),
+ needle_content.ToAsciiVector(),
index)
: SearchString(isolate,
- seq_sub->ToUC16Vector(),
- needle->ToAsciiVector(),
+ subject_content.ToUC16Vector(),
+ needle_content.ToAsciiVector(),
index))
- : (seq_sub->IsAsciiRepresentation()
+ : (subject_content.IsAscii()
? SearchString(isolate,
- seq_sub->ToAsciiVector(),
- needle->ToUC16Vector(),
+ subject_content.ToAsciiVector(),
+ needle_content.ToUC16Vector(),
index)
: SearchString(isolate,
- seq_sub->ToUC16Vector(),
- needle->ToUC16Vector(),
+ subject_content.ToUC16Vector(),
+ needle_content.ToUC16Vector(),
index)));
if (index == -1) return isolate->factory()->null_value();
}
=======================================
--- /branches/bleeding_edge/src/objects-inl.h Fri Aug 12 06:54:27 2011
+++ /branches/bleeding_edge/src/objects-inl.h Tue Aug 23 05:22:12 2011
@@ -295,6 +295,11 @@
uint32_t tag = (type_ & kStringRepresentationMask);
return static_cast<StringRepresentationTag>(tag);
}
+
+
+uint32_t StringShape::encoding_tag() {
+ return type_ & kStringEncodingMask;
+}
uint32_t StringShape::full_representation_tag() {
=======================================
--- /branches/bleeding_edge/src/objects.cc Thu Aug 18 02:51:08 2011
+++ /branches/bleeding_edge/src/objects.cc Tue Aug 23 05:22:12 2011
@@ -5038,55 +5038,38 @@
}
-Vector<const char> String::ToAsciiVector() {
- ASSERT(IsAsciiRepresentation());
- ASSERT(IsFlat());
-
- int offset = 0;
+String::FlatContent String::GetFlatContent(const AssertNoAllocation&
promise) {
+ // Argument isn't used, it's only there to ensure that the user is
+ // aware that the extracted vectors may not survive a GC.
int length = this->length();
- StringRepresentationTag string_tag =
StringShape(this).representation_tag();
+ StringShape shape(this);
String* string = this;
- if (string_tag == kConsStringTag) {
+ if (shape.representation_tag() == kConsStringTag) {
ConsString* cons = ConsString::cast(string);
- ASSERT(cons->second()->length() == 0);
+ if (cons->second()->length() != 0) {
+ return FlatContent();
+ }
string = cons->first();
- string_tag = StringShape(string).representation_tag();
- }
- if (string_tag == kSeqStringTag) {
- SeqAsciiString* seq = SeqAsciiString::cast(string);
- char* start = seq->GetChars();
- return Vector<const char>(start + offset, length);
- }
- ASSERT(string_tag == kExternalStringTag);
- ExternalAsciiString* ext = ExternalAsciiString::cast(string);
- const char* start = ext->resource()->data();
- return Vector<const char>(start + offset, length);
-}
-
-
-Vector<const uc16> String::ToUC16Vector() {
- ASSERT(IsTwoByteRepresentation());
- ASSERT(IsFlat());
-
- int offset = 0;
- int length = this->length();
- StringRepresentationTag string_tag =
StringShape(this).representation_tag();
- String* string = this;
- if (string_tag == kConsStringTag) {
- ConsString* cons = ConsString::cast(string);
- ASSERT(cons->second()->length() == 0);
- string = cons->first();
- string_tag = StringShape(string).representation_tag();
- }
- if (string_tag == kSeqStringTag) {
- SeqTwoByteString* seq = SeqTwoByteString::cast(string);
- return Vector<const uc16>(seq->GetChars() + offset, length);
- }
- ASSERT(string_tag == kExternalStringTag);
- ExternalTwoByteString* ext = ExternalTwoByteString::cast(string);
- const uc16* start =
- reinterpret_cast<const uc16*>(ext->resource()->data());
- return Vector<const uc16>(start + offset, length);
+ shape = StringShape(string);
+ }
+ if (shape.encoding_tag() == kAsciiStringTag) {
+ const char* start;
+ if (shape.representation_tag() == kSeqStringTag) {
+ start = SeqAsciiString::cast(string)->GetChars();
+ } else {
+ start = ExternalAsciiString::cast(string)->resource()->data();
+ }
+ return FlatContent(Vector<const char>(start, length));
+ } else {
+ ASSERT(shape.encoding_tag() == kTwoByteStringTag);
+ const uc16* start;
+ if (shape.representation_tag() == kSeqStringTag) {
+ start = SeqTwoByteString::cast(string)->GetChars();
+ } else {
+ start = ExternalTwoByteString::cast(string)->resource()->data();
+ }
+ return FlatContent(Vector<const uc16>(start, length));
+ }
}
@@ -5536,11 +5519,14 @@
if (str_ == NULL) return;
Handle<String> str(str_);
ASSERT(str->IsFlat());
- is_ascii_ = str->IsAsciiRepresentation();
+ AssertNoAllocation no_alloc;
+ String::FlatContent content = str->GetFlatContent(no_alloc);
+ ASSERT(content.IsFlat());
+ is_ascii_ = content.IsAscii();
if (is_ascii_) {
- start_ = str->ToAsciiVector().start();
+ start_ = content.ToAsciiVector().start();
} else {
- start_ = str->ToUC16Vector().start();
+ start_ = content.ToUC16Vector().start();
}
}
@@ -5860,12 +5846,14 @@
static inline bool CompareStringContentsPartial(Isolate* isolate,
IteratorA* ia,
String* b) {
- if (b->IsFlat()) {
- if (b->IsAsciiRepresentation()) {
- VectorIterator<char> ib(b->ToAsciiVector());
+ AssertNoAllocation no_alloc;
+ String::FlatContent content = b->GetFlatContent(no_alloc);
+ if (content.IsFlat()) {
+ if (content.IsAscii()) {
+ VectorIterator<char> ib(content.ToAsciiVector());
return CompareStringContents(ia, &ib);
} else {
- VectorIterator<uc16> ib(b->ToUC16Vector());
+ VectorIterator<uc16> ib(content.ToUC16Vector());
return CompareStringContents(ia, &ib);
}
} else {
@@ -5895,6 +5883,8 @@
String* lhs = this->TryFlattenGetString();
String* rhs = other->TryFlattenGetString();
+ AssertNoAllocation no_alloc;
+
if (StringShape(lhs).IsSequentialAscii() &&
StringShape(rhs).IsSequentialAscii()) {
const char* str1 = SeqAsciiString::cast(lhs)->GetChars();
@@ -5904,16 +5894,18 @@
}
Isolate* isolate = GetIsolate();
- if (lhs->IsFlat()) {
- if (lhs->IsAsciiRepresentation()) {
- Vector<const char> vec1 = lhs->ToAsciiVector();
- if (rhs->IsFlat()) {
- if (rhs->IsAsciiRepresentation()) {
- Vector<const char> vec2 = rhs->ToAsciiVector();
+ String::FlatContent lhs_content = lhs->GetFlatContent(no_alloc);
+ String::FlatContent rhs_content = rhs->GetFlatContent(no_alloc);
+ if (lhs_content.IsFlat()) {
+ if (lhs_content.IsAscii()) {
+ Vector<const char> vec1 = lhs_content.ToAsciiVector();
+ if (rhs_content.IsFlat()) {
+ if (rhs_content.IsAscii()) {
+ Vector<const char> vec2 = rhs_content.ToAsciiVector();
return CompareRawStringContents(vec1, vec2);
} else {
VectorIterator<char> buf1(vec1);
- VectorIterator<uc16> ib(rhs->ToUC16Vector());
+ VectorIterator<uc16> ib(rhs_content.ToUC16Vector());
return CompareStringContents(&buf1, &ib);
}
} else {
@@ -5923,14 +5915,14 @@
isolate->objects_string_compare_buffer_b());
}
} else {
- Vector<const uc16> vec1 = lhs->ToUC16Vector();
- if (rhs->IsFlat()) {
- if (rhs->IsAsciiRepresentation()) {
+ Vector<const uc16> vec1 = lhs_content.ToUC16Vector();
+ if (rhs_content.IsFlat()) {
+ if (rhs_content.IsAscii()) {
VectorIterator<uc16> buf1(vec1);
- VectorIterator<char> ib(rhs->ToAsciiVector());
+ VectorIterator<char> ib(rhs_content.ToAsciiVector());
return CompareStringContents(&buf1, &ib);
} else {
- Vector<const uc16> vec2(rhs->ToUC16Vector());
+ Vector<const uc16> vec2(rhs_content.ToUC16Vector());
return CompareRawStringContents(vec1, vec2);
}
} else {
@@ -5981,10 +5973,13 @@
bool String::IsAsciiEqualTo(Vector<const char> str) {
+ AssertNoAllocation no_alloc;
int slen = length();
if (str.length() != slen) return false;
- if (IsFlat() && IsAsciiRepresentation()) {
- return CompareChars(ToAsciiVector().start(), str.start(), slen) == 0;
+ FlatContent content = GetFlatContent(no_alloc);
+ if (content.IsAscii()) {
+ return CompareChars(content.ToAsciiVector().start(),
+ str.start(), slen) == 0;
}
for (int i = 0; i < slen; i++) {
if (Get(i) != static_cast<uint16_t>(str[i])) return false;
@@ -5994,10 +5989,12 @@
bool String::IsTwoByteEqualTo(Vector<const uc16> str) {
+ AssertNoAllocation no_alloc;
int slen = length();
if (str.length() != slen) return false;
- if (IsFlat() && IsTwoByteRepresentation()) {
- return CompareChars(ToUC16Vector().start(), str.start(), slen) == 0;
+ FlatContent content = GetFlatContent(no_alloc);
+ if (content.IsTwoByte()) {
+ return CompareChars(content.ToUC16Vector().start(), str.start(), slen)
== 0;
}
for (int i = 0; i < slen; i++) {
if (Get(i) != str[i]) return false;
=======================================
--- /branches/bleeding_edge/src/objects.h Mon Aug 15 05:55:18 2011
+++ /branches/bleeding_edge/src/objects.h Tue Aug 23 05:22:12 2011
@@ -5789,6 +5789,7 @@
inline bool IsSequentialTwoByte();
inline bool IsSymbol();
inline StringRepresentationTag representation_tag();
+ inline uint32_t encoding_tag();
inline uint32_t full_representation_tag();
inline uint32_t size_tag();
#ifdef DEBUG
@@ -5820,6 +5821,51 @@
// All string values have a length field.
class String: public HeapObject {
public:
+ // Representation of the flat content of a String.
+ // A non-flat string doesn't have flat content.
+ // A flat string has content that's encoded as a sequence of either
+ // ASCII chars or two-byte UC16.
+ // Returned by String::GetFlatContent().
+ class FlatContent {
+ public:
+ // Returns true if the string is flat and this structure contains
content.
+ bool IsFlat() { return state_ != NON_FLAT; }
+ // Returns true if the structure contains ASCII content.
+ bool IsAscii() { return state_ == ASCII; }
+ // Returns true if the structure contains two-byte content.
+ bool IsTwoByte() { return state_ == TWO_BYTE; }
+
+ // Return the ASCII content of the string. Only use if IsAscii()
returns
+ // true.
+ Vector<const char> ToAsciiVector() {
+ ASSERT_EQ(ASCII, state_);
+ return Vector<const char>::cast(buffer_);
+ }
+ // Return the two-byte content of the string. Only use if IsTwoByte()
+ // returns true.
+ Vector<const uc16> ToUC16Vector() {
+ ASSERT_EQ(TWO_BYTE, state_);
+ return Vector<const uc16>::cast(buffer_);
+ }
+
+ private:
+ enum State { NON_FLAT, ASCII, TWO_BYTE };
+
+ // Constructors only used by String::GetFlatContent().
+ explicit FlatContent(Vector<const char> chars)
+ : buffer_(Vector<const byte>::cast(chars)),
+ state_(ASCII) { }
+ explicit FlatContent(Vector<const uc16> chars)
+ : buffer_(Vector<const byte>::cast(chars)),
+ state_(TWO_BYTE) { }
+ FlatContent() : buffer_(), state_(NON_FLAT) { }
+
+ Vector<const byte> buffer_;
+ State state_;
+
+ friend class String;
+ };
+
// Get and set the length of the string.
inline int length();
inline void set_length(int value);
@@ -5831,10 +5877,10 @@
inline bool IsAsciiRepresentation();
inline bool IsTwoByteRepresentation();
- // Returns whether this string has ascii chars, i.e. all of them can
- // be ascii encoded. This might be the case even if the string is
+ // Returns whether this string has only ASCII chars, i.e. all of them can
+ // be ASCII encoded. This might be the case even if the string is
// two-byte. Such strings may appear when the embedder prefers
- // two-byte external representations even for ascii data.
+ // two-byte external representations even for ASCII data.
//
// NOTE: this should be considered only a hint. False negatives are
// possible.
@@ -5868,8 +5914,12 @@
// string.
inline String* TryFlattenGetString(PretenureFlag pretenure =
NOT_TENURED);
- Vector<const char> ToAsciiVector();
- Vector<const uc16> ToUC16Vector();
+ // Tries to return the content of a flat string as a structure holding
either
+ // a flat vector of char or of uc16.
+ // If the string isn't flat, and therefore doesn't have flat content, the
+ // returned structure will report so, and can't provide a vector of
either
+ // kind.
+ FlatContent GetFlatContent(const AssertNoAllocation& safety_promise);
// Mark the string as an undetectable object. It only applies to
// ascii and two byte string types.
=======================================
--- /branches/bleeding_edge/src/runtime.cc Mon Aug 22 06:55:25 2011
+++ /branches/bleeding_edge/src/runtime.cc Tue Aug 23 05:22:12 2011
@@ -2663,21 +2663,22 @@
void CompiledReplacement::Compile(Handle<String> replacement,
int capture_count,
int subject_length) {
- ASSERT(replacement->IsFlat());
- if (replacement->IsAsciiRepresentation()) {
+ {
AssertNoAllocation no_alloc;
- ParseReplacementPattern(&parts_,
- replacement->ToAsciiVector(),
- capture_count,
- subject_length);
- } else {
- ASSERT(replacement->IsTwoByteRepresentation());
- AssertNoAllocation no_alloc;
-
- ParseReplacementPattern(&parts_,
- replacement->ToUC16Vector(),
- capture_count,
- subject_length);
+ String::FlatContent content = replacement->GetFlatContent(no_alloc);
+ ASSERT(content.IsFlat());
+ if (content.IsAscii()) {
+ ParseReplacementPattern(&parts_,
+ content.ToAsciiVector(),
+ capture_count,
+ subject_length);
+ } else {
+ ASSERT(content.IsTwoByte());
+ ParseReplacementPattern(&parts_,
+ content.ToUC16Vector(),
+ capture_count,
+ subject_length);
+ }
}
Isolate* isolate = replacement->GetIsolate();
// Find substrings of replacement string and create them as String
objects.
@@ -3049,34 +3050,32 @@
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
// Extract flattened substrings of cons strings before determining
asciiness.
- String* seq_sub = *sub;
- if (seq_sub->IsConsString()) seq_sub =
ConsString::cast(seq_sub)->first();
- String* seq_pat = *pat;
- if (seq_pat->IsConsString()) seq_pat =
ConsString::cast(seq_pat)->first();
+ String::FlatContent seq_sub = sub->GetFlatContent(no_heap_allocation);
+ String::FlatContent seq_pat = pat->GetFlatContent(no_heap_allocation);
// dispatch on type of strings
- if (seq_pat->IsAsciiRepresentation()) {
- Vector<const char> pat_vector = seq_pat->ToAsciiVector();
- if (seq_sub->IsAsciiRepresentation()) {
+ if (seq_pat.IsAscii()) {
+ Vector<const char> pat_vector = seq_pat.ToAsciiVector();
+ if (seq_sub.IsAscii()) {
return SearchString(isolate,
- seq_sub->ToAsciiVector(),
+ seq_sub.ToAsciiVector(),
pat_vector,
start_index);
}
return SearchString(isolate,
- seq_sub->ToUC16Vector(),
+ seq_sub.ToUC16Vector(),
pat_vector,
start_index);
}
- Vector<const uc16> pat_vector = seq_pat->ToUC16Vector();
- if (seq_sub->IsAsciiRepresentation()) {
+ Vector<const uc16> pat_vector = seq_pat.ToUC16Vector();
+ if (seq_sub.IsAscii()) {
return SearchString(isolate,
- seq_sub->ToAsciiVector(),
+ seq_sub.ToAsciiVector(),
pat_vector,
start_index);
}
return SearchString(isolate,
- seq_sub->ToUC16Vector(),
+ seq_sub.ToUC16Vector(),
pat_vector,
start_index);
}
@@ -3161,31 +3160,29 @@
int position = -1;
AssertNoAllocation no_heap_allocation; // ensure vectors stay valid
- // Extract flattened substrings of cons strings before determining
asciiness.
- String* seq_sub = *sub;
- if (seq_sub->IsConsString()) seq_sub =
ConsString::cast(seq_sub)->first();
- String* seq_pat = *pat;
- if (seq_pat->IsConsString()) seq_pat =
ConsString::cast(seq_pat)->first();
-
- if (seq_pat->IsAsciiRepresentation()) {
- Vector<const char> pat_vector = seq_pat->ToAsciiVector();
- if (seq_sub->IsAsciiRepresentation()) {
- position = StringMatchBackwards(seq_sub->ToAsciiVector(),
+
+ String::FlatContent sub_content =
sub->GetFlatContent(no_heap_allocation);
+ String::FlatContent pat_content =
pat->GetFlatContent(no_heap_allocation);
+
+ if (pat_content.IsAscii()) {
+ Vector<const char> pat_vector = pat_content.ToAsciiVector();
+ if (sub_content.IsAscii()) {
+ position = StringMatchBackwards(sub_content.ToAsciiVector(),
pat_vector,
start_index);
} else {
- position = StringMatchBackwards(seq_sub->ToUC16Vector(),
+ position = StringMatchBackwards(sub_content.ToUC16Vector(),
pat_vector,
start_index);
}
} else {
- Vector<const uc16> pat_vector = seq_pat->ToUC16Vector();
- if (seq_sub->IsAsciiRepresentation()) {
- position = StringMatchBackwards(seq_sub->ToAsciiVector(),
+ Vector<const uc16> pat_vector = pat_content.ToUC16Vector();
+ if (sub_content.IsAscii()) {
+ position = StringMatchBackwards(sub_content.ToAsciiVector(),
pat_vector,
start_index);
} else {
- position = StringMatchBackwards(seq_sub->ToUC16Vector(),
+ position = StringMatchBackwards(sub_content.ToUC16Vector(),
pat_vector,
start_index);
}
@@ -3403,36 +3400,38 @@
for (;;) { // Break when search complete.
builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
AssertNoAllocation no_gc;
- if (subject->IsAsciiRepresentation()) {
- Vector<const char> subject_vector = subject->ToAsciiVector();
- if (pattern->IsAsciiRepresentation()) {
+ String::FlatContent subject_content = subject->GetFlatContent(no_gc);
+ String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
+ if (subject_content.IsAscii()) {
+ Vector<const char> subject_vector = subject_content.ToAsciiVector();
+ if (pattern_content.IsAscii()) {
if (SearchStringMultiple(isolate,
subject_vector,
- pattern->ToAsciiVector(),
+ pattern_content.ToAsciiVector(),
*pattern,
builder,
&match_pos)) break;
} else {
if (SearchStringMultiple(isolate,
subject_vector,
- pattern->ToUC16Vector(),
+ pattern_content.ToUC16Vector(),
*pattern,
builder,
&match_pos)) break;
}
} else {
- Vector<const uc16> subject_vector = subject->ToUC16Vector();
- if (pattern->IsAsciiRepresentation()) {
+ Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
+ if (pattern_content.IsAscii()) {
if (SearchStringMultiple(isolate,
subject_vector,
- pattern->ToAsciiVector(),
+ pattern_content.ToAsciiVector(),
*pattern,
builder,
&match_pos)) break;
} else {
if (SearchStringMultiple(isolate,
subject_vector,
- pattern->ToUC16Vector(),
+ pattern_content.ToUC16Vector(),
*pattern,
builder,
&match_pos)) break;
@@ -5420,12 +5419,15 @@
str = String::cast(flat);
ASSERT(str->IsFlat());
}
- if (str->IsTwoByteRepresentation()) {
+ AssertNoAllocation no_alloc;
+ String::FlatContent flat = str->GetFlatContent(no_alloc);
+ ASSERT(flat.IsFlat());
+ if (flat.IsTwoByte()) {
return QuoteJsonString<uc16, SeqTwoByteString, false>(isolate,
-
str->ToUC16Vector());
+
flat.ToUC16Vector());
} else {
return QuoteJsonString<char, SeqAsciiString, false>(isolate,
-
str->ToAsciiVector());
+
flat.ToAsciiVector());
}
}
@@ -5442,12 +5444,14 @@
str = String::cast(flat);
ASSERT(str->IsFlat());
}
- if (str->IsTwoByteRepresentation()) {
+ AssertNoAllocation no_alloc;
+ String::FlatContent flat = str->GetFlatContent(no_alloc);
+ if (flat.IsTwoByte()) {
return QuoteJsonString<uc16, SeqTwoByteString, true>(isolate,
-
str->ToUC16Vector());
+
flat.ToUC16Vector());
} else {
return QuoteJsonString<char, SeqAsciiString, true>(isolate,
-
str->ToAsciiVector());
+
flat.ToAsciiVector());
}
}
@@ -5482,14 +5486,16 @@
for (int i = 0; i < length; i++) {
if (i != 0) *(write_cursor++) = ',';
String* str = String::cast(array->get(i));
- if (str->IsTwoByteRepresentation()) {
+ String::FlatContent content = str->GetFlatContent(no_gc);
+ ASSERT(content.IsFlat());
+ if (content.IsTwoByte()) {
write_cursor = WriteQuoteJsonString<Char, uc16>(isolate,
write_cursor,
- str->ToUC16Vector());
+
content.ToUC16Vector());
} else {
write_cursor = WriteQuoteJsonString<Char, char>(isolate,
write_cursor,
-
str->ToAsciiVector());
+
content.ToAsciiVector());
}
}
*(write_cursor++) = ']';
@@ -5968,11 +5974,15 @@
// No allocation block.
{
- AssertNoAllocation nogc;
- if (subject->IsAsciiRepresentation()) {
- Vector<const char> subject_vector = subject->ToAsciiVector();
- if (pattern->IsAsciiRepresentation()) {
- Vector<const char> pattern_vector = pattern->ToAsciiVector();
+ AssertNoAllocation no_gc;
+ String::FlatContent subject_content = subject->GetFlatContent(no_gc);
+ String::FlatContent pattern_content = pattern->GetFlatContent(no_gc);
+ ASSERT(subject_content.IsFlat());
+ ASSERT(pattern_content.IsFlat());
+ if (subject_content.IsAscii()) {
+ Vector<const char> subject_vector = subject_content.ToAsciiVector();
+ if (pattern_content.IsAscii()) {
+ Vector<const char> pattern_vector =
pattern_content.ToAsciiVector();
if (pattern_vector.length() == 1) {
FindAsciiStringIndices(subject_vector,
pattern_vector[0],
@@ -5988,22 +5998,22 @@
} else {
FindStringIndices(isolate,
subject_vector,
- pattern->ToUC16Vector(),
+ pattern_content.ToUC16Vector(),
&indices,
limit);
}
} else {
- Vector<const uc16> subject_vector = subject->ToUC16Vector();
+ Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
- if (pattern->IsAsciiRepresentation()) {
+ if (pattern_content.IsAscii()) {
FindStringIndices(isolate,
subject_vector,
- pattern->ToAsciiVector(),
+ pattern_content.ToAsciiVector(),
&indices,
limit);
} else {
FindStringIndices(isolate,
subject_vector,
- pattern->ToUC16Vector(),
+ pattern_content.ToUC16Vector(),
&indices,
limit);
}
@@ -6085,36 +6095,40 @@
CONVERT_ARG_CHECKED(String, s, 0);
CONVERT_NUMBER_CHECKED(uint32_t, limit, Uint32, args[1]);
- s->TryFlatten();
+ s = FlattenGetString(s);
const int length = static_cast<int>(Min<uint32_t>(s->length(), limit));
Handle<FixedArray> elements;
+ int position = 0;
if (s->IsFlat() && s->IsAsciiRepresentation()) {
+ // Try using cached chars where possible.
Object* obj;
{ MaybeObject* maybe_obj =
isolate->heap()->AllocateUninitializedFixedArray(length);
if (!maybe_obj->ToObject(&obj)) return maybe_obj;
}
+ AssertNoAllocation no_alloc;
elements = Handle<FixedArray>(FixedArray::cast(obj), isolate);
-
- Vector<const char> chars = s->ToAsciiVector();
- // Note, this will initialize all elements (not only the prefix)
- // to prevent GC from seeing partially initialized array.
- int num_copied_from_cache =
CopyCachedAsciiCharsToArray(isolate->heap(),
- chars.start(),
- *elements,
- length);
-
- for (int i = num_copied_from_cache; i < length; ++i) {
- Handle<Object> str = LookupSingleCharacterStringFromCode(chars[i]);
- elements->set(i, *str);
+ String::FlatContent content = s->GetFlatContent(no_alloc);
+ if (content.IsAscii()) {
+ Vector<const char> chars = content.ToAsciiVector();
+ // Note, this will initialize all elements (not only the prefix)
+ // to prevent GC from seeing partially initialized array.
+ position = CopyCachedAsciiCharsToArray(isolate->heap(),
+ chars.start(),
+ *elements,
+ length);
+ } else {
+ MemsetPointer(elements->data_start(),
+ isolate->heap()->undefined_value(),
+ length);
}
} else {
elements = isolate->factory()->NewFixedArray(length);
- for (int i = 0; i < length; ++i) {
- Handle<Object> str = LookupSingleCharacterStringFromCode(s->Get(i));
- elements->set(i, *str);
- }
+ }
+ for (int i = position; i < length; ++i) {
+ Handle<Object> str = LookupSingleCharacterStringFromCode(s->Get(i));
+ elements->set(i, *str);
}
#ifdef DEBUG
@@ -6916,6 +6930,7 @@
static Object* FlatStringCompare(String* x, String* y) {
ASSERT(x->IsFlat());
ASSERT(y->IsFlat());
+ AssertNoAllocation no_alloc;
Object* equal_prefix_result = Smi::FromInt(EQUAL);
int prefix_length = x->length();
if (y->length() < prefix_length) {
@@ -6925,22 +6940,24 @@
equal_prefix_result = Smi::FromInt(LESS);
}
int r;
- if (x->IsAsciiRepresentation()) {
- Vector<const char> x_chars = x->ToAsciiVector();
- if (y->IsAsciiRepresentation()) {
- Vector<const char> y_chars = y->ToAsciiVector();
+ String::FlatContent x_content = x->GetFlatContent(no_alloc);
+ String::FlatContent y_content = y->GetFlatContent(no_alloc);
+ if (x_content.IsAscii()) {
+ Vector<const char> x_chars = x_content.ToAsciiVector();
+ if (y_content.IsAscii()) {
+ Vector<const char> y_chars = y_content.ToAsciiVector();
r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
} else {
- Vector<const uc16> y_chars = y->ToUC16Vector();
+ Vector<const uc16> y_chars = y_content.ToUC16Vector();
r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
}
} else {
- Vector<const uc16> x_chars = x->ToUC16Vector();
- if (y->IsAsciiRepresentation()) {
- Vector<const char> y_chars = y->ToAsciiVector();
+ Vector<const uc16> x_chars = x_content.ToUC16Vector();
+ if (y_content.IsAscii()) {
+ Vector<const char> y_chars = y_content.ToAsciiVector();
r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
} else {
- Vector<const uc16> y_chars = y->ToUC16Vector();
+ Vector<const uc16> y_chars = y_content.ToUC16Vector();
r = CompareChars(x_chars.start(), y_chars.start(), prefix_length);
}
}
@@ -8821,13 +8838,14 @@
FixedArray* output_array = FixedArray::cast(output->elements());
RUNTIME_ASSERT(output_array->length() >= DateParser::OUTPUT_SIZE);
bool result;
- if (str->IsAsciiRepresentation()) {
- result = DateParser::Parse(str->ToAsciiVector(),
+ String::FlatContent str_content = str->GetFlatContent(no_allocation);
+ if (str_content.IsAscii()) {
+ result = DateParser::Parse(str_content.ToAsciiVector(),
output_array,
isolate->unicode_cache());
} else {
- ASSERT(str->IsTwoByteRepresentation());
- result = DateParser::Parse(str->ToUC16Vector(),
+ ASSERT(str_content.IsTwoByte());
+ result = DateParser::Parse(str_content.ToUC16Vector(),
output_array,
isolate->unicode_cache());
}
@@ -12805,9 +12823,12 @@
RUNTIME_FUNCTION(MaybeObject*, Runtime_Log) {
ASSERT(args.length() == 2);
+ AssertNoAllocation no_alloc;
CONVERT_CHECKED(String, format, args[0]);
CONVERT_CHECKED(JSArray, elms, args[1]);
- Vector<const char> chars = format->ToAsciiVector();
+ String::FlatContent format_content = format->GetFlatContent(no_alloc);
+ RUNTIME_ASSERT(format_content.IsAscii());
+ Vector<const char> chars = format_content.ToAsciiVector();
LOGGER->LogRuntime(chars, elms);
return isolate->heap()->undefined_value();
}
=======================================
--- /branches/bleeding_edge/test/mjsunit/string-split.js Tue Dec 7
03:01:02 2010
+++ /branches/bleeding_edge/test/mjsunit/string-split.js Tue Aug 23
05:22:12 2011
@@ -116,3 +116,14 @@
assertEquals(["a", "b", "c"], "abc".split("", numberObj(3)));
assertEquals(["a", "b", "c"], "abc".split("", 4));
assertEquals(["a", "b", "c"], "abc".split("", numberObj(4)));
+
+var all_ascii_chars = [];
+for (var i = 0; i < 128; i++) all_ascii_chars[i] = String.fromCharCode(i);
+var all_ascii_string = all_ascii_chars.join("");
+
+var split_chars = all_ascii_string.split("");
+assertEquals(128, split_chars.length);
+for (var i = 0; i < 128; i++) {
+ assertEquals(1, split_chars[i].length);
+ assertEquals(i, split_chars[i].charCodeAt(0));
+}
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev