Revision: 11422
Author: [email protected]
Date: Mon Apr 23 11:56:07 2012
Log: Fix some bugs in accessing details of the lastest regexp
match. Sometimes were were not updating it when we should
and sometimes we were leaving the lastMatchInfoOverride in
place when we should be using the updated regular last match
info. Small optimization for zero length match in
String.prototype.replace.
Review URL: https://chromiumcodereview.appspot.com/10184004
http://code.google.com/p/v8/source/detail?r=11422
Modified:
/branches/bleeding_edge/src/macros.py
/branches/bleeding_edge/src/runtime.cc
/branches/bleeding_edge/src/string.js
/branches/bleeding_edge/test/mjsunit/regexp-capture-3.js
=======================================
--- /branches/bleeding_edge/src/macros.py Fri Apr 13 04:03:22 2012
+++ /branches/bleeding_edge/src/macros.py Mon Apr 23 11:56:07 2012
@@ -196,6 +196,7 @@
macro SET_LOCAL_DATE_VALUE(arg, value) = (%DateSetValue(arg, value, 0));
# Last input and last subject of regexp matches.
+const LAST_SUBJECT_INDEX = 1;
macro LAST_SUBJECT(array) = ((array)[1]);
macro LAST_INPUT(array) = ((array)[2]);
=======================================
--- /branches/bleeding_edge/src/runtime.cc Mon Apr 23 06:59:43 2012
+++ /branches/bleeding_edge/src/runtime.cc Mon Apr 23 11:56:07 2012
@@ -2883,14 +2883,81 @@
}
}
}
+
+
+// Two smis before and after the match, for very long strings.
+const int kMaxBuilderEntriesPerRegExpMatch = 5;
+
+
+static void SetLastMatchInfoNoCaptures(Handle<String> subject,
+ Handle<JSArray> last_match_info,
+ int match_start,
+ int match_end) {
+ // Fill last_match_info with a single capture.
+ last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead);
+ AssertNoAllocation no_gc;
+ FixedArray* elements = FixedArray::cast(last_match_info->elements());
+ RegExpImpl::SetLastCaptureCount(elements, 2);
+ RegExpImpl::SetLastInput(elements, *subject);
+ RegExpImpl::SetLastSubject(elements, *subject);
+ RegExpImpl::SetCapture(elements, 0, match_start);
+ RegExpImpl::SetCapture(elements, 1, match_end);
+}
+
+
+template <typename SubjectChar, typename PatternChar>
+static bool SearchStringMultiple(Isolate* isolate,
+ Vector<const SubjectChar> subject,
+ Vector<const PatternChar> pattern,
+ String* pattern_string,
+ FixedArrayBuilder* builder,
+ int* match_pos) {
+ int pos = *match_pos;
+ int subject_length = subject.length();
+ int pattern_length = pattern.length();
+ int max_search_start = subject_length - pattern_length;
+ StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
+ while (pos <= max_search_start) {
+ if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
+ *match_pos = pos;
+ return false;
+ }
+ // Position of end of previous match.
+ int match_end = pos + pattern_length;
+ int new_pos = search.Search(subject, match_end);
+ if (new_pos >= 0) {
+ // A match.
+ if (new_pos > match_end) {
+ ReplacementStringBuilder::AddSubjectSlice(builder,
+ match_end,
+ new_pos);
+ }
+ pos = new_pos;
+ builder->Add(pattern_string);
+ } else {
+ break;
+ }
+ }
+
+ if (pos < max_search_start) {
+ ReplacementStringBuilder::AddSubjectSlice(builder,
+ pos + pattern_length,
+ subject_length);
+ }
+ *match_pos = pos;
+ return true;
+}
+
+
template<typename ResultSeqString>
-MUST_USE_RESULT static MaybeObject* StringReplaceStringWithString(
+MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString(
Isolate* isolate,
Handle<String> subject,
Handle<JSRegExp> pattern_regexp,
- Handle<String> replacement) {
+ Handle<String> replacement,
+ Handle<JSArray> last_match_info) {
ASSERT(subject->IsFlat());
ASSERT(replacement->IsFlat());
@@ -2949,6 +3016,12 @@
subject_pos,
subject_len);
}
+
+ SetLastMatchInfoNoCaptures(subject,
+ last_match_info,
+ indices.at(matches - 1),
+ indices.at(matches - 1) + pattern_len);
+
return *result;
}
@@ -2997,11 +3070,19 @@
compiled_replacement.simple_hint()) {
if (subject_handle->HasOnlyAsciiChars() &&
replacement_handle->HasOnlyAsciiChars()) {
- return StringReplaceStringWithString<SeqAsciiString>(
- isolate, subject_handle, regexp_handle, replacement_handle);
+ return StringReplaceAtomRegExpWithString<SeqAsciiString>(
+ isolate,
+ subject_handle,
+ regexp_handle,
+ replacement_handle,
+ last_match_info_handle);
} else {
- return StringReplaceStringWithString<SeqTwoByteString>(
- isolate, subject_handle, regexp_handle, replacement_handle);
+ return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
+ isolate,
+ subject_handle,
+ regexp_handle,
+ replacement_handle,
+ last_match_info_handle);
}
}
@@ -3090,21 +3171,29 @@
Handle<String> subject_handle(subject);
Handle<JSRegExp> regexp_handle(regexp);
+ Handle<JSArray> last_match_info_handle(last_match_info);
// Shortcut for simple non-regexp global replacements
if (regexp_handle->GetFlags().is_global() &&
regexp_handle->TypeTag() == JSRegExp::ATOM) {
Handle<String> empty_string_handle(HEAP->empty_string());
if (subject_handle->HasOnlyAsciiChars()) {
- return StringReplaceStringWithString<SeqAsciiString>(
- isolate, subject_handle, regexp_handle, empty_string_handle);
+ return StringReplaceAtomRegExpWithString<SeqAsciiString>(
+ isolate,
+ subject_handle,
+ regexp_handle,
+ empty_string_handle,
+ last_match_info_handle);
} else {
- return StringReplaceStringWithString<SeqTwoByteString>(
- isolate, subject_handle, regexp_handle, empty_string_handle);
+ return StringReplaceAtomRegExpWithString<SeqTwoByteString>(
+ isolate,
+ subject_handle,
+ regexp_handle,
+ empty_string_handle,
+ last_match_info_handle);
}
}
- Handle<JSArray> last_match_info_handle(last_match_info);
Handle<Object> match = RegExpImpl::Exec(regexp_handle,
subject_handle,
0,
@@ -3123,6 +3212,10 @@
start = RegExpImpl::GetCapture(match_info_array, 0);
end = RegExpImpl::GetCapture(match_info_array, 1);
}
+
+ bool global = regexp_handle->GetFlags().is_global();
+
+ if (start == end && !global) return *subject_handle;
int length = subject_handle->length();
int new_length = length - (end - start);
@@ -3139,7 +3232,7 @@
}
// If the regexp isn't global, only match once.
- if (!regexp_handle->GetFlags().is_global()) {
+ if (!global) {
if (start > 0) {
String::WriteToFlat(*subject_handle,
answer->GetChars(),
@@ -3638,70 +3731,6 @@
}
-// Two smis before and after the match, for very long strings.
-const int kMaxBuilderEntriesPerRegExpMatch = 5;
-
-
-static void SetLastMatchInfoNoCaptures(Handle<String> subject,
- Handle<JSArray> last_match_info,
- int match_start,
- int match_end) {
- // Fill last_match_info with a single capture.
- last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead);
- AssertNoAllocation no_gc;
- FixedArray* elements = FixedArray::cast(last_match_info->elements());
- RegExpImpl::SetLastCaptureCount(elements, 2);
- RegExpImpl::SetLastInput(elements, *subject);
- RegExpImpl::SetLastSubject(elements, *subject);
- RegExpImpl::SetCapture(elements, 0, match_start);
- RegExpImpl::SetCapture(elements, 1, match_end);
-}
-
-
-template <typename SubjectChar, typename PatternChar>
-static bool SearchStringMultiple(Isolate* isolate,
- Vector<const SubjectChar> subject,
- Vector<const PatternChar> pattern,
- String* pattern_string,
- FixedArrayBuilder* builder,
- int* match_pos) {
- int pos = *match_pos;
- int subject_length = subject.length();
- int pattern_length = pattern.length();
- int max_search_start = subject_length - pattern_length;
- StringSearch<PatternChar, SubjectChar> search(isolate, pattern);
- while (pos <= max_search_start) {
- if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) {
- *match_pos = pos;
- return false;
- }
- // Position of end of previous match.
- int match_end = pos + pattern_length;
- int new_pos = search.Search(subject, match_end);
- if (new_pos >= 0) {
- // A match.
- if (new_pos > match_end) {
- ReplacementStringBuilder::AddSubjectSlice(builder,
- match_end,
- new_pos);
- }
- pos = new_pos;
- builder->Add(pattern_string);
- } else {
- break;
- }
- }
-
- if (pos < max_search_start) {
- ReplacementStringBuilder::AddSubjectSlice(builder,
- pos + pattern_length,
- subject_length);
- }
- *match_pos = pos;
- return true;
-}
-
-
static bool SearchStringMultiple(Isolate* isolate,
Handle<String> subject,
Handle<String> pattern,
@@ -3841,6 +3870,8 @@
}
+// Only called from Runtime_RegExpExecMultiple so it doesn't need to
maintain
+// separate last match info. See comment on that function.
static RegExpImpl::IrregexpResult SearchRegExpMultiple(
Isolate* isolate,
Handle<String> subject,
@@ -3869,10 +3900,6 @@
// End of previous match. Differs from pos if match was empty.
int match_end = 0;
if (result == RegExpImpl::RE_SUCCESS) {
- // Need to keep a copy of the previous match for creating
last_match_info
- // at the end, so we have two vectors that we swap between.
- OffsetsVector registers2(required_registers, isolate);
- Vector<int> prev_register_vector(registers2.vector(),
registers2.length());
bool first = true;
do {
int match_start = register_vector[0];
@@ -3925,11 +3952,6 @@
elements->set(capture_count + 2, *subject);
builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
}
- // Swap register vectors, so the last successful match is in
- // prev_register_vector.
- Vector<int32_t> tmp = prev_register_vector;
- prev_register_vector = register_vector;
- register_vector = tmp;
if (match_end > match_start) {
pos = match_end;
@@ -3961,12 +3983,12 @@
last_match_array->EnsureSize(last_match_array_size);
AssertNoAllocation no_gc;
FixedArray* elements =
FixedArray::cast(last_match_array->elements());
+ // We have to set this even though the rest of the last match array
is
+ // ignored.
RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count);
+ // These are also read without consulting the override.
RegExpImpl::SetLastSubject(elements, *subject);
RegExpImpl::SetLastInput(elements, *subject);
- for (int i = 0; i < last_match_capture_count; i++) {
- RegExpImpl::SetCapture(elements, i, prev_register_vector[i]);
- }
return RegExpImpl::RE_SUCCESS;
}
}
@@ -3975,6 +3997,9 @@
}
+// This is only called for StringReplaceGlobalRegExpWithFunction. This
sets
+// lastMatchInfoOverride to maintain the last match info, so we don't need
to
+// set any other last match array info.
RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) {
ASSERT(args.length() == 4);
HandleScope handles(isolate);
=======================================
--- /branches/bleeding_edge/src/string.js Fri Apr 20 08:20:52 2012
+++ /branches/bleeding_edge/src/string.js Mon Apr 23 11:56:07 2012
@@ -237,10 +237,28 @@
replace);
}
} else {
- return %StringReplaceRegExpWithString(subject,
- search,
- TO_STRING_INLINE(replace),
- lastMatchInfo);
+ if (lastMatchInfoOverride == null) {
+ return %StringReplaceRegExpWithString(subject,
+ search,
+ TO_STRING_INLINE(replace),
+ lastMatchInfo);
+ } else {
+ // We use this hack to detect whether StringReplaceRegExpWithString
+ // found at least one hit. In that case we need to remove any
+ // override.
+ var saved_subject = lastMatchInfo[LAST_SUBJECT_INDEX];
+ lastMatchInfo[LAST_SUBJECT_INDEX] = 0;
+ var answer = %StringReplaceRegExpWithString(subject,
+ search,
+
TO_STRING_INLINE(replace),
+ lastMatchInfo);
+ if (%_IsSmi(lastMatchInfo[LAST_SUBJECT_INDEX])) {
+ lastMatchInfo[LAST_SUBJECT_INDEX] = saved_subject;
+ } else {
+ lastMatchInfoOverride = null;
+ }
+ return answer;
+ }
}
}
@@ -429,14 +447,22 @@
return subject;
}
var len = res.length;
- var i = 0;
if (NUMBER_OF_CAPTURES(lastMatchInfo) == 2) {
+ // If the number of captures is two then there are no explicit
captures in
+ // the regexp, just the implicit capture that captures the whole
match. In
+ // this case we can simplify quite a bit and end up with something
faster.
+ // The builder will consist of some integers that indicate slices of
the
+ // input string and some replacements that were returned from the
replace
+ // function.
var match_start = 0;
var override = new InternalArray(null, 0, subject);
var receiver = %GetDefaultReceiver(replace);
- while (i < len) {
+ for (var i = 0; i < len; i++) {
var elem = res[i];
if (%_IsSmi(elem)) {
+ // Integers represent slices of the original string. Use these to
+ // get the offsets we need for the override array (so things like
+ // RegExp.leftContext work during the callback function.
if (elem > 0) {
match_start = (elem >> 11) + (elem & 0x7ff);
} else {
@@ -448,23 +474,25 @@
lastMatchInfoOverride = override;
var func_result =
%_CallFunction(receiver, elem, match_start, subject, replace);
+ // Overwrite the i'th element in the results with the string we got
+ // back from the callback function.
res[i] = TO_STRING_INLINE(func_result);
match_start += elem.length;
}
- i++;
}
} else {
var receiver = %GetDefaultReceiver(replace);
- while (i < len) {
+ for (var i = 0; i < len; i++) {
var elem = res[i];
if (!%_IsSmi(elem)) {
// elem must be an Array.
// Use the apply argument as backing for global RegExp properties.
lastMatchInfoOverride = elem;
var func_result = %Apply(replace, receiver, elem, 0, elem.length);
+ // Overwrite the i'th element in the results with the string we got
+ // back from the callback function.
res[i] = TO_STRING_INLINE(func_result);
}
- i++;
}
}
var resultBuilder = new ReplaceResultBuilder(subject, res);
=======================================
--- /branches/bleeding_edge/test/mjsunit/regexp-capture-3.js Fri Apr 13
04:03:22 2012
+++ /branches/bleeding_edge/test/mjsunit/regexp-capture-3.js Mon Apr 23
11:56:07 2012
@@ -86,3 +86,71 @@
for (var i = 3; i < 10; i++) {
assertEquals("", RegExp['$' + i]);
}
+
+
+function Override() {
+ // Set the internal lastMatchInfoOverride. After calling this we do a
normal
+ // match and verify the override was cleared and that we record the new
+ // captures.
+ "abcdabcd".replace(/(b)(c)/g, function() { });
+}
+
+
+function TestOverride(input, expect, property, re_src) {
+ var re = new RegExp(re_src);
+ var re_g = new RegExp(re_src, "g");
+
+ function OverrideCase(fn) {
+ Override();
+ fn();
+ assertEquals(expect, RegExp[property]);
+ }
+
+ OverrideCase(function() { return input.replace(re, "x"); });
+ OverrideCase(function() { return input.replace(re_g, "x"); });
+ OverrideCase(function() { return input.replace(re, ""); });
+ OverrideCase(function() { return input.replace(re_g, ""); });
+ OverrideCase(function() { return input.match(re); });
+ OverrideCase(function() { return input.match(re_g); });
+ OverrideCase(function() { return re.test(input); });
+ OverrideCase(function() { return re_g.test(input); });
+}
+
+var input = "bar.foo baz......";
+var re_str = "(ba.).*?f";
+TestOverride(input, "bar", "$1", re_str);
+
+input = "foo bar baz";
+var re_str = "bar";
+TestOverride(input, "bar", "$&", re_str);
+
+
+function no_last_match(fn) {
+ fn();
+ assertEquals("hestfisk", RegExp.$1);
+}
+
+/(hestfisk)/.test("There's no such thing as a hestfisk!");
+
+no_last_match(function() { "foo".replace("f", ""); });
+no_last_match(function() { "foo".replace("f", "f"); });
+no_last_match(function() { "foo".split("o"); });
+
+var base = "In the music. In the music. ";
+var cons = base + base + base + base;
+no_last_match(function() { cons.replace("x", "y"); });
+no_last_match(function() { cons.replace("e", "E"); });
+
+
+// Here's one that matches once, then tries to match again, but fails.
+// Verify that the last match info is from the last match, not from the
+// failure that came after.
+"bar.foo baz......".replace(/(ba.).*?f/g, function() { return "x";});
+assertEquals("bar", RegExp.$1);
+
+
+var a = "foo bar baz".replace(/^|bar/g, "");
+assertEquals("foo baz", a);
+
+a = "foo bar baz".replace(/^|bar/g, "*");
+assertEquals("*foo * baz", a);
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev