Diff
Modified: trunk/Source/_javascript_Core/CMakeLists.txt (201450 => 201451)
--- trunk/Source/_javascript_Core/CMakeLists.txt 2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/CMakeLists.txt 2016-05-27 14:59:46 UTC (rev 201451)
@@ -744,6 +744,7 @@
runtime/MapConstructor.cpp
runtime/MapIteratorPrototype.cpp
runtime/MapPrototype.cpp
+ runtime/MatchResult.cpp
runtime/MathCommon.cpp
runtime/MathObject.cpp
runtime/MemoryStatistics.cpp
Modified: trunk/Source/_javascript_Core/ChangeLog (201450 => 201451)
--- trunk/Source/_javascript_Core/ChangeLog 2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/ChangeLog 2016-05-27 14:59:46 UTC (rev 201451)
@@ -1,3 +1,37 @@
+2016-05-26 Filip Pizlo <[email protected]>
+
+ Bogus uses of regexp matching should realize that they will OOM before they start swapping
+ https://bugs.webkit.org/show_bug.cgi?id=158142
+
+ Reviewed by Michael Saboff.
+
+ Refactored the RegExpObject::matchGlobal() code so that there is less duplication. Took
+ advantage of this to make the code more resilient in case of absurd situations: if the
+ result array gets large, it proceeds with a dry run to detect how many matches there will
+ be. This allows it to OOM before it starts swapping.
+
+ This also improves the overall performance of the code by using lightweight substrings and
+ skipping the whole intermediate argument array.
+
+ This makes some jsfunfuzz tests run a lot faster and use a lot less memory.
+
+ * builtins/RegExpPrototype.js:
+ * CMakeLists.txt:
+ * _javascript_Core.xcodeproj/project.pbxproj:
+ * runtime/MatchResult.cpp: Added.
+ (JSC::MatchResult::dump):
+ * runtime/MatchResult.h:
+ (JSC::MatchResult::empty):
+ (MatchResult::empty): Deleted.
+ * runtime/RegExpObject.cpp:
+ (JSC::RegExpObject::match):
+ (JSC::collectMatches):
+ (JSC::RegExpObject::matchGlobal):
+ * runtime/StringObject.h:
+ (JSC::jsStringWithReuse):
+ (JSC::jsSubstring):
+ * tests/stress/big-match.js: Added. Make sure that this optimization doesn't break big matches.
+
2016-05-26 Gavin & Ellie Barraclough <[email protected]>
Static table property lookup should not require getOwnPropertySlot override.
Modified: trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj (201450 => 201451)
--- trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj 2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj 2016-05-27 14:59:46 UTC (rev 201451)
@@ -1987,6 +1987,7 @@
DC605B5E1CE26EA200593718 /* ProfilerEvent.h in Headers */ = {isa = PBXBuildFile; fileRef = DC605B5A1CE26E9800593718 /* ProfilerEvent.h */; settings = {ATTRIBUTES = (Private, ); }; };
DC605B5F1CE26EA500593718 /* ProfilerUID.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DC605B5B1CE26E9800593718 /* ProfilerUID.cpp */; };
DC605B601CE26EA700593718 /* ProfilerUID.h in Headers */ = {isa = PBXBuildFile; fileRef = DC605B5C1CE26E9800593718 /* ProfilerUID.h */; settings = {ATTRIBUTES = (Private, ); }; };
+ DC69AA661CF7A1F200C6272F /* MatchResult.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DC69AA651CF7A1EF00C6272F /* MatchResult.cpp */; };
DC7997831CDE9FA0004D4A09 /* TagRegistersMode.h in Headers */ = {isa = PBXBuildFile; fileRef = DC7997821CDE9F9E004D4A09 /* TagRegistersMode.h */; settings = {ATTRIBUTES = (Private, ); }; };
DC7997841CDE9FA2004D4A09 /* TagRegistersMode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = DC7997811CDE9F9E004D4A09 /* TagRegistersMode.cpp */; };
DCEE22091CEB9895000C2396 /* DFGBackwardsCFG.h in Headers */ = {isa = PBXBuildFile; fileRef = DCEE22061CEB9890000C2396 /* DFGBackwardsCFG.h */; };
@@ -4200,6 +4201,7 @@
DC605B5A1CE26E9800593718 /* ProfilerEvent.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ProfilerEvent.h; path = profiler/ProfilerEvent.h; sourceTree = "<group>"; };
DC605B5B1CE26E9800593718 /* ProfilerUID.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ProfilerUID.cpp; path = profiler/ProfilerUID.cpp; sourceTree = "<group>"; };
DC605B5C1CE26E9800593718 /* ProfilerUID.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ProfilerUID.h; path = profiler/ProfilerUID.h; sourceTree = "<group>"; };
+ DC69AA651CF7A1EF00C6272F /* MatchResult.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = MatchResult.cpp; sourceTree = "<group>"; };
DC7997811CDE9F9E004D4A09 /* TagRegistersMode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TagRegistersMode.cpp; sourceTree = "<group>"; };
DC7997821CDE9F9E004D4A09 /* TagRegistersMode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TagRegistersMode.h; sourceTree = "<group>"; };
DCEE22061CEB9890000C2396 /* DFGBackwardsCFG.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DFGBackwardsCFG.h; path = dfg/DFGBackwardsCFG.h; sourceTree = "<group>"; };
@@ -5504,12 +5506,6 @@
7EF6E0BB0EB7A1EC0079AFAF /* runtime */ = {
isa = PBXGroup;
children = (
- 708EBE231CE8F35000453146 /* IntlObjectInlines.h */,
- DCF3D5641CD29468003D5C65 /* LazyClassStructure.cpp */,
- DCF3D5651CD29468003D5C65 /* LazyClassStructure.h */,
- DCF3D5661CD29468003D5C65 /* LazyClassStructureInlines.h */,
- DCF3D5671CD29468003D5C65 /* LazyProperty.h */,
- DCF3D5681CD29468003D5C65 /* LazyPropertyInlines.h */,
BCF605110E203EF800B9A64D /* ArgList.cpp */,
BCF605120E203EF800B9A64D /* ArgList.h */,
0FE0500C1AA9091100D33B33 /* ArgumentsMode.h */,
@@ -5684,6 +5680,7 @@
A1D792FB1B43864B004516F5 /* IntlNumberFormatPrototype.h */,
A12BBFF31B044A9800664B69 /* IntlObject.cpp */,
A12BBFF11B044A8B00664B69 /* IntlObject.h */,
+ 708EBE231CE8F35000453146 /* IntlObjectInlines.h */,
86BF642A148DB2B5004DE36A /* Intrinsic.h */,
FE4D55B71AE716CA0052E459 /* IterationStatus.h */,
70113D491A8DB093003848C4 /* IteratorOperations.cpp */,
@@ -5839,6 +5836,11 @@
1442566015EDE98D0066A49B /* JSWithScope.h */,
65C7A1710A8EAACB00FA37EA /* JSWrapperObject.cpp */,
65C7A1720A8EAACB00FA37EA /* JSWrapperObject.h */,
+ DCF3D5641CD29468003D5C65 /* LazyClassStructure.cpp */,
+ DCF3D5651CD29468003D5C65 /* LazyClassStructure.h */,
+ DCF3D5661CD29468003D5C65 /* LazyClassStructureInlines.h */,
+ DCF3D5671CD29468003D5C65 /* LazyProperty.h */,
+ DCF3D5681CD29468003D5C65 /* LazyPropertyInlines.h */,
A7E2EA6A0FB460CF00601F06 /* LiteralParser.cpp */,
A7E2EA690FB460CF00601F06 /* LiteralParser.h */,
F692A8680255597D01FF60F7 /* Lookup.cpp */,
@@ -5851,6 +5853,7 @@
A74DEF8E182D991400522C22 /* MapIteratorPrototype.h */,
A700873B17CBE8D300C3E643 /* MapPrototype.cpp */,
A700873C17CBE8D300C3E643 /* MapPrototype.h */,
+ DC69AA651CF7A1EF00C6272F /* MatchResult.cpp */,
8612E4CB1522918400C836BE /* MatchResult.h */,
4340A4821A9051AF00D73CCA /* MathCommon.cpp */,
4340A4831A9051AF00D73CCA /* MathCommon.h */,
@@ -9223,6 +9226,7 @@
14469DE1107EC7E700650446 /* NativeErrorPrototype.cpp in Sources */,
E33E8D201B9013DE00346B52 /* NativeStdFunctionCell.cpp in Sources */,
148F21B7107EC5470042EC2C /* Nodes.cpp in Sources */,
+ DC69AA661CF7A1F200C6272F /* MatchResult.cpp in Sources */,
E3963CEE1B73F75000EB4CE5 /* NodesAnalyzeModule.cpp in Sources */,
655EB29B10CE2581001A990E /* NodesCodegen.cpp in Sources */,
6546F5211A32B313006F07D5 /* NullGetterFunction.cpp in Sources */,
Modified: trunk/Source/_javascript_Core/builtins/RegExpPrototype.js (201450 => 201451)
--- trunk/Source/_javascript_Core/builtins/RegExpPrototype.js 2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/builtins/RegExpPrototype.js 2016-05-27 14:59:46 UTC (rev 201451)
@@ -98,6 +98,9 @@
regexp.lastIndex = 0;
let resultList = [];
+ // FIXME: It would be great to implement a solution similar to what we do in
+ // RegExpObject::matchGlobal(). It's not clear if this is possible, since this loop has
+ // effects. https://bugs.webkit.org/show_bug.cgi?id=158145
const maximumReasonableMatchSize = 100000000;
while (true) {
Added: trunk/Source/_javascript_Core/runtime/MatchResult.cpp (0 => 201451)
--- trunk/Source/_javascript_Core/runtime/MatchResult.cpp (rev 0)
+++ trunk/Source/_javascript_Core/runtime/MatchResult.cpp 2016-05-27 14:59:46 UTC (rev 201451)
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "MatchResult.h"
+
+namespace JSC {
+
+void MatchResult::dump(PrintStream& out) const
+{
+ if (start == WTF::notFound)
+ out.print("notFound");
+ else
+ out.print(start, "...", end);
+}
+
+} // namespace JSC
+
Modified: trunk/Source/_javascript_Core/runtime/MatchResult.h (201450 => 201451)
--- trunk/Source/_javascript_Core/runtime/MatchResult.h 2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/runtime/MatchResult.h 2016-05-27 14:59:46 UTC (rev 201451)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012 Apple Inc. All rights reserved.
+ * Copyright (C) 2012, 2016 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -26,6 +26,11 @@
#ifndef MatchResult_h
#define MatchResult_h
+#include <wtf/PrintStream.h>
+#include <wtf/Vector.h> // for notFound
+
+namespace JSC {
+
typedef uint64_t EncodedMatchResult;
struct MatchResult {
@@ -69,9 +74,13 @@
{
return start == end;
}
+
+ void dump(PrintStream&) const;
size_t start;
size_t end;
};
+} // namespace JSC
+
#endif
Modified: trunk/Source/_javascript_Core/runtime/RegExpObject.cpp (201450 => 201451)
--- trunk/Source/_javascript_Core/runtime/RegExpObject.cpp 2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/runtime/RegExpObject.cpp 2016-05-27 14:59:46 UTC (rev 201451)
@@ -169,6 +169,65 @@
return matchInline(exec, globalObject, string);
}
+template<typename FixEndFunc>
+JSValue collectMatches(VM& vm, ExecState* exec, JSString* string, const String& s, RegExpConstructor* constructor, RegExp* regExp, const FixEndFunc& fixEnd)
+{
+ MatchResult result = constructor->performMatch(vm, regExp, string, s, 0);
+ if (!result)
+ return jsNull();
+
+ static unsigned maxSizeForDirectPath = 100000;
+
+ JSArray* array = constructEmptyArray(exec, nullptr);
+
+ auto iterate = [&] () {
+ size_t end = result.end;
+ size_t length = end - result.start;
+ array->push(exec, JSRopeString::createSubstringOfResolved(vm, string, result.start, length));
+ if (!length)
+ end = fixEnd(end);
+ result = constructor->performMatch(vm, regExp, string, s, end);
+ };
+
+ do {
+ if (array->length() >= maxSizeForDirectPath) {
+ // First do a throw-away match to see how many matches we'll get.
+ unsigned matchCount = 0;
+ MatchResult savedResult = result;
+ do {
+ if (array->length() + matchCount >= MAX_STORAGE_VECTOR_LENGTH) {
+ throwOutOfMemoryError(exec);
+ return jsUndefined();
+ }
+
+ size_t end = result.end;
+ matchCount++;
+ if (result.empty())
+ end = fixEnd(end);
+
+ // Using RegExpConstructor::performMatch() instead of calling RegExp::match()
+ // directly is a surprising but profitable choice: it means that when we do OOM, we
+ // will leave the cached result in the state it ought to have had just before the
+ // OOM! On the other hand, if this loop concludes that the result is small enough,
+ // then the iterate() loop below will overwrite the cached result anyway.
+ result = constructor->performMatch(vm, regExp, string, s, end);
+ } while (result);
+
+ // OK, we have a sensible number of matches. Now we can create them for reals.
+ result = savedResult;
+ do
+ iterate();
+ while (result);
+
+ return array;
+ }
+
+ iterate();
+ } while (result);
+
+ return array;
+}
+
JSValue RegExpObject::matchGlobal(ExecState* exec, JSGlobalObject* globalObject, JSString* string)
{
RegExp* regExp = this->regExp();
@@ -183,52 +242,21 @@
String s = string->value(exec);
RegExpConstructor* regExpConstructor = globalObject->regExpConstructor();
- MatchResult result = regExpConstructor->performMatch(*vm, regExp, string, s, 0);
-
- // return array of matches
- MarkedArgumentBuffer list;
- // We defend ourselves from crazy.
- const size_t maximumReasonableMatchSize = 1000000000;
-
+
if (regExp->unicode()) {
unsigned stringLength = s.length();
- while (result) {
- if (list.size() > maximumReasonableMatchSize) {
- throwOutOfMemoryError(exec);
- return jsUndefined();
- }
-
- size_t end = result.end;
- size_t length = end - result.start;
- list.append(jsSubstring(exec, s, result.start, length));
- if (!length)
- end = advanceStringUnicode(s, stringLength, end);
- result = regExpConstructor->performMatch(*vm, regExp, string, s, end);
- }
- } else {
- while (result) {
- if (list.size() > maximumReasonableMatchSize) {
- throwOutOfMemoryError(exec);
- return jsUndefined();
- }
-
- size_t end = result.end;
- size_t length = end - result.start;
- list.append(jsSubstring(exec, s, result.start, length));
- if (!length)
- ++end;
- result = regExpConstructor->performMatch(*vm, regExp, string, s, end);
- }
+ return collectMatches(
+ *vm, exec, string, s, regExpConstructor, regExp,
+ [&] (size_t end) -> size_t {
+ return advanceStringUnicode(s, stringLength, end);
+ });
}
-
- if (list.isEmpty()) {
- // if there are no matches at all, it's important to return
- // Null instead of an empty array, because this matches
- // other browsers and because Null is a false value.
- return jsNull();
- }
-
- return constructArray(exec, static_cast<ArrayAllocationProfile*>(0), list);
+
+ return collectMatches(
+ *vm, exec, string, s, regExpConstructor, regExp,
+ [&] (size_t end) -> size_t {
+ return end + 1;
+ });
}
} // namespace JSC
Modified: trunk/Source/_javascript_Core/runtime/StringObject.h (201450 => 201451)
--- trunk/Source/_javascript_Core/runtime/StringObject.h 2016-05-27 13:51:02 UTC (rev 201450)
+++ trunk/Source/_javascript_Core/runtime/StringObject.h 2016-05-27 14:59:46 UTC (rev 201451)
@@ -94,6 +94,11 @@
}
// Helper that tries to use the JSString substring sharing mechanism if 'originalValue' is a JSString.
+// FIXME: It would be even better if toString returned a JSString*, or if anyone who called
+// toString with the intent of later calling this functon first created a jsString from the String
+// that toString returned. That way, we'd get the substring optimization even when the input was
+// not a JSString.
+// https://bugs.webkit.org/show_bug.cgi?id=158140
static inline JSString* jsSubstring(ExecState* exec, JSValue originalValue, const String& string, unsigned offset, unsigned length)
{
if (originalValue.isString()) {
Added: trunk/Source/_javascript_Core/tests/stress/big-match.js (0 => 201451)
--- trunk/Source/_javascript_Core/tests/stress/big-match.js (rev 0)
+++ trunk/Source/_javascript_Core/tests/stress/big-match.js 2016-05-27 14:59:46 UTC (rev 201451)
@@ -0,0 +1,20 @@
+"use strict";
+
+var bigString = "x";
+while (bigString.length < 200000)
+ bigString = bigString + bigString;
+
+if (bigString.length != 262144)
+ throw "Error: bad string length: " + bigString.length;
+
+var result = /x/g[Symbol.match](bigString);
+
+if (result.length != 262144)
+ throw "Error: bad result array length: " + result.length;
+
+for (var i = 0; i < result.length; ++i) {
+ if (result[i] != "x")
+ throw "Error: array does not contain \"x\" at i = " + i + ": " + result[i];
+}
+
+