Title: [243839] trunk
Revision
243839
Author
msab...@apple.com
Date
2019-04-03 16:51:12 -0700 (Wed, 03 Apr 2019)

Log Message

REGRESSION (r243642): com.apple._javascript_Core crash in JSC::RegExpObject::execInline
https://bugs.webkit.org/show_bug.cgi?id=196477

Reviewed by Keith Miller.

Source/_javascript_Core:

The problem here is that when we advance the index by 2 for a character class that only
has non-BMP characters, we might go past the end of the string.  This can happen for
greedy counted character classes that are part of a alternative where there is one
character to match after the greedy non-BMP character class.

The "do we have string left to match" check at the top of the JIT loop for the counted
character class checks to see if index is not equal to the string length.  For non-BMP
character classes, we need to check to see if there are at least 2 characters left.
Therefore we now temporarily add 1 to the current index before comparing.  This checks
to see if there are iat least 2 characters left to match, instead of 1.

* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
(JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):

LayoutTests:

Updated the test with a couple more test cases to test a few variants of this bug.
Also added a couple of non-greedy counted non-BMP character class tests that don't have
the bug just to be sure.

* js/regexp-unicode-expected.txt:
* js/script-tests/regexp-unicode.js:

Modified Paths

Diff

Modified: trunk/LayoutTests/ChangeLog (243838 => 243839)


--- trunk/LayoutTests/ChangeLog	2019-04-03 23:39:12 UTC (rev 243838)
+++ trunk/LayoutTests/ChangeLog	2019-04-03 23:51:12 UTC (rev 243839)
@@ -1,3 +1,17 @@
+2019-04-03  Michael Saboff  <msab...@apple.com>
+
+        REGRESSION (r243642): com.apple._javascript_Core crash in JSC::RegExpObject::execInline
+        https://bugs.webkit.org/show_bug.cgi?id=196477
+
+        Reviewed by Keith Miller.
+
+        Updated the test with a couple more test cases to test a few variants of this bug.
+        Also added a couple of non-greedy counted non-BMP character class tests that don't have
+        the bug just to be sure.
+
+        * js/regexp-unicode-expected.txt:
+        * js/script-tests/regexp-unicode.js:
+
 2019-04-03  Myles C. Maxfield  <mmaxfi...@apple.com>
 
         Documents can be destroyed before their CSSFontFaceSet is destroyed

Modified: trunk/LayoutTests/js/regexp-unicode-expected.txt (243838 => 243839)


--- trunk/LayoutTests/js/regexp-unicode-expected.txt	2019-04-03 23:39:12 UTC (rev 243838)
+++ trunk/LayoutTests/js/regexp-unicode-expected.txt	2019-04-03 23:51:12 UTC (rev 243839)
@@ -148,6 +148,9 @@
 PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]*?a|[𐌐𐌑]*?./iu)[0] is "𐌑"
 PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]+a|[𐌐𐌑]+./iu)[0] is "𐌑𐌐𐌑"
 PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]+?a|[𐌐𐌑]+?./iu)[0] is "𐌑𐌐"
+PASS "𐌑𐌐𐌑".match(/[𐌁𐌑]+?a$|[𐌐𐌑]+?.$/iu)[0] is "𐌑𐌐𐌑"
+PASS "𐌑𐌐𐌑".match(/[𐌁𐌑x]+a|[𐌐𐌑x]+./iu)[0] is "𐌑𐌐𐌑"
+PASS "𐌑𐌐𐌑".match(/[𐌁𐌑x]+?a|[𐌐𐌑x]+?./iu)[0] is "𐌑𐌐"
 PASS "C83|НАЧАТЬ".match(re8)[0] is "C83|НАЧАТЬ"
 PASS "This.Is.16.Chars|НАЧАТЬ".match(re8)[0] is "This.Is.16.Chars|НАЧАТЬ"
 PASS "Testing\nሴ 1 2 3".match(/^[က-𐃿] 1 2 3/um)[0] is "ሴ 1 2 3"

Modified: trunk/LayoutTests/js/script-tests/regexp-unicode.js (243838 => 243839)


--- trunk/LayoutTests/js/script-tests/regexp-unicode.js	2019-04-03 23:39:12 UTC (rev 243838)
+++ trunk/LayoutTests/js/script-tests/regexp-unicode.js	2019-04-03 23:51:12 UTC (rev 243839)
@@ -205,6 +205,9 @@
 shouldBe('"\u{10311}\u{10310}\u{10311}".match(/[\u{10301}\u{10311}]*?a|[\u{10310}\u{10311}]*?./iu)[0]', '"\u{10311}"');
 shouldBe('"\u{10311}\u{10310}\u{10311}".match(/[\u{10301}\u{10311}]+a|[\u{10310}\u{10311}]+./iu)[0]', '"\u{10311}\u{10310}\u{10311}"');
 shouldBe('"\u{10311}\u{10310}\u{10311}".match(/[\u{10301}\u{10311}]+?a|[\u{10310}\u{10311}]+?./iu)[0]', '"\u{10311}\u{10310}"');
+shouldBe('"\u{10311}\u{10310}\u{10311}".match(/[\u{10301}\u{10311}]+?a$|[\u{10310}\u{10311}]+?.$/iu)[0]', '"\u{10311}\u{10310}\u{10311}"');
+shouldBe('"\u{10311}\u{10310}\u{10311}".match(/[\u{10301}\u{10311}x]+a|[\u{10310}\u{10311}x]+./iu)[0]', '"\u{10311}\u{10310}\u{10311}"');
+shouldBe('"\u{10311}\u{10310}\u{10311}".match(/[\u{10301}\u{10311}x]+?a|[\u{10310}\u{10311}x]+?./iu)[0]', '"\u{10311}\u{10310}"');
 
 var re8 = new  RegExp("^([0-9a-z\.]{3,16})\\|\u{041d}\u{0410}\u{0427}\u{0410}\u{0422}\u{042c}", "ui");
 shouldBe('"C83|\u{041d}\u{0410}\u{0427}\u{0410}\u{0422}\u{042c}".match(re8)[0]', '"C83|\u{041d}\u{0410}\u{0427}\u{0410}\u{0422}\u{042c}"');

Modified: trunk/Source/_javascript_Core/ChangeLog (243838 => 243839)


--- trunk/Source/_javascript_Core/ChangeLog	2019-04-03 23:39:12 UTC (rev 243838)
+++ trunk/Source/_javascript_Core/ChangeLog	2019-04-03 23:51:12 UTC (rev 243839)
@@ -1,3 +1,25 @@
+2019-04-03  Michael Saboff  <msab...@apple.com>
+
+        REGRESSION (r243642): com.apple._javascript_Core crash in JSC::RegExpObject::execInline
+        https://bugs.webkit.org/show_bug.cgi?id=196477
+
+        Reviewed by Keith Miller.
+
+        The problem here is that when we advance the index by 2 for a character class that only
+        has non-BMP characters, we might go past the end of the string.  This can happen for
+        greedy counted character classes that are part of a alternative where there is one
+        character to match after the greedy non-BMP character class.
+
+        The "do we have string left to match" check at the top of the JIT loop for the counted
+        character class checks to see if index is not equal to the string length.  For non-BMP
+        character classes, we need to check to see if there are at least 2 characters left.
+        Therefore we now temporarily add 1 to the current index before comparing.  This checks
+        to see if there are iat least 2 characters left to match, instead of 1.
+
+        * yarr/YarrJIT.cpp:
+        (JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
+        (JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):
+
 2019-04-03  Yusuke Suzuki  <ysuz...@apple.com>
 
         [JSC] Exception verification crash on operationArrayIndexOfValueInt32OrContiguous

Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.cpp (243838 => 243839)


--- trunk/Source/_javascript_Core/yarr/YarrJIT.cpp	2019-04-03 23:39:12 UTC (rev 243838)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.cpp	2019-04-03 23:51:12 UTC (rev 243839)
@@ -1849,7 +1849,13 @@
 
         JumpList failures;
         Label loop(this);
-        failures.append(atEndOfInput());
+#ifdef JIT_UNICODE_EXPRESSIONS
+        if (term->characterClass->hasOneCharacterSize() && !term->invert() && term->characterClass->hasNonBMPCharacters()) {
+            move(TrustedImm32(1), character);
+            failures.append(checkNotEnoughInput(character));
+        } else
+#endif
+            failures.append(atEndOfInput());
 
         if (term->invert()) {
             readCharacter(m_checkedOffset - term->inputPosition, character);
@@ -1956,11 +1962,13 @@
 
         m_backtrackingState.link(this);
 
+#ifdef JIT_UNICODE_EXPRESSIONS
         if (m_decodeSurrogatePairs) {
             if (!term->characterClass->hasOneCharacterSize() || term->invert())
                 loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
             loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
         }
+#endif
 
         nonGreedyFailures.append(atEndOfInput());
         nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityMaxCount.unsafeGet())));
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to