Title: [250508] branches/safari-608-branch
Revision
250508
Author
[email protected]
Date
2019-09-30 01:30:48 -0700 (Mon, 30 Sep 2019)

Log Message

Cherry-pick r249926. rdar://problem/55826870

    [JSC] Perform check again when we found non-BMP characters
    https://bugs.webkit.org/show_bug.cgi?id=201647

    Reviewed by Yusuke Suzuki.

    JSTests:

    * stress/regexp-unicode-surrogate-pair-increment-should-involve-length-check.js: Added.
    * stress/regexp-unicode-within-string.js: Updated test to eliminate the bogus print().
    (testRegExpInbounds):

    Source/_javascript_Core:

    We need to check for end of input for non-BMP characters when matching a character class that contains
    both BMP and non-BMP characters.  In advanceIndexAfterCharacterClassTermMatch() we were checking for
    end of input for both BMP and non-BMP characters.  For BMP characters, this check is redundant.
    After moving the check to after the "is BMP check", we need to decrement index after reaching the failure
    label to back out the index++ for the first surrogate of the non-BMP character.

    Added the same kind of check in generateCharacterClassOnce().  In that case, we have pre-checked the
    first character (surrogate) for a non-BMP codepoint, so we just need to check for end of input before
    we increment for the second surrogate.

    While writing tests, I found an off by one error in backtrackCharacterClassGreedy() and changed the
    loop to check the count at loop top instead of loop bottom.

    * yarr/YarrJIT.cpp:
    (JSC::Yarr::YarrGenerator::advanceIndexAfterCharacterClassTermMatch):
    (JSC::Yarr::YarrGenerator::generateCharacterClassOnce):
    (JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
    (JSC::Yarr::YarrGenerator::backtrackCharacterClassGreedy):
    (JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):

    git-svn-id: https://svn.webkit.org/repository/webkit/trunk@249926 268f45cc-cd09-0410-ab3c-d52691b4dbfc

Modified Paths

Added Paths

Diff

Modified: branches/safari-608-branch/JSTests/ChangeLog (250507 => 250508)


--- branches/safari-608-branch/JSTests/ChangeLog	2019-09-30 08:30:45 UTC (rev 250507)
+++ branches/safari-608-branch/JSTests/ChangeLog	2019-09-30 08:30:48 UTC (rev 250508)
@@ -1,5 +1,56 @@
 2019-09-30  Babak Shafiei  <[email protected]>
 
+        Cherry-pick r249926. rdar://problem/55826870
+
+    [JSC] Perform check again when we found non-BMP characters
+    https://bugs.webkit.org/show_bug.cgi?id=201647
+    
+    Reviewed by Yusuke Suzuki.
+    
+    JSTests:
+    
+    * stress/regexp-unicode-surrogate-pair-increment-should-involve-length-check.js: Added.
+    * stress/regexp-unicode-within-string.js: Updated test to eliminate the bogus print().
+    (testRegExpInbounds):
+    
+    Source/_javascript_Core:
+    
+    We need to check for end of input for non-BMP characters when matching a character class that contains
+    both BMP and non-BMP characters.  In advanceIndexAfterCharacterClassTermMatch() we were checking for
+    end of input for both BMP and non-BMP characters.  For BMP characters, this check is redundant.
+    After moving the check to after the "is BMP check", we need to decrement index after reaching the failure
+    label to back out the index++ for the first surrogate of the non-BMP character.
+    
+    Added the same kind of check in generateCharacterClassOnce().  In that case, we have pre-checked the
+    first character (surrogate) for a non-BMP codepoint, so we just need to check for end of input before
+    we increment for the second surrogate.
+    
+    While writing tests, I found an off by one error in backtrackCharacterClassGreedy() and changed the
+    loop to check the count at loop top instead of loop bottom.
+    
+    * yarr/YarrJIT.cpp:
+    (JSC::Yarr::YarrGenerator::advanceIndexAfterCharacterClassTermMatch):
+    (JSC::Yarr::YarrGenerator::generateCharacterClassOnce):
+    (JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
+    (JSC::Yarr::YarrGenerator::backtrackCharacterClassGreedy):
+    (JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):
+    
+    
+    git-svn-id: https://svn.webkit.org/repository/webkit/trunk@249926 268f45cc-cd09-0410-ab3c-d52691b4dbfc
+
+    2019-09-16  Michael Saboff  <[email protected]>
+
+            [JSC] Perform check again when we found non-BMP characters
+            https://bugs.webkit.org/show_bug.cgi?id=201647
+
+            Reviewed by Yusuke Suzuki.
+
+            * stress/regexp-unicode-surrogate-pair-increment-should-involve-length-check.js: Added.
+            * stress/regexp-unicode-within-string.js: Updated test to eliminate the bogus print().
+            (testRegExpInbounds):
+
+2019-09-30  Babak Shafiei  <[email protected]>
+
         Cherry-pick r249777. rdar://problem/55826876
 
     JSC crashes due to stack overflow while building RegExp

Added: branches/safari-608-branch/JSTests/stress/regexp-unicode-surrogate-pair-increment-should-involve-length-check.js (0 => 250508)


--- branches/safari-608-branch/JSTests/stress/regexp-unicode-surrogate-pair-increment-should-involve-length-check.js	                        (rev 0)
+++ branches/safari-608-branch/JSTests/stress/regexp-unicode-surrogate-pair-increment-should-involve-length-check.js	2019-09-30 08:30:48 UTC (rev 250508)
@@ -0,0 +1,27 @@
+// This test checks for proper incrementing around / over individual surrogates and surrogate pairs.
+// This test should run without crashing.
+
+function testRegExpMatch(re, str)
+{
+    for (let i = 0; i < 100; ++i) {
+        let match = re.exec(str);
+        if (!match || match[0] != str) {
+            print(match);
+            throw "Expected " + re + " to match \"" + str + "\" but it didn't";
+        }
+    }
+}
+
+let testString = "\ud800\ud800\udc00";
+let greedyRegExp = /([^x]+)[^]*\1([^])/u;
+
+testRegExpMatch(greedyRegExp, testString);
+
+let nonGreedyRegExp = /(.*[^x]+?)[^]*([^])/u;
+
+testRegExpMatch(nonGreedyRegExp, testString);
+
+let testString2 = "\ud800\ud800\udc00Test\udc00123";
+let backtrackGreedyRegExp = /.*[\x20-\udffff].\w*.\d{3}/u;
+
+testRegExpMatch(backtrackGreedyRegExp, testString2);

Modified: branches/safari-608-branch/JSTests/stress/regexp-unicode-within-string.js (250507 => 250508)


--- branches/safari-608-branch/JSTests/stress/regexp-unicode-within-string.js	2019-09-30 08:30:45 UTC (rev 250507)
+++ branches/safari-608-branch/JSTests/stress/regexp-unicode-within-string.js	2019-09-30 08:30:48 UTC (rev 250508)
@@ -10,7 +10,6 @@
     if (match !== null && match[0] === str) 
         throw "Error: Read past end of a Unicode substring processing a Unicode RegExp";
     else if (match === null || match[0] !== subStr) {
-        print("Error: match[0].length = " + match[0].length + ", match[0] = \"" + match[0] + "\"");
         throw "Error: Didn't properly match a Unicode substring with a matching Unicode RegExp";
     }
 }

Modified: branches/safari-608-branch/Source/_javascript_Core/ChangeLog (250507 => 250508)


--- branches/safari-608-branch/Source/_javascript_Core/ChangeLog	2019-09-30 08:30:45 UTC (rev 250507)
+++ branches/safari-608-branch/Source/_javascript_Core/ChangeLog	2019-09-30 08:30:48 UTC (rev 250508)
@@ -1,5 +1,72 @@
 2019-09-30  Babak Shafiei  <[email protected]>
 
+        Cherry-pick r249926. rdar://problem/55826870
+
+    [JSC] Perform check again when we found non-BMP characters
+    https://bugs.webkit.org/show_bug.cgi?id=201647
+    
+    Reviewed by Yusuke Suzuki.
+    
+    JSTests:
+    
+    * stress/regexp-unicode-surrogate-pair-increment-should-involve-length-check.js: Added.
+    * stress/regexp-unicode-within-string.js: Updated test to eliminate the bogus print().
+    (testRegExpInbounds):
+    
+    Source/_javascript_Core:
+    
+    We need to check for end of input for non-BMP characters when matching a character class that contains
+    both BMP and non-BMP characters.  In advanceIndexAfterCharacterClassTermMatch() we were checking for
+    end of input for both BMP and non-BMP characters.  For BMP characters, this check is redundant.
+    After moving the check to after the "is BMP check", we need to decrement index after reaching the failure
+    label to back out the index++ for the first surrogate of the non-BMP character.
+    
+    Added the same kind of check in generateCharacterClassOnce().  In that case, we have pre-checked the
+    first character (surrogate) for a non-BMP codepoint, so we just need to check for end of input before
+    we increment for the second surrogate.
+    
+    While writing tests, I found an off by one error in backtrackCharacterClassGreedy() and changed the
+    loop to check the count at loop top instead of loop bottom.
+    
+    * yarr/YarrJIT.cpp:
+    (JSC::Yarr::YarrGenerator::advanceIndexAfterCharacterClassTermMatch):
+    (JSC::Yarr::YarrGenerator::generateCharacterClassOnce):
+    (JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
+    (JSC::Yarr::YarrGenerator::backtrackCharacterClassGreedy):
+    (JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):
+    
+    
+    git-svn-id: https://svn.webkit.org/repository/webkit/trunk@249926 268f45cc-cd09-0410-ab3c-d52691b4dbfc
+
+    2019-09-16  Michael Saboff  <[email protected]>
+
+            [JSC] Perform check again when we found non-BMP characters
+            https://bugs.webkit.org/show_bug.cgi?id=201647
+
+            Reviewed by Yusuke Suzuki.
+
+            We need to check for end of input for non-BMP characters when matching a character class that contains
+            both BMP and non-BMP characters.  In advanceIndexAfterCharacterClassTermMatch() we were checking for
+            end of input for both BMP and non-BMP characters.  For BMP characters, this check is redundant.
+            After moving the check to after the "is BMP check", we need to decrement index after reaching the failure
+            label to back out the index++ for the first surrogate of the non-BMP character.
+
+            Added the same kind of check in generateCharacterClassOnce().  In that case, we have pre-checked the
+            first character (surrogate) for a non-BMP codepoint, so we just need to check for end of input before
+            we increment for the second surrogate.
+
+            While writing tests, I found an off by one error in backtrackCharacterClassGreedy() and changed the
+            loop to check the count at loop top instead of loop bottom.
+
+            * yarr/YarrJIT.cpp:
+            (JSC::Yarr::YarrGenerator::advanceIndexAfterCharacterClassTermMatch):
+            (JSC::Yarr::YarrGenerator::generateCharacterClassOnce):
+            (JSC::Yarr::YarrGenerator::generateCharacterClassGreedy):
+            (JSC::Yarr::YarrGenerator::backtrackCharacterClassGreedy):
+            (JSC::Yarr::YarrGenerator::backtrackCharacterClassNonGreedy):
+
+2019-09-30  Babak Shafiei  <[email protected]>
+
         Cherry-pick r249777. rdar://problem/55826876
 
     JSC crashes due to stack overflow while building RegExp

Modified: branches/safari-608-branch/Source/_javascript_Core/yarr/YarrJIT.cpp (250507 => 250508)


--- branches/safari-608-branch/Source/_javascript_Core/yarr/YarrJIT.cpp	2019-09-30 08:30:45 UTC (rev 250507)
+++ branches/safari-608-branch/Source/_javascript_Core/yarr/YarrJIT.cpp	2019-09-30 08:30:48 UTC (rev 250508)
@@ -453,16 +453,16 @@
     }
 
 #ifdef JIT_UNICODE_EXPRESSIONS
-    void advanceIndexAfterCharacterClassTermMatch(const PatternTerm* term, JumpList& failures, const RegisterID character)
+    void advanceIndexAfterCharacterClassTermMatch(const PatternTerm* term, JumpList& failuresAfterIncrementingIndex, const RegisterID character)
     {
         ASSERT(term->type == PatternTerm::TypeCharacterClass);
 
-        if (term->characterClass->hasOneCharacterSize() && !term->invert())
+        if (term->isFixedWidthCharacterClass())
             add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
         else {
             add32(TrustedImm32(1), index);
-            failures.append(atEndOfInput());
             Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
+            failuresAfterIncrementingIndex.append(atEndOfInput());
             add32(TrustedImm32(1), index);
             isBMPChar.link(this);
         }
@@ -1757,6 +1757,7 @@
 #ifdef JIT_UNICODE_EXPRESSIONS
         if (m_decodeSurrogatePairs && (!term->characterClass->hasOneCharacterSize() || term->invert())) {
             Jump isBMPChar = branch32(LessThan, character, supplementaryPlanesBase);
+            op.m_jumps.append(atEndOfInput());
             add32(TrustedImm32(1), index);
             isBMPChar.link(this);
         }
@@ -1816,7 +1817,7 @@
 
 #ifdef JIT_UNICODE_EXPRESSIONS
         if (m_decodeSurrogatePairs) {
-            if (term->characterClass->hasOneCharacterSize() && !term->invert())
+            if (term->isFixedWidthCharacterClass())
                 add32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), countRegister);
             else {
                 add32(TrustedImm32(1), countRegister);
@@ -1849,9 +1850,10 @@
         move(TrustedImm32(0), countRegister);
 
         JumpList failures;
+        JumpList failuresDecrementIndex;
         Label loop(this);
 #ifdef JIT_UNICODE_EXPRESSIONS
-        if (term->characterClass->hasOneCharacterSize() && !term->invert() && term->characterClass->hasNonBMPCharacters()) {
+        if (term->isFixedWidthCharacterClass() && term->characterClass->hasNonBMPCharacters()) {
             move(TrustedImm32(1), character);
             failures.append(checkNotEnoughInput(character));
         } else
@@ -1875,7 +1877,7 @@
 
 #ifdef JIT_UNICODE_EXPRESSIONS
         if (m_decodeSurrogatePairs)
-            advanceIndexAfterCharacterClassTermMatch(term, failures, character);
+            advanceIndexAfterCharacterClassTermMatch(term, failuresDecrementIndex, character);
         else
 #endif
             add32(TrustedImm32(1), index);
@@ -1887,6 +1889,11 @@
         } else
             jump(loop);
 
+        if (!failuresDecrementIndex.empty()) {
+            failuresDecrementIndex.link(this);
+            sub32(TrustedImm32(1), index);
+        }
+
         failures.link(this);
         op.m_reentry = label();
 
@@ -1908,7 +1915,7 @@
 
         if (!m_decodeSurrogatePairs)
             sub32(TrustedImm32(1), index);
-        else if (term->characterClass->hasOneCharacterSize() && !term->invert())
+        else if (term->isFixedWidthCharacterClass())
             sub32(TrustedImm32(term->characterClass->hasNonBMPCharacters() ? 2 : 1), index);
         else {
             // Rematch one less
@@ -1917,6 +1924,8 @@
             loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::beginIndex(), index);
 
             Label rematchLoop(this);
+            Jump doneRematching = branchTest32(Zero, countRegister);
+
             readCharacter(m_checkedOffset - term->inputPosition, character);
 
             sub32(TrustedImm32(1), countRegister);
@@ -1928,7 +1937,8 @@
             isBMPChar.link(this);
 #endif
 
-            branchTest32(Zero, countRegister).linkTo(rematchLoop, this);
+            jump(rematchLoop);
+            doneRematching.link(this);
 
             loadFromFrame(term->frameLocation + BackTrackInfoCharacterClass::matchAmountIndex(), countRegister);
         }
@@ -1964,6 +1974,7 @@
         const RegisterID countRegister = regT1;
 
         JumpList nonGreedyFailures;
+        JumpList nonGreedyFailuresDecrementIndex;
 
         m_backtrackingState.link(this);
 
@@ -1996,7 +2007,7 @@
 
 #ifdef JIT_UNICODE_EXPRESSIONS
         if (m_decodeSurrogatePairs)
-            advanceIndexAfterCharacterClassTermMatch(term, nonGreedyFailures, character);
+            advanceIndexAfterCharacterClassTermMatch(term, nonGreedyFailuresDecrementIndex, character);
         else
 #endif
             add32(TrustedImm32(1), index);
@@ -2004,6 +2015,10 @@
 
         jump(op.m_reentry);
 
+        if (!nonGreedyFailuresDecrementIndex.empty()) {
+            nonGreedyFailuresDecrementIndex.link(this);
+            breakpoint();
+        }
         nonGreedyFailures.link(this);
         sub32(countRegister, index);
         m_backtrackingState.fallthrough();

Modified: branches/safari-608-branch/Source/_javascript_Core/yarr/YarrPattern.h (250507 => 250508)


--- branches/safari-608-branch/Source/_javascript_Core/yarr/YarrPattern.h	2019-09-30 08:30:45 UTC (rev 250507)
+++ branches/safari-608-branch/Source/_javascript_Core/yarr/YarrPattern.h	2019-09-30 08:30:48 UTC (rev 250508)
@@ -258,6 +258,11 @@
         return m_capture;
     }
 
+    bool isFixedWidthCharacterClass() const
+    {
+        return type == TypeCharacterClass && characterClass->hasOneCharacterSize() && !invert();
+    }
+
     bool containsAnyCaptures()
     {
         ASSERT(this->type == TypeParenthesesSubpattern);
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to