Title: [288995] trunk/Source/_javascript_Core
Revision
288995
Author
[email protected]
Date
2022-02-02 13:53:20 -0800 (Wed, 02 Feb 2022)

Log Message

[JSC] YarrJIT m_checkedOffset should be pre-computed and stored to Yarr op
https://bugs.webkit.org/show_bug.cgi?id=235932

Reviewed by Keith Miller.

Instead of adhocly calculating m_checkedOffset while generating the machine code,
let's just pre-compute it and store it to Yarr JIT IR so that we can simplify
m_checkedOffset handling. This paves the way to also pre-compute safe-length
so that we can skip some of length check in parenthesized-assertions.

* yarr/YarrJIT.cpp:

Modified Paths

Diff

Modified: trunk/Source/_javascript_Core/ChangeLog (288994 => 288995)


--- trunk/Source/_javascript_Core/ChangeLog	2022-02-02 21:41:06 UTC (rev 288994)
+++ trunk/Source/_javascript_Core/ChangeLog	2022-02-02 21:53:20 UTC (rev 288995)
@@ -1,3 +1,17 @@
+2022-02-02  Yusuke Suzuki  <[email protected]>
+
+        [JSC] YarrJIT m_checkedOffset should be pre-computed and stored to Yarr op
+        https://bugs.webkit.org/show_bug.cgi?id=235932
+
+        Reviewed by Keith Miller.
+
+        Instead of adhocly calculating m_checkedOffset while generating the machine code,
+        let's just pre-compute it and store it to Yarr JIT IR so that we can simplify
+        m_checkedOffset handling. This paves the way to also pre-compute safe-length
+        so that we can skip some of length check in parenthesized-assertions.
+
+        * yarr/YarrJIT.cpp:
+
 2022-02-02  Patrick Griffis  <[email protected]>
 
         CSP: Fix returned WebAssembly error type when blocked

Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.cpp (288994 => 288995)


--- trunk/Source/_javascript_Core/yarr/YarrJIT.cpp	2022-02-02 21:41:06 UTC (rev 288994)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.cpp	2022-02-02 21:53:20 UTC (rev 288995)
@@ -888,6 +888,14 @@
         // recalculating it.
         Checked<unsigned> m_checkAdjust;
 
+        // This records the current input offset being applied due to the current
+        // set of alternatives we are nested within. E.g. when matching the
+        // character 'b' within the regular _expression_ /abc/, we will know that
+        // the minimum size for the alternative is 3, checked upon entry to the
+        // alternative, and that 'b' is at offset 1 from the start, and as such
+        // when matching 'b' we need to apply an offset of -2 to the load.
+        Checked<unsigned> m_checkedOffset { };
+
         // Used by YarrOpCode::NestedAlternativeNext/End to hold the pointer to the
         // value that will be pushed into the pattern's frame to return to,
         // upon backtracking back into the disjunction.
@@ -1052,9 +1060,9 @@
 
             MacroAssembler::JumpList matchDest;
             if (!term->inputPosition)
-                matchDest.append(m_jit.branch32(MacroAssembler::Equal, m_regs.index, MacroAssembler::Imm32(m_checkedOffset)));
+                matchDest.append(m_jit.branch32(MacroAssembler::Equal, m_regs.index, MacroAssembler::Imm32(op.m_checkedOffset)));
 
-            readCharacter(m_checkedOffset - term->inputPosition + 1, character);
+            readCharacter(op.m_checkedOffset - term->inputPosition + 1, character);
             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
             op.m_jumps.append(m_jit.jump());
 
@@ -1064,7 +1072,7 @@
             if (term->inputPosition)
                 op.m_jumps.append(m_jit.jump());
             else
-                op.m_jumps.append(m_jit.branch32(MacroAssembler::NotEqual, m_regs.index, MacroAssembler::Imm32(m_checkedOffset)));
+                op.m_jumps.append(m_jit.branch32(MacroAssembler::NotEqual, m_regs.index, MacroAssembler::Imm32(op.m_checkedOffset)));
         }
     }
     void backtrackAssertionBOL(size_t opIndex)
@@ -1081,16 +1089,16 @@
             const MacroAssembler::RegisterID character = m_regs.regT0;
 
             MacroAssembler::JumpList matchDest;
-            if (term->inputPosition == m_checkedOffset)
+            if (term->inputPosition == op.m_checkedOffset)
                 matchDest.append(atEndOfInput());
 
-            readCharacter(m_checkedOffset - term->inputPosition, character);
+            readCharacter(op.m_checkedOffset - term->inputPosition, character);
             matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
             op.m_jumps.append(m_jit.jump());
 
             matchDest.link(&m_jit);
         } else {
-            if (term->inputPosition == m_checkedOffset)
+            if (term->inputPosition == op.m_checkedOffset)
                 op.m_jumps.append(notAtEndOfInput());
             // Erk, really should poison out these alternatives early. :-/
             else
@@ -1110,10 +1118,10 @@
 
         const MacroAssembler::RegisterID character = m_regs.regT0;
 
-        if (term->inputPosition == m_checkedOffset)
+        if (term->inputPosition == op.m_checkedOffset)
             nextIsNotWordChar.append(atEndOfInput());
 
-        readCharacter(m_checkedOffset - term->inputPosition, character);
+        readCharacter(op.m_checkedOffset - term->inputPosition, character);
 
         CharacterClass* wordcharCharacterClass;
 
@@ -1135,8 +1143,8 @@
         MacroAssembler::Jump atBegin;
         MacroAssembler::JumpList matchDest;
         if (!term->inputPosition)
-            atBegin = m_jit.branch32(MacroAssembler::Equal, m_regs.index, MacroAssembler::Imm32(m_checkedOffset));
-        readCharacter(m_checkedOffset - term->inputPosition + 1, character);
+            atBegin = m_jit.branch32(MacroAssembler::Equal, m_regs.index, MacroAssembler::Imm32(op.m_checkedOffset));
+        readCharacter(op.m_checkedOffset - term->inputPosition + 1, character);
 
         CharacterClass* wordcharCharacterClass;
 
@@ -1193,7 +1201,7 @@
         MacroAssembler::Label loop(&m_jit);
 
         readCharacter(0, patternCharacter, patternIndex);
-        readCharacter(m_checkedOffset - term->inputPosition, character);
+        readCharacter(op.m_checkedOffset - term->inputPosition, character);
     
         if (!m_pattern.ignoreCase())
             characterMatchFails.append(m_jit.branch32(MacroAssembler::NotEqual, character, patternCharacter));
@@ -1525,40 +1533,40 @@
             switch (numberCharacters) {
             case 1:
                 // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
-                check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
+                check1(op.m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
                 return;
             case 2: {
-                check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
+                check2(op.m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
                 return;
             }
             case 3: {
-                check2(m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
-                check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff);
+                check2(op.m_checkedOffset - startTermPosition, allCharacters & 0xffff, ignoreCaseMask & 0xffff);
+                check1(op.m_checkedOffset - startTermPosition - 2, (allCharacters >> 16) & 0xff);
                 return;
             }
             case 4: {
-                check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+                check4(op.m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                 return;
             }
 #if CPU(X86_64) || CPU(ARM64) || CPU(RISCV64)
             case 5: {
-                check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
-                check1(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff);
+                check4(op.m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+                check1(op.m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xff);
                 return;
             }
             case 6: {
-                check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
-                check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
+                check4(op.m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+                check2(op.m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
                 return;
             }
             case 7: {
-                check4(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
-                check2(m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
-                check1(m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff);
+                check4(op.m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+                check2(op.m_checkedOffset - startTermPosition - 4, (allCharacters >> 32) & 0xffff, (ignoreCaseMask >> 32) & 0xffff);
+                check1(op.m_checkedOffset - startTermPosition - 6, (allCharacters >> 48) & 0xff);
                 return;
             }
             case 8: {
-                check8(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
+                check8(op.m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
                 return;
             }
 #endif
@@ -1591,18 +1599,18 @@
             switch (numberCharacters) {
             case 1:
                 // Use 32bit width of allCharacters since Yarr counts surrogate pairs as one character with unicode flag.
-                check1(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
+                check1(op.m_checkedOffset - startTermPosition, allCharacters & 0xffffffff);
                 return;
             case 2:
-                check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+                check2(op.m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
                 return;
 #if CPU(X86_64) || CPU(ARM64) || CPU(RISCV64)
             case 3:
-                check2(m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
-                check1(m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff);
+                check2(op.m_checkedOffset - startTermPosition, allCharacters & 0xffffffff, ignoreCaseMask & 0xffffffff);
+                check1(op.m_checkedOffset - startTermPosition - 2, (allCharacters >> 32) & 0xffff);
                 return;
             case 4:
-                check4(m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
+                check4(op.m_checkedOffset - startTermPosition, allCharacters, ignoreCaseMask);
                 return;
 #endif
             }
@@ -1630,7 +1638,7 @@
         m_jit.sub32(m_regs.index, MacroAssembler::Imm32(scaledMaxCount), countRegister);
 
         MacroAssembler::Label loop(&m_jit);
-        readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
+        readCharacter(op.m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
         // For case-insesitive compares, non-ascii characters that have different
         // upper & lower case representations are converted to a character class.
         ASSERT(!m_pattern.ignoreCase() || isASCIIAlpha(ch) || isCanonicallyUnique(ch, m_canonicalMode));
@@ -1669,7 +1677,7 @@
             MacroAssembler::JumpList failures;
             MacroAssembler::Label loop(&m_jit);
             failures.append(atEndOfInput());
-            failures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
+            failures.append(jumpIfCharNotEquals(ch, op.m_checkedOffset - term->inputPosition, character));
 
             m_jit.add32(MacroAssembler::TrustedImm32(1), m_regs.index);
 #if ENABLE(YARR_JIT_UNICODE_EXPRESSIONS)
@@ -1743,7 +1751,7 @@
             nonGreedyFailures.append(atEndOfInput());
             if (term->quantityMaxCount != quantifyInfinite)
                 nonGreedyFailures.append(m_jit.branch32(MacroAssembler::Equal, countRegister, MacroAssembler::Imm32(term->quantityMaxCount)));
-            nonGreedyFailures.append(jumpIfCharNotEquals(ch, m_checkedOffset - term->inputPosition, character));
+            nonGreedyFailures.append(jumpIfCharNotEquals(ch, op.m_checkedOffset - term->inputPosition, character));
 
             m_jit.add32(MacroAssembler::TrustedImm32(1), m_regs.index);
 #if ENABLE(YARR_JIT_UNICODE_EXPRESSIONS)
@@ -1783,7 +1791,7 @@
         }
 
         MacroAssembler::JumpList matchDest;
-        readCharacter(m_checkedOffset - term->inputPosition, character);
+        readCharacter(op.m_checkedOffset - term->inputPosition, character);
         // If we are matching the "any character" builtin class we only need to read the
         // character and don't need to match as it will always succeed.
         if (term->invert() || !term->characterClass->m_anyCharacter) {
@@ -1840,7 +1848,7 @@
 
         MacroAssembler::Label loop(&m_jit);
         MacroAssembler::JumpList matchDest;
-        readCharacter(m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
+        readCharacter(op.m_checkedOffset - term->inputPosition - scaledMaxCount, character, countRegister);
         // If we are matching the "any character" builtin class we only need to read the
         // character and don't need to match as it will always succeed.
         if (term->invert() || !term->characterClass->m_anyCharacter) {
@@ -1900,11 +1908,11 @@
             failures.append(atEndOfInput());
 
         if (term->invert()) {
-            readCharacter(m_checkedOffset - term->inputPosition, character);
+            readCharacter(op.m_checkedOffset - term->inputPosition, character);
             matchCharacterClass(character, failures, term->characterClass);
         } else {
             MacroAssembler::JumpList matchDest;
-            readCharacter(m_checkedOffset - term->inputPosition, character);
+            readCharacter(op.m_checkedOffset - term->inputPosition, character);
             // If we are matching the "any character" builtin class for non-unicode patterns,
             // we only need to read the character and don't need to match as it will always succeed.
             if (!term->characterClass->m_anyCharacter) {
@@ -1965,7 +1973,7 @@
             MacroAssembler::Label rematchLoop(&m_jit);
             MacroAssembler::Jump doneRematching = m_jit.branchTest32(MacroAssembler::Zero, countRegister);
 
-            readCharacter(m_checkedOffset - term->inputPosition, character);
+            readCharacter(op.m_checkedOffset - term->inputPosition, character);
 
             m_jit.sub32(MacroAssembler::TrustedImm32(1), countRegister);
             m_jit.add32(MacroAssembler::TrustedImm32(1), m_regs.index);
@@ -2030,7 +2038,7 @@
         nonGreedyFailures.append(m_jit.branch32(MacroAssembler::Equal, countRegister, MacroAssembler::Imm32(term->quantityMaxCount)));
 
         MacroAssembler::JumpList matchDest;
-        readCharacter(m_checkedOffset - term->inputPosition, character);
+        readCharacter(op.m_checkedOffset - term->inputPosition, character);
         // If we are matching the "any character" builtin class for non-unicode patterns,
         // we only need to read the character and don't need to match as it will always succeed.
         if (term->invert() || !term->characterClass->m_anyCharacter) {
@@ -2325,7 +2333,6 @@
                 // Upon entry at the head of the set of alternatives, check if input is available
                 // to run the first alternative. (This progresses the input position).
                 op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
-                m_checkedOffset += alternative->m_minimumSize;
 
                 // We will reenter after the check, and assume the input position to have been
                 // set as appropriate to this alternative.
@@ -2351,7 +2358,7 @@
                             dataLogLnIf(Options::verboseRegExpCompilation(), "Found characters fastpath lookahead ", charactersFastPath, " range:[", beginIndex, ", ", endIndex, ")");
 
                             auto loopHead = m_jit.label();
-                            readCharacter(m_checkedOffset - endIndex + 1, m_regs.regT0);
+                            readCharacter(op.m_checkedOffset - endIndex + 1, m_regs.regT0);
                             matched.append(m_jit.branch32(MacroAssembler::Equal, m_regs.regT0, MacroAssembler::TrustedImm32(charactersFastPath.at(0))));
                             if (charactersFastPath.size() > 1)
                                 matched.append(m_jit.branch32(MacroAssembler::Equal, m_regs.regT0, MacroAssembler::TrustedImm32(charactersFastPath.at(1))));
@@ -2362,7 +2369,7 @@
 
                             m_jit.move(MacroAssembler::TrustedImmPtr(pointer), m_regs.regT1);
                             auto loopHead = m_jit.label();
-                            readCharacter(m_checkedOffset - endIndex + 1, m_regs.regT0);
+                            readCharacter(op.m_checkedOffset - endIndex + 1, m_regs.regT0);
 #if CPU(ARM64) || CPU(RISCV64)
                             static_assert(sizeof(BoyerMooreBitmap::Map::WordType) == sizeof(uint64_t));
                             static_assert(1 << 6 == 64);
@@ -2471,10 +2478,6 @@
                     op.m_reentry = m_jit.label();
                     m_jit.sub32(MacroAssembler::Imm32(priorAlternative->m_minimumSize), m_regs.index);
                 }
-
-                if (op.m_op == YarrOpCode::BodyAlternativeNext)
-                    m_checkedOffset += alternative->m_minimumSize;
-                m_checkedOffset -= priorAlternative->m_minimumSize;
                 break;
             }
 
@@ -2506,8 +2509,6 @@
                     op.m_checkAdjust -= disjunction->m_minimumSize;
                 if (op.m_checkAdjust)
                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
-
-                m_checkedOffset += op.m_checkAdjust;
                 break;
             }
             case YarrOpCode::SimpleNestedAlternativeNext:
@@ -2553,10 +2554,6 @@
                     op.m_checkAdjust -= disjunction->m_minimumSize;
                 if (op.m_checkAdjust)
                     op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
-
-                YarrOp& lastOp = m_ops[op.m_previousOp];
-                m_checkedOffset -= lastOp.m_checkAdjust;
-                m_checkedOffset += op.m_checkAdjust;
                 break;
             }
             case YarrOpCode::SimpleNestedAlternativeEnd:
@@ -2580,9 +2577,6 @@
                 // them to this node's m_jumps list.
                 op.m_jumps.link(&m_jit);
                 op.m_jumps.clear();
-
-                YarrOp& lastOp = m_ops[op.m_previousOp];
-                m_checkedOffset -= lastOp.m_checkAdjust;
                 break;
             }
 
@@ -2626,7 +2620,7 @@
                 // offsets only afterwards, at the point the results array is
                 // being accessed.
                 if (term->capture() && m_compileMode == JITCompileMode::IncludeSubpatterns) {
-                    unsigned inputOffset = m_checkedOffset - term->inputPosition;
+                    unsigned inputOffset = op.m_checkedOffset - term->inputPosition;
                     if (term->quantityType == QuantifierType::FixedCount)
                         inputOffset += term->parentheses.disjunction->m_minimumSize;
                     if (inputOffset) {
@@ -2655,7 +2649,7 @@
                 // offsets only afterwards, at the point the results array is
                 // being accessed.
                 if (term->capture() && m_compileMode == JITCompileMode::IncludeSubpatterns) {
-                    unsigned inputOffset = m_checkedOffset - term->inputPosition;
+                    unsigned inputOffset = op.m_checkedOffset - term->inputPosition;
                     if (inputOffset) {
                         m_jit.sub32(m_regs.index, MacroAssembler::Imm32(inputOffset), indexTemporary);
                         setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
@@ -2761,7 +2755,7 @@
                 // being accessed.
                 if (term->capture() && m_compileMode == JITCompileMode::IncludeSubpatterns) {
                     const MacroAssembler::RegisterID indexTemporary = m_regs.regT0;
-                    unsigned inputOffset = m_checkedOffset - term->inputPosition;
+                    unsigned inputOffset = op.m_checkedOffset - term->inputPosition;
                     if (term->quantityType == QuantifierType::FixedCount)
                         inputOffset += term->parentheses.disjunction->m_minimumSize;
                     if (inputOffset) {
@@ -2802,7 +2796,7 @@
                 if (term->capture() && m_compileMode == JITCompileMode::IncludeSubpatterns) {
                     const MacroAssembler::RegisterID indexTemporary = m_regs.regT0;
                     
-                    unsigned inputOffset = m_checkedOffset - term->inputPosition;
+                    unsigned inputOffset = op.m_checkedOffset - term->inputPosition;
                     if (inputOffset) {
                         m_jit.sub32(m_regs.index, MacroAssembler::Imm32(inputOffset), indexTemporary);
                         setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
@@ -2840,12 +2834,8 @@
                 unsigned parenthesesFrameLocation = term->frameLocation;
                 storeToFrame(m_regs.index, parenthesesFrameLocation + BackTrackInfoParentheticalAssertion::beginIndex());
 
-                // Check 
-                op.m_checkAdjust = m_checkedOffset - term->inputPosition;
                 if (op.m_checkAdjust)
                     m_jit.sub32(MacroAssembler::Imm32(op.m_checkAdjust), m_regs.index);
-
-                m_checkedOffset -= op.m_checkAdjust;
                 break;
             }
             case YarrOpCode::ParentheticalAssertionEnd: {
@@ -2861,9 +2851,6 @@
                     op.m_jumps.append(m_jit.jump());
                     op.m_reentry = m_jit.label();
                 }
-
-                YarrOp& lastOp = m_ops[op.m_previousOp];
-                m_checkedOffset += lastOp.m_checkAdjust;
                 break;
             }
 
@@ -2923,12 +2910,6 @@
             case YarrOpCode::BodyAlternativeNext: {
                 PatternAlternative* alternative = op.m_alternative;
 
-                m_checkedOffset -= alternative->m_minimumSize;
-                if (op.m_op == YarrOpCode::BodyAlternativeNext) {
-                    PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
-                    m_checkedOffset += priorAlternative->m_minimumSize;
-                }
-
                 // Is this the last alternative? If not, then if we backtrack to this point we just
                 // need to jump to try to match the next alternative.
                 if (m_ops[op.m_nextOp].m_op != YarrOpCode::BodyAlternativeEnd) {
@@ -3127,9 +3108,6 @@
             case YarrOpCode::BodyAlternativeEnd: {
                 // We should never backtrack back into a body disjunction.
                 ASSERT(m_backtrackingState.isEmpty());
-
-                PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
-                m_checkedOffset += priorAlternative->m_minimumSize;
                 break;
             }
 
@@ -3160,7 +3138,7 @@
                 // Set the backtracks to jump to the appropriate place. We may need
                 // to link the backtracks in one of three different way depending on
                 // the type of alternative we are dealing with:
-                //  - A single alternative, with no simplings.
+                //  - A single alternative, with no siblings.
                 //  - The last alternative of a set of two or more.
                 //  - An alternative other than the last of a set of two or more.
                 //
@@ -3227,12 +3205,6 @@
                     ASSERT(endOp->m_op == YarrOpCode::SimpleNestedAlternativeEnd || endOp->m_op == YarrOpCode::NestedAlternativeEnd);
                     m_backtrackingState.append(endOp->m_jumps);
                 }
-
-                m_checkedOffset -= op.m_checkAdjust;
-                if (!isBegin) {
-                    YarrOp& lastOp = m_ops[op.m_previousOp];
-                    m_checkedOffset += lastOp.m_checkAdjust;
-                }
                 break;
             }
             case YarrOpCode::SimpleNestedAlternativeEnd:
@@ -3258,9 +3230,6 @@
                     // alternative to this point.
                     m_backtrackingState.append(op.m_returnAddress);
                 }
-
-                YarrOp& lastOp = m_ops[op.m_previousOp];
-                m_checkedOffset += lastOp.m_checkAdjust;
                 break;
             }
 
@@ -3509,8 +3478,6 @@
                 // the end of the assertion. Also, if inverted, we will have
                 // added the failure caused by a successful match to this.
                 m_backtrackingState.append(endOp.m_jumps);
-
-                m_checkedOffset += op.m_checkAdjust;
                 break;
             }
             case YarrOpCode::ParentheticalAssertionEnd: {
@@ -3520,9 +3487,6 @@
 
                 // Never backtrack into an assertion; later failures bail to before the begin.
                 m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, &m_jit);
-
-                YarrOp& lastOp = m_ops[op.m_previousOp];
-                m_checkedOffset -= lastOp.m_checkAdjust;
                 break;
             }
 
@@ -3529,7 +3493,6 @@
             case YarrOpCode::MatchFailed:
                 break;
             }
-
         } while (opIndex);
     }
 
@@ -3546,7 +3509,7 @@
     // Alternatives will use the 'Simple' set of ops if either the
     // subpattern is terminal (in which case we will never need to
     // backtrack), or if the subpattern only contains one alternative.
-    void opCompileParenthesesSubpattern(PatternTerm* term)
+    void opCompileParenthesesSubpattern(Checked<unsigned> checkedOffset, PatternTerm* term)
     {
         YarrOpCode parenthesesBeginOpCode;
         YarrOpCode parenthesesEndOpCode;
@@ -3620,12 +3583,22 @@
         m_ops.append(alternativeBeginOpCode);
         m_ops.last().m_previousOp = notFound;
         m_ops.last().m_term = term;
-        Vector<std::unique_ptr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
+        PatternDisjunction* disjunction = term->parentheses.disjunction;
+        auto& alternatives = disjunction->m_alternatives;
+        auto originalCheckedOffset = checkedOffset;
         for (unsigned i = 0; i < alternatives.size(); ++i) {
             size_t lastOpIndex = m_ops.size() - 1;
 
             PatternAlternative* nestedAlternative = alternatives[i].get();
-            opCompileAlternative(nestedAlternative);
+            {
+                // Calculate how much input we need to check for, and if non-zero check.
+                YarrOp& lastOp = m_ops[lastOpIndex];
+                lastOp.m_checkAdjust = nestedAlternative->m_minimumSize;
+                if ((term->quantityType == QuantifierType::FixedCount) && (term->type != PatternTerm::Type::ParentheticalAssertion))
+                    lastOp.m_checkAdjust -= disjunction->m_minimumSize;
+                lastOp.m_checkedOffset = checkedOffset + lastOp.m_checkAdjust;
+            }
+            opCompileAlternative(m_ops[lastOpIndex].m_checkedOffset, nestedAlternative);
 
             size_t thisOpIndex = m_ops.size();
             m_ops.append(YarrOp(alternativeNextOpCode));
@@ -3643,6 +3616,7 @@
         lastOp.m_op = alternativeEndOpCode;
         lastOp.m_alternative = nullptr;
         lastOp.m_nextOp = notFound;
+        lastOp.m_checkedOffset = checkedOffset;
 
         size_t parenEnd = m_ops.size();
         m_ops.append(parenthesesEndOpCode);
@@ -3650,9 +3624,11 @@
         m_ops[parenBegin].m_term = term;
         m_ops[parenBegin].m_previousOp = notFound;
         m_ops[parenBegin].m_nextOp = parenEnd;
+        m_ops[parenBegin].m_checkedOffset = checkedOffset;
         m_ops[parenEnd].m_term = term;
         m_ops[parenEnd].m_previousOp = parenBegin;
         m_ops[parenEnd].m_nextOp = notFound;
+        m_ops[parenEnd].m_checkedOffset = checkedOffset;
     }
 
     // opCompileParentheticalAssertion
@@ -3663,7 +3639,7 @@
     // We can always use the OpSimpleNestedAlternative nodes in the
     // case of parenthetical assertions since these only ever match
     // once, and will never backtrack back into the assertion.
-    void opCompileParentheticalAssertion(PatternTerm* term)
+    void opCompileParentheticalAssertion(Checked<unsigned> checkedOffset, PatternTerm* term)
     {
         if (UNLIKELY(!isSafeToRecurse())) {
             m_failureReason = JITFailureReason::ParenthesisNestedTooDeep;
@@ -3670,18 +3646,31 @@
             return;
         }
 
+        auto originalCheckedOffset = checkedOffset;
         size_t parenBegin = m_ops.size();
         m_ops.append(YarrOpCode::ParentheticalAssertionBegin);
+        m_ops.last().m_checkAdjust = checkedOffset - term->inputPosition;
+        checkedOffset -= m_ops.last().m_checkAdjust;
+        m_ops.last().m_checkedOffset = checkedOffset;
 
         m_ops.append(YarrOpCode::SimpleNestedAlternativeBegin);
         m_ops.last().m_previousOp = notFound;
         m_ops.last().m_term = term;
-        Vector<std::unique_ptr<PatternAlternative>>& alternatives =  term->parentheses.disjunction->m_alternatives;
+        PatternDisjunction* disjunction = term->parentheses.disjunction;
+        auto& alternatives = disjunction->m_alternatives;
         for (unsigned i = 0; i < alternatives.size(); ++i) {
             size_t lastOpIndex = m_ops.size() - 1;
 
             PatternAlternative* nestedAlternative = alternatives[i].get();
-            opCompileAlternative(nestedAlternative);
+            {
+                // Calculate how much input we need to check for, and if non-zero check.
+                YarrOp& lastOp = m_ops[lastOpIndex];
+                lastOp.m_checkAdjust = nestedAlternative->m_minimumSize;
+                if ((term->quantityType == QuantifierType::FixedCount) && (term->type != PatternTerm::Type::ParentheticalAssertion))
+                    lastOp.m_checkAdjust -= disjunction->m_minimumSize;
+                lastOp.m_checkedOffset = checkedOffset + lastOp.m_checkAdjust;
+            }
+            opCompileAlternative(m_ops[lastOpIndex].m_checkedOffset, nestedAlternative);
 
             size_t thisOpIndex = m_ops.size();
             m_ops.append(YarrOp(YarrOpCode::SimpleNestedAlternativeNext));
@@ -3699,6 +3688,7 @@
         lastOp.m_op = YarrOpCode::SimpleNestedAlternativeEnd;
         lastOp.m_alternative = nullptr;
         lastOp.m_nextOp = notFound;
+        lastOp.m_checkedOffset = checkedOffset;
 
         size_t parenEnd = m_ops.size();
         m_ops.append(YarrOpCode::ParentheticalAssertionEnd);
@@ -3709,11 +3699,12 @@
         m_ops[parenEnd].m_term = term;
         m_ops[parenEnd].m_previousOp = parenBegin;
         m_ops[parenEnd].m_nextOp = notFound;
+        m_ops[parenEnd].m_checkedOffset = originalCheckedOffset;
     }
 
     // opCompileAlternative
     // Called to emit nodes for all terms in an alternative.
-    void opCompileAlternative(PatternAlternative* alternative)
+    void opCompileAlternative(Checked<unsigned> checkedOffset, PatternAlternative* alternative)
     {
         optimizeAlternative(alternative);
 
@@ -3722,15 +3713,16 @@
 
             switch (term->type) {
             case PatternTerm::Type::ParenthesesSubpattern:
-                opCompileParenthesesSubpattern(term);
+                opCompileParenthesesSubpattern(checkedOffset, term);
                 break;
 
             case PatternTerm::Type::ParentheticalAssertion:
-                opCompileParentheticalAssertion(term);
+                opCompileParentheticalAssertion(checkedOffset, term);
                 break;
 
             default:
                 m_ops.append(term);
+                m_ops.last().m_checkedOffset = checkedOffset;
             }
         }
     }
@@ -3755,7 +3747,7 @@
             return;
         }
         
-        Vector<std::unique_ptr<PatternAlternative>>& alternatives = disjunction->m_alternatives;
+        auto& alternatives = disjunction->m_alternatives;
         size_t currentAlternativeIndex = 0;
 
         // Emit the 'once through' alternatives.
@@ -3765,8 +3757,10 @@
 
             do {
                 size_t lastOpIndex = m_ops.size() - 1;
+
                 PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
-                opCompileAlternative(alternative);
+                m_ops[lastOpIndex].m_checkedOffset = alternative->m_minimumSize;
+                opCompileAlternative(alternative->m_minimumSize, alternative);
 
                 size_t thisOpIndex = m_ops.size();
                 m_ops.append(YarrOp(YarrOpCode::BodyAlternativeNext));
@@ -3787,10 +3781,12 @@
             lastOp.m_op = YarrOpCode::BodyAlternativeEnd;
             lastOp.m_alternative = nullptr;
             lastOp.m_nextOp = notFound;
+            lastOp.m_checkedOffset = 0;
         }
 
         if (currentAlternativeIndex == alternatives.size()) {
             m_ops.append(YarrOp(YarrOpCode::MatchFailed));
+            m_ops.last().m_checkedOffset = 0;
             return;
         }
 
@@ -3815,9 +3811,11 @@
 
         do {
             size_t lastOpIndex = m_ops.size() - 1;
+
             PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
             ASSERT(!alternative->onceThrough());
-            opCompileAlternative(alternative);
+            m_ops[lastOpIndex].m_checkedOffset = alternative->m_minimumSize;
+            opCompileAlternative(alternative->m_minimumSize, alternative);
 
             size_t thisOpIndex = m_ops.size();
             m_ops.append(YarrOp(YarrOpCode::BodyAlternativeNext));
@@ -3836,6 +3834,7 @@
         lastOp.m_op = YarrOpCode::BodyAlternativeEnd;
         lastOp.m_alternative = nullptr;
         lastOp.m_nextOp = repeatLoop;
+        lastOp.m_checkedOffset = 0;
     }
 
     bool collectBoyerMooreInfo(PatternDisjunction* disjunction, size_t currentAlternativeIndex, BoyerMooreInfo& bmInfo)
@@ -4477,29 +4476,29 @@
             out.print("Term ");
             switch (term->type) {
             case PatternTerm::Type::AssertionBOL:
-                out.print("Assert BOL");
+                out.printf("Assert BOL checked-offset:(%u)", op.m_checkedOffset.value());
                 break;
 
             case PatternTerm::Type::AssertionEOL:
-                out.print("Assert EOL");
+                out.printf("Assert EOL checked-offset:(%u)", op.m_checkedOffset.value());
                 break;
 
             case PatternTerm::Type::BackReference:
-                out.printf("BackReference pattern #%u", term->backReferenceSubpatternId);
+                out.printf("BackReference pattern #%u checked-offset:(%u)", term->backReferenceSubpatternId, op.m_checkedOffset.value());
                 term->dumpQuantifier(out);
                 break;
 
             case PatternTerm::Type::PatternCharacter:
-                out.print("PatternCharacter ");
+                out.printf("PatternCharacter checked-offset:(%u) ", op.m_checkedOffset.value());
                 dumpUChar32(out, term->patternCharacter);
                 if (m_pattern.ignoreCase())
-                    out.print(" ignore case");
+                    out.print("ignore case ");
 
                 term->dumpQuantifier(out);
                 break;
 
             case PatternTerm::Type::CharacterClass:
-                out.print("PatternCharacterClass ");
+                out.printf("PatternCharacterClass checked-offset:(%u) ", op.m_checkedOffset.value());
                 if (term->invert())
                     out.print("not ");
                 dumpCharacterClass(out, &m_pattern, term->characterClass);
@@ -4507,15 +4506,15 @@
                 break;
 
             case PatternTerm::Type::AssertionWordBoundary:
-                out.printf("%sword boundary", term->invert() ? "non-" : "");
+                out.printf("%sword boundary checked-offset:(%u)", term->invert() ? "non-" : "", op.m_checkedOffset.value());
                 break;
 
             case PatternTerm::Type::DotStarEnclosure:
-                out.print(".* enclosure");
+                out.printf(".* enclosure checked-offset:(%u)", op.m_checkedOffset.value());
                 break;
 
             case PatternTerm::Type::ForwardReference:
-                out.print("ForwardReference <not handled>");
+                out.printf("ForwardReference <not handled> checked-offset:(%u)", op.m_checkedOffset.value());
                 break;
 
             case PatternTerm::Type::ParenthesesSubpattern:
@@ -4531,67 +4530,67 @@
         }
 
         case YarrOpCode::BodyAlternativeBegin:
-            out.printf("BodyAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
+            out.printf("BodyAlternativeBegin minimum-size:(%u),checked-offset:(%u)\n", op.m_alternative->m_minimumSize, op.m_checkedOffset.value());
             return 0;
 
         case YarrOpCode::BodyAlternativeNext:
-            out.printf("BodyAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
+            out.printf("BodyAlternativeNext minimum-size:(%u),checked-offset:(%u)\n", op.m_alternative->m_minimumSize, op.m_checkedOffset.value());
             return 0;
 
         case YarrOpCode::BodyAlternativeEnd:
-            out.print("BodyAlternativeEnd\n");
+            out.printf("BodyAlternativeEnd checked-offset:(%u)\n", op.m_checkedOffset.value());
             return 0;
 
         case YarrOpCode::SimpleNestedAlternativeBegin:
-            out.printf("SimpleNestedAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
+            out.printf("SimpleNestedAlternativeBegin minimum-size:(%u),checked-offset:(%u)\n", op.m_alternative->m_minimumSize, op.m_checkedOffset.value());
             return 1;
 
         case YarrOpCode::NestedAlternativeBegin:
-            out.printf("NestedAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
+            out.printf("NestedAlternativeBegin minimum-size:(%u),checked-offset:(%u)\n", op.m_alternative->m_minimumSize, op.m_checkedOffset.value());
             return 1;
 
         case YarrOpCode::SimpleNestedAlternativeNext:
-            out.printf("SimpleNestedAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
+            out.printf("SimpleNestedAlternativeNext minimum-size:(%u),checked-offset:(%u)\n", op.m_alternative->m_minimumSize, op.m_checkedOffset.value());
             return 0;
 
         case YarrOpCode::NestedAlternativeNext:
-            out.printf("NestedAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
+            out.printf("NestedAlternativeNext minimum-size:(%u),checked-offset:(%u)\n", op.m_alternative->m_minimumSize, op.m_checkedOffset.value());
             return 0;
 
         case YarrOpCode::SimpleNestedAlternativeEnd:
-            out.print("SimpleNestedAlternativeEnd");
+            out.printf("SimpleNestedAlternativeEnd checked-offset:(%u) ", op.m_checkedOffset.value());
             term->dumpQuantifier(out);
             out.print("\n");
             return -1;
 
         case YarrOpCode::NestedAlternativeEnd:
-            out.print("NestedAlternativeEnd");
+            out.printf("NestedAlternativeEnd checked-offset:(%u) ", op.m_checkedOffset.value());
             term->dumpQuantifier(out);
             out.print("\n");
             return -1;
 
         case YarrOpCode::ParenthesesSubpatternOnceBegin:
-            out.print("ParenthesesSubpatternOnceBegin ");
+            out.printf("ParenthesesSubpatternOnceBegin checked-offset:(%u) ", op.m_checkedOffset.value());
             if (term->capture())
-                out.printf("capturing pattern #%u", term->parentheses.subpatternId);
+                out.printf("capturing pattern #%u ", term->parentheses.subpatternId);
             else
-                out.print("non-capturing");
+                out.print("non-capturing ");
             term->dumpQuantifier(out);
             out.print("\n");
             return 0;
 
         case YarrOpCode::ParenthesesSubpatternOnceEnd:
-            out.print("ParenthesesSubpatternOnceEnd ");
+            out.printf("ParenthesesSubpatternOnceEnd checked-offset:(%u) ", op.m_checkedOffset.value());
             if (term->capture())
-                out.printf("capturing pattern #%u", term->parentheses.subpatternId);
+                out.printf("capturing pattern #%u ", term->parentheses.subpatternId);
             else
-                out.print("non-capturing");
+                out.print("non-capturing ");
             term->dumpQuantifier(out);
             out.print("\n");
             return 0;
 
         case YarrOpCode::ParenthesesSubpatternTerminalBegin:
-            out.print("ParenthesesSubpatternTerminalBegin ");
+            out.printf("ParenthesesSubpatternTerminalBegin checked-offset:(%u) ", op.m_checkedOffset.value());
             if (term->capture())
                 out.printf("capturing pattern #%u\n", term->parentheses.subpatternId);
             else
@@ -4599,7 +4598,7 @@
             return 0;
 
         case YarrOpCode::ParenthesesSubpatternTerminalEnd:
-            out.print("ParenthesesSubpatternTerminalEnd ");
+            out.printf("ParenthesesSubpatternTerminalEnd checked-offset:(%u) ", op.m_checkedOffset.value());
             if (term->capture())
                 out.printf("capturing pattern #%u\n", term->parentheses.subpatternId);
             else
@@ -4607,7 +4606,7 @@
             return 0;
 
         case YarrOpCode::ParenthesesSubpatternBegin:
-            out.print("ParenthesesSubpatternBegin ");
+            out.printf("ParenthesesSubpatternBegin checked-offset:(%u) ", op.m_checkedOffset.value());
             if (term->capture())
                 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
             else
@@ -4617,7 +4616,7 @@
             return 0;
 
         case YarrOpCode::ParenthesesSubpatternEnd:
-            out.print("ParenthesesSubpatternEnd ");
+            out.printf("ParenthesesSubpatternEnd checked-offset:(%u) ", op.m_checkedOffset.value());
             if (term->capture())
                 out.printf("capturing pattern #%u", term->parentheses.subpatternId);
             else
@@ -4627,15 +4626,15 @@
             return 0;
 
         case YarrOpCode::ParentheticalAssertionBegin:
-            out.printf("ParentheticalAssertionBegin%s\n", term->invert() ? " inverted" : "");
+            out.printf("ParentheticalAssertionBegin%s checked-offset:(%u)\n", term->invert() ? " inverted" : "", op.m_checkedOffset.value());
             return 0;
 
         case YarrOpCode::ParentheticalAssertionEnd:
-            out.printf("ParentheticalAssertionEnd%s\n", term->invert() ? " inverted" : "");
+            out.printf("ParentheticalAssertionEnd%s checked-offset:(%u)\n", term->invert() ? " inverted" : "", op.m_checkedOffset.value());
             return 0;
 
         case YarrOpCode::MatchFailed:
-            out.print("MatchFailed\n");
+            out.printf("MatchFailed checked-offset:(%u)\n", op.m_checkedOffset.value());
             return 0;
         }
 
@@ -4681,18 +4680,6 @@
     Vector<UniqueRef<BoyerMooreInfo>, 4> m_bmInfos;
     Vector<UniqueRef<BoyerMooreBitmap::Map>> m_bmMaps;
 
-    // This records the current input offset being applied due to the current
-    // set of alternatives we are nested within. E.g. when matching the
-    // character 'b' within the regular _expression_ /abc/, we will know that
-    // the minimum size for the alternative is 3, checked upon entry to the
-    // alternative, and that 'b' is at offset 1 from the start, and as such
-    // when matching 'b' we need to apply an offset of -2 to the load.
-    //
-    // FIXME: This should go away. Rather than tracking this value throughout
-    // code generation, we should gather this information up front & store it
-    // on the YarrOp structure.
-    Checked<unsigned> m_checkedOffset;
-
     // This class records state whilst generating the backtracking path of code.
     BacktrackingState m_backtrackingState;
     
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to