Author: vgritsenko
Date: Tue Mar 13 17:22:27 2007
New Revision: 517946

URL: http://svn.apache.org/viewvc?view=rev&rev=517946
Log:
optimize inner loop

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=517946&r1=517945&r2=517946 ============================================================================== --- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original) +++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Tue Mar 13 17:22:27 2007 @@ -818,9 +818,11 @@ switch (opcode) { case OP_MAYBE: + case OP_STAR: { - // Try to match the following subexpr. - // If it succeeds, it will continue matching by itself without returning here. + // Try to match the following subexpr. If it matches: + // MAYBE: Continues matching rest of the expression + // STAR: Points back here to repeat subexpr matching if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1) { return idxNew; @@ -832,7 +834,7 @@ case OP_PLUS: { - // Try to match the following subexpr again (and again (and ... + // Try to match the subexpr again (and again (and ... if ((idxNew = matchNodes(next, maxNode, idx)) != -1) { return idxNew; @@ -840,24 +842,13 @@ // If failed, just continue with the rest of expression // Rest is located at the next pointer of the next instruction - // which must be OP_CONTINUE. - node = next + instruction[next + offsetNext]; + // (which must be OP_CONTINUE) + node = next + (short) instruction[next + offsetNext]; continue; } - case OP_STAR: - { - // Try to match the following subexpr (and again (and again (and ... 
- if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1) - { - return idxNew; - } - - // If failed, just continue with the rest of expression - break; - } - case OP_RELUCTANTMAYBE: + case OP_RELUCTANTSTAR: { // Try to match the rest without using the reluctant subexpr if ((idxNew = matchNodes(next, maxNode, idx)) != -1) @@ -865,14 +856,16 @@ return idxNew; } - // Try reluctant subexpr, which continues to the rest of the expression + // Try reluctant subexpr. If it matches: + // RELUCTANTMAYBE: Continues matching rest of the expression + // RELUCTANTSTAR: Points back here to repeat reluctant star matching return matchNodes(node + nodeSize, next, idx); } case OP_RELUCTANTPLUS: { // Continue matching the rest without using the reluctant subexpr - if ((idxNew = matchNodes(next + instruction[next + offsetNext], maxNode, idx)) != -1) + if ((idxNew = matchNodes(next + (short) instruction[next + offsetNext], maxNode, idx)) != -1) { return idxNew; } @@ -881,17 +874,6 @@ break; } - case OP_RELUCTANTSTAR: - { - // Try to match the rest without using the reluctant subexpr - if ((idxNew = matchNodes(next, maxNode, idx)) != -1) { - return idxNew; - } - - // Try reluctant subexpr - return matchNodes(node + nodeSize, next, idx); - } - case OP_OPEN: // Match subexpression @@ -902,7 +884,7 @@ if ((idxNew = matchNodes(next, maxNode, idx)) != -1) { // Increase valid paren count - if ((opdata + 1) > parenCount) + if (opdata >= parenCount) { parenCount = opdata + 1; } @@ -925,7 +907,7 @@ if ((idxNew = matchNodes(next, maxNode, idx)) != -1) { // Increase valid paren count - if ((opdata + 1) > parenCount) + if (opdata >= parenCount) { parenCount = opdata + 1; } @@ -938,11 +920,6 @@ } return idxNew; - case OP_OPEN_CLUSTER: - case OP_CLOSE_CLUSTER: - // starting or ending the matching of a subexpression which has no backref. 
- return matchNodes(next, maxNode, idx); - case OP_BACKREF: { // Get the start and end of the backref @@ -992,10 +969,9 @@ // If we're multiline matching, we could still be at the start of a line if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE) { - // If not at start of line, give up - if (idx <= 0 || !isNewline(idx - 1)) { - return -1; - } else { + // Continue if at the start of a line + if (isNewline(idx - 1)) + { break; } } @@ -1011,10 +987,9 @@ // If we're multi-line matching if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE) { - // Give up if we're not at the end of a line - if (!isNewline(idx)) { - return -1; - } else { + // Continue if we're at the end of a line + if (isNewline(idx)) + { break; } } @@ -1321,6 +1296,7 @@ case OP_BRANCH: { // Check for choices + // FIXME Dead code - only reason to keep is backward compat with pre-compiled exprs. Remove? if (instruction[next /* + offsetOpcode */] != OP_BRANCH) { // If there aren't any other choices, just evaluate this branch. @@ -1347,6 +1323,10 @@ // Failed to match any branch! return -1; } + + case OP_OPEN_CLUSTER: + case OP_CLOSE_CLUSTER: + // starting or ending the matching of a subexpression which has no backref. case OP_NOTHING: case OP_GOTO: Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java?view=diff&rev=517946&r1=517945&r2=517946 ============================================================================== --- jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java (original) +++ jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java Tue Mar 13 17:22:27 2007 @@ -423,8 +423,10 @@ // Bug 38331: Large program try { - new RE("a{8192}"); - fail("a{8192} should fail to compile."); + REDebugCompiler c = new REDebugCompiler(); + c.compile("(a{8192})?"); + fail("(a{8192})? 
should fail to compile."); + c.dumpProgram(); } catch (RESyntaxException e) { // expected } --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]