Author: vgritsenko
Date: Tue Mar 13 17:22:27 2007
New Revision: 517946
URL: http://svn.apache.org/viewvc?view=rev&rev=517946
Log:
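Optimize inner loop: merge the duplicated OP_STAR/OP_MAYBE and OP_RELUCTANTSTAR/OP_RELUCTANTMAYBE cases, let OP_OPEN_CLUSTER/OP_CLOSE_CLUSTER fall through with OP_NOTHING/OP_GOTO, and cast next-pointer reads to short.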
Modified:
jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=517946&r1=517945&r2=517946
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Tue Mar 13 17:22:27 2007
@@ -818,9 +818,11 @@
switch (opcode)
{
case OP_MAYBE:
+ case OP_STAR:
{
- // Try to match the following subexpr.
- // If it succeeds, it will continue matching by itself without returning here.
+ // Try to match the following subexpr. If it matches:
+ // MAYBE: Continues matching rest of the expression
+ // STAR: Points back here to repeat subexpr matching
if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1)
{
return idxNew;
@@ -832,7 +834,7 @@
case OP_PLUS:
{
- // Try to match the following subexpr again (and again (and ...
+ // Try to match the subexpr again (and again (and ...
if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
{
return idxNew;
@@ -840,24 +842,13 @@
// If failed, just continue with the rest of expression
// Rest is located at the next pointer of the next instruction
- // which must be OP_CONTINUE.
- node = next + instruction[next + offsetNext];
+ // (which must be OP_CONTINUE)
+ node = next + (short) instruction[next + offsetNext];
continue;
}
- case OP_STAR:
- {
- // Try to match the following subexpr (and again (and again (and ...
- if ((idxNew = matchNodes(node + nodeSize, maxNode, idx)) != -1)
- {
- return idxNew;
- }
-
- // If failed, just continue with the rest of expression
- break;
- }
-
case OP_RELUCTANTMAYBE:
+ case OP_RELUCTANTSTAR:
{
// Try to match the rest without using the reluctant subexpr
if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
@@ -865,14 +856,16 @@
return idxNew;
}
- // Try reluctant subexpr, which continues to the rest of the expression
+ // Try reluctant subexpr. If it matches:
+ // RELUCTANTMAYBE: Continues matching rest of the expression
+ // RELUCTANTSTAR: Points back here to repeat reluctant star matching
return matchNodes(node + nodeSize, next, idx);
}
case OP_RELUCTANTPLUS:
{
// Continue matching the rest without using the reluctant subexpr
- if ((idxNew = matchNodes(next + instruction[next + offsetNext], maxNode, idx)) != -1)
+ if ((idxNew = matchNodes(next + (short) instruction[next + offsetNext], maxNode, idx)) != -1)
{
return idxNew;
}
@@ -881,17 +874,6 @@
break;
}
- case OP_RELUCTANTSTAR:
- {
- // Try to match the rest without using the reluctant subexpr
- if ((idxNew = matchNodes(next, maxNode, idx)) != -1) {
- return idxNew;
- }
-
- // Try reluctant subexpr
- return matchNodes(node + nodeSize, next, idx);
- }
-
case OP_OPEN:
// Match subexpression
@@ -902,7 +884,7 @@
if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
{
// Increase valid paren count
- if ((opdata + 1) > parenCount)
+ if (opdata >= parenCount)
{
parenCount = opdata + 1;
}
@@ -925,7 +907,7 @@
if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
{
// Increase valid paren count
- if ((opdata + 1) > parenCount)
+ if (opdata >= parenCount)
{
parenCount = opdata + 1;
}
@@ -938,11 +920,6 @@
}
return idxNew;
- case OP_OPEN_CLUSTER:
- case OP_CLOSE_CLUSTER:
- // starting or ending the matching of a subexpression which has no backref.
- return matchNodes(next, maxNode, idx);
-
case OP_BACKREF:
{
// Get the start and end of the backref
@@ -992,10 +969,9 @@
// If we're multiline matching, we could still be at the start of a line
if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
{
- // If not at start of line, give up
- if (idx <= 0 || !isNewline(idx - 1)) {
- return -1;
- } else {
+ // Continue if at the start of a line
+ if (isNewline(idx - 1))
+ {
break;
}
}
@@ -1011,10 +987,9 @@
// If we're multi-line matching
if ((matchFlags & MATCH_MULTILINE) == MATCH_MULTILINE)
{
- // Give up if we're not at the end of a line
- if (!isNewline(idx)) {
- return -1;
- } else {
+ // Continue if we're at the end of a line
+ if (isNewline(idx))
+ {
break;
}
}
@@ -1321,6 +1296,7 @@
case OP_BRANCH:
{
// Check for choices
+ // FIXME Dead code - only reason to keep is backward compat with pre-compiled exprs. Remove?
if (instruction[next /* + offsetOpcode */] != OP_BRANCH)
{
// If there aren't any other choices, just evaluate this branch.
@@ -1347,6 +1323,10 @@
// Failed to match any branch!
return -1;
}
+
+ case OP_OPEN_CLUSTER:
+ case OP_CLOSE_CLUSTER:
+ // starting or ending the matching of a subexpression which has no backref.
case OP_NOTHING:
case OP_GOTO:
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java
URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java?view=diff&rev=517946&r1=517945&r2=517946
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java Tue Mar 13 17:22:27 2007
@@ -423,8 +423,10 @@
// Bug 38331: Large program
try {
- new RE("a{8192}");
- fail("a{8192} should fail to compile.");
+ REDebugCompiler c = new REDebugCompiler();
+ c.compile("(a{8192})?");
+ fail("(a{8192})? should fail to compile.");
+ c.dumpProgram();
} catch (RESyntaxException e) {
// expected
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]