Author: vgritsenko
Date: Mon Mar 12 20:02:04 2007
New Revision: 517503
URL: http://svn.apache.org/viewvc?view=rev&rev=517503
Log:
Reimplemented */+/? closures to use STAR/PLUS/MAYBE operations.
Fixed bug #9153.
Modified:
jakarta/regexp/trunk/docs/RETest.txt
jakarta/regexp/trunk/docs/changes.html
jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java
jakarta/regexp/trunk/xdocs/RETest.txt
jakarta/regexp/trunk/xdocs/changes.xml
Modified: jakarta/regexp/trunk/docs/RETest.txt
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/RETest.txt?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/docs/RETest.txt (original)
+++ jakarta/regexp/trunk/docs/RETest.txt Mon Mar 12 20:02:04 2007
@@ -1566,3 +1566,8 @@
YES
1
aaabcca
+
+#231
+^a{1,35}$
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab
+NO
Modified: jakarta/regexp/trunk/docs/changes.html
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/changes.html?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/docs/changes.html (original)
+++ jakarta/regexp/trunk/docs/changes.html Mon Mar 12 20:02:04 2007
@@ -92,6 +92,9 @@
<h3>Version 1.5-dev</h3>
<ul>
<li>Fixed Bug
+ <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=9153">9153</a>:
+ {m,n} implementation had exponential complexity (VG)</li>
+<li>Fixed Bug
<a
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=27763">27763</a>:
RE incorrectly processed reluctant matchers (VG)</li>
<li>Fixed Bug
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Mon Mar 12 20:02:04
2007
@@ -817,6 +817,46 @@
switch (opcode)
{
+ case OP_MAYBE:
+ {
+ // Try to match the following subexpr.
+ // If it succeeds, it will continue matching by itself
without returning here.
+ if ((idxNew = matchNodes(node + nodeSize, maxNode,
idx)) != -1)
+ {
+ return idxNew;
+ }
+
+ // If failed, just continue with the rest of expression
+ break;
+ }
+
+ case OP_PLUS:
+ {
+ // Try to match the following subexpr again (and again
(and ...
+ if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
+ {
+ return idxNew;
+ }
+
+ // If failed, just continue with the rest of expression
+ // Rest is located at the next pointer of the next
instruction
+ // which must be OP_CONTINUE.
+ node = next + instruction[next + offsetNext];
+ continue;
+ }
+
+ case OP_STAR:
+ {
+ // Try to match the following subexpr (and again (and
again (and ...
+ if ((idxNew = matchNodes(node + nodeSize, maxNode,
idx)) != -1)
+ {
+ return idxNew;
+ }
+
+ // If failed, just continue with the rest of expression
+ break;
+ }
+
case OP_RELUCTANTMAYBE:
{
// Try to match the rest without using the reluctant
subexpr
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java Mon Mar 12
20:02:04 2007
@@ -56,13 +56,9 @@
static final int ESC_CLASS = 0xffffd; // Escape represents a
whole class of characters
// {m,n} stacks
- int maxBrackets = 10; // Maximum number of
bracket pairs
static final int bracketUnbounded = -1; // Unbounded value
- int brackets = 0; // Number of bracket
sets
- int[] bracketStart = null; // Starting point
- int[] bracketEnd = null; // Ending point
- int[] bracketMin = null; // Minimum number of
matches
- int[] bracketOpt = null; // Additional optional
matches
+ int bracketMin; // Minimum number of
matches
+ int bracketOpt; // Additional optional
matches
// Lookup table for POSIX character class names
static final Hashtable hashPOSIX = new Hashtable();
@@ -236,59 +232,10 @@
}
/**
- * Allocate storage for brackets only as needed
- */
- void allocBrackets()
- {
- // Allocate bracket stacks if not already done
- if (bracketStart == null)
- {
- // Allocate storage
- bracketStart = new int[maxBrackets];
- bracketEnd = new int[maxBrackets];
- bracketMin = new int[maxBrackets];
- bracketOpt = new int[maxBrackets];
-
- // Initialize to invalid values
- for (int i = 0; i < maxBrackets; i++)
- {
- bracketStart[i] = bracketEnd[i] = bracketMin[i] =
bracketOpt[i] = -1;
- }
- }
- }
-
- /** Enlarge storage for brackets only as needed. */
- synchronized void reallocBrackets() {
- // trick the tricky
- if (bracketStart == null) {
- allocBrackets();
- }
-
- int new_size = maxBrackets * 2;
- int[] new_bS = new int[new_size];
- int[] new_bE = new int[new_size];
- int[] new_bM = new int[new_size];
- int[] new_bO = new int[new_size];
- // Initialize to invalid values
- for (int i=brackets; i<new_size; i++) {
- new_bS[i] = new_bE[i] = new_bM[i] = new_bO[i] = -1;
- }
- System.arraycopy(bracketStart,0, new_bS,0, brackets);
- System.arraycopy(bracketEnd,0, new_bE,0, brackets);
- System.arraycopy(bracketMin,0, new_bM,0, brackets);
- System.arraycopy(bracketOpt,0, new_bO,0, brackets);
- bracketStart = new_bS;
- bracketEnd = new_bE;
- bracketMin = new_bM;
- bracketOpt = new_bO;
- maxBrackets = new_size;
- }
-
- /**
* Match bracket {m,n} expression put results in bracket member variables
* @exception RESyntaxException Thrown if the regular expression has
invalid syntax.
*/
- void bracket(int bracket) throws RESyntaxException
+ void bracket() throws RESyntaxException
{
// Current character must be a '{'
if (idx >= len || pattern.charAt(idx++) != '{')
@@ -310,7 +257,7 @@
}
try
{
- bracketMin[bracket] = Integer.parseInt(number.toString());
+ bracketMin = Integer.parseInt(number.toString());
}
catch (NumberFormatException e)
{
@@ -327,7 +274,7 @@
if (pattern.charAt(idx) == '}')
{
idx++;
- bracketOpt[bracket] = 0;
+ bracketOpt = 0;
return;
}
@@ -347,7 +294,7 @@
if (pattern.charAt(idx) == '}')
{
idx++;
- bracketOpt[bracket] = bracketUnbounded;
+ bracketOpt = bracketUnbounded;
return;
}
@@ -365,7 +312,7 @@
}
try
{
- bracketOpt[bracket] = Integer.parseInt(number.toString()) -
bracketMin[bracket];
+ bracketOpt = Integer.parseInt(number.toString()) - bracketMin;
}
catch (NumberFormatException e)
{
@@ -373,7 +320,7 @@
}
// Optional repetitions must be >= 0
- if (bracketOpt[bracket] < 0)
+ if (bracketOpt < 0)
{
syntaxError("Bad range");
}
@@ -1018,132 +965,92 @@
{
case '{':
{
- // We look for our bracket in the list
- boolean found = false;
- int i;
- allocBrackets();
- for (i = 0; i < brackets; i++)
- {
- if (bracketStart[i] == idx)
- {
- // Re-initialize bracketMin, bracketOpt since they
are all used up by now
- if (bracketMin[i] == -1) {
- bracket(i);
- }
- found = true;
- break;
- }
- }
+ bracket();
+ int bracketEnd = idx;
+ int bracketMin = this.bracketMin;
+ int bracketOpt = this.bracketOpt;
- // If its not in the list we parse the {m,n}
- if (!found)
- {
- if (brackets >= maxBrackets)
- {
- reallocBrackets();
- }
- bracketStart[brackets] = idx;
- bracket(brackets);
- bracketEnd[brackets] = idx;
- i = brackets++;
- }
+ // Pointer to the last terminal
+ int pos = ret;
// Process min first
- if (bracketMin[i] > 0)
+ for (int c = 0; c < bracketMin; c++)
{
- if (--bracketMin[i] > 0 || bracketOpt[i] != 0) {
- // Rewind stream and run it through again - more
matchers coming
- for (int j = 0; j < i; j++) {
- bracketMin[j] = -1;
- }
- idx = idxBeforeTerminal;
- } else {
- // Bug #1030: No optional matches - no need to
rewind
- idx = bracketEnd[i];
- }
- break;
+ // Rewind stream and run it through again - more
matchers coming
+ idx = idxBeforeTerminal;
+ setNextOfEnd(pos, pos = terminal(terminalFlags));
}
// Do the right thing for maximum ({m,})
- if (bracketOpt[i] == bracketUnbounded)
+ if (bracketOpt == bracketUnbounded)
{
// Drop through now and closure expression.
// We are done with the {m,} expr, so skip rest
- closureType = '*';
- bracketOpt[i] = 0;
- idx = bracketEnd[i];
+ idx = bracketEnd;
+ nodeInsert(RE.OP_STAR, 0, pos);
+ setNextOfEnd(pos + RE.nodeSize, pos);
+ break;
}
- else
- if (bracketOpt[i]-- > 0)
+ else if (bracketOpt > 0)
+ {
+ int opt[] = new int[bracketOpt + 1];
+ // Surround first optional terminal with MAYBE
+ nodeInsert(RE.OP_MAYBE, 0, pos);
+ opt[0] = pos;
+
+ // Add all the rest optional terminals with preceeding
MAYBEs
+ for (int c = 1; c < bracketOpt; c++)
{
- if (bracketOpt[i] > 0) {
- // More optional matchers - 'play it again
sam!'
- for (int j = 0; j < i; j++) {
- bracketMin[j] = -1;
- }
- idx = idxBeforeTerminal;
- } else {
- // Bug #1030: We are done - this one is last
and optional
- idx = bracketEnd[i];
- }
- // Drop through to optionally close
- closureType = '?';
+ opt[c] = node(RE.OP_MAYBE, 0);
+ // Rewind stream and run it through again - more
matchers coming
+ idx = idxBeforeTerminal;
+ terminal(terminalFlags);
}
- else
- {
- // Rollback terminal - neither min nor opt
matchers present
- lenInstruction = ret;
- node(RE.OP_NOTHING, 0);
- // We are done. skip the rest of {m,n} expr
- idx = bracketEnd[i];
- break;
+ // Tie ends together
+ int end = opt[bracketOpt] = node(RE.OP_NOTHING, 0);
+ for (int c = 0; c < bracketOpt; c++)
+ {
+ setNextOfEnd(opt[c], end);
+ setNextOfEnd(opt[c] + RE.nodeSize, opt[c + 1]);
}
- }
-
- // Fall through!
-
- case '?':
- case '*':
-
- if (!greedy)
- {
- break;
}
-
- if (closureType == '?')
+ else
{
- // X? is compiled as (X|)
- nodeInsert(RE.OP_BRANCH, 0, ret); //
branch before X
- setNextOfEnd(ret, node (RE.OP_BRANCH, 0)); //
inserted branch to option
- int nothing = node (RE.OP_NOTHING, 0); //
which is OP_NOTHING
- setNextOfEnd(ret, nothing); //
point (second) branch to OP_NOTHING
- setNextOfEnd(ret + RE.nodeSize, nothing); //
point the end of X to OP_NOTHING node
+ // Rollback terminal - no opt matchers present
+ lenInstruction = pos;
+ node(RE.OP_NOTHING, 0);
}
- if (closureType == '*')
- {
- // X* is compiled as (X{gotoX}|)
- nodeInsert(RE.OP_BRANCH, 0, ret);
// branch before X
- setNextOfEnd(ret + RE.nodeSize, node(RE.OP_BRANCH,
0)); // end of X points to an option
- setNextOfEnd(ret + RE.nodeSize, node(RE.OP_GOTO, 0));
// to goto
- setNextOfEnd(ret + RE.nodeSize, ret);
// the start again
- setNextOfEnd(ret, node(RE.OP_BRANCH, 0));
// the other option is
- setNextOfEnd(ret, node(RE.OP_NOTHING, 0));
// OP_NOTHING
- }
+ // We are done. skip the reminder of {m,n} expr
+ idx = bracketEnd;
break;
+ }
+
+ case '?':
+ {
+ nodeInsert(RE.OP_MAYBE, 0, ret);
+ int n = node(RE.OP_NOTHING, 0);
+ setNextOfEnd(ret, n);
+ setNextOfEnd(ret + RE.nodeSize, n);
+ break;
+ }
+
+ case '*':
+ {
+ nodeInsert(RE.OP_STAR, 0, ret);
+ setNextOfEnd(ret + RE.nodeSize, ret);
+ break;
+ }
case '+':
{
- // X+ is compiled as X({gotoX}|)
- int branch;
- branch = node(RE.OP_BRANCH, 0); // a new
branch
- setNextOfEnd(ret, branch); // is
added to the end of X
- setNextOfEnd(node(RE.OP_GOTO, 0), ret); // one
option is to go back to the start
- setNextOfEnd(branch, node(RE.OP_BRANCH, 0)); // the
other option
- setNextOfEnd(ret, node(RE.OP_NOTHING, 0)); // is
OP_NOTHING
+ nodeInsert(RE.OP_CONTINUE, 0, ret);
+ int n = node(RE.OP_PLUS, 0);
+ setNextOfEnd(ret + RE.nodeSize, n);
+ setNextOfEnd(n, ret);
+ break;
}
- break;
}
}
else
@@ -1177,6 +1084,7 @@
}
}
}
+
return ret;
}
@@ -1309,16 +1217,16 @@
// Hook the ends of each branch to the end node
int currentNode = ret;
- int nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
+ int nextNodeOffset = instruction[currentNode + RE.offsetNext];
// while the next node o
- while ( nextNodeOffset != 0 && currentNode < lenInstruction )
+ while (nextNodeOffset != 0 && currentNode < lenInstruction)
{
// If branch, make the end of the branch's operand chain point to
the end node.
- if ( instruction[ currentNode + RE.offsetOpcode ] == RE.OP_BRANCH )
+ if (instruction[currentNode /* + RE.offsetOpcode */] ==
RE.OP_BRANCH)
{
- setNextOfEnd( currentNode + RE.nodeSize, end );
+ setNextOfEnd(currentNode + RE.nodeSize, end);
}
- nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
+ nextNodeOffset = instruction[currentNode + RE.offsetNext];
currentNode += nextNodeOffset;
}
@@ -1344,7 +1252,6 @@
idx = 0; // Set parsing index
to the first character
lenInstruction = 0; // Set emitted
instruction count to zero
parens = 1; // Set paren level to
1 (the implicit outer parens)
- brackets = 0; // No bracketed
closures yet
// Initialize pass by reference flags value
int[] flags = { NODE_TOPLEVEL };
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
(original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java Mon
Mar 12 20:02:04 2007
@@ -49,6 +49,7 @@
hashOpcode.put(new Integer(RE.OP_MAYBE), "OP_MAYBE");
hashOpcode.put(new Integer(RE.OP_NOTHING), "OP_NOTHING");
hashOpcode.put(new Integer(RE.OP_GOTO), "OP_GOTO");
+ hashOpcode.put(new Integer(RE.OP_CONTINUE), "OP_CONTINUE");
hashOpcode.put(new Integer(RE.OP_ESCAPE), "OP_ESCAPE");
hashOpcode.put(new Integer(RE.OP_OPEN), "OP_OPEN");
hashOpcode.put(new Integer(RE.OP_CLOSE), "OP_CLOSE");
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java Mon Mar 12
20:02:04 2007
@@ -17,17 +17,16 @@
package org.apache.regexp;
import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
import java.io.FileReader;
+import java.io.IOException;
import java.io.InputStreamReader;
-import java.io.PrintWriter;
-import java.io.File;
-import java.io.ByteArrayOutputStream;
-import java.io.ObjectOutputStream;
-import java.io.ByteArrayInputStream;
import java.io.ObjectInputStream;
-import java.io.StringBufferInputStream;
+import java.io.ObjectOutputStream;
+import java.io.PrintWriter;
import java.io.StringReader;
-import java.io.IOException;
/**
* Data driven (and optionally interactive) testing harness to exercise regular
@@ -760,7 +759,7 @@
return;
log.append(" Match using StreamCharacterIterator\n");
- if (!tryMatchUsingCI(new StreamCharacterIterator(new
StringBufferInputStream(toMatch))))
+ if (!tryMatchUsingCI(new StreamCharacterIterator(new
ByteArrayInputStream(toMatch.getBytes()))))
return;
log.append(" Match using ReaderCharacterIterator\n");
Modified: jakarta/regexp/trunk/xdocs/RETest.txt
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/xdocs/RETest.txt?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/xdocs/RETest.txt (original)
+++ jakarta/regexp/trunk/xdocs/RETest.txt Mon Mar 12 20:02:04 2007
@@ -1566,3 +1566,8 @@
YES
1
aaabcca
+
+#231
+^a{1,30}$
+aaaaaaaaaaaaaaaaaaaaaaaaaaaab
+NO
Modified: jakarta/regexp/trunk/xdocs/changes.xml
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/xdocs/changes.xml?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/xdocs/changes.xml (original)
+++ jakarta/regexp/trunk/xdocs/changes.xml Mon Mar 12 20:02:04 2007
@@ -35,6 +35,9 @@
<h3>Version 1.5-dev</h3>
<ul>
<li>Fixed Bug
+ <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=9153">9153</a>:
+ {m,n} implementation had exponential complexity (VG)</li>
+<li>Fixed Bug
<a
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=27763">27763</a>:
RE incorrectly processed reluctant matchers (VG)</li>
<li>Fixed Bug
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]