Author: vgritsenko
Date: Mon Mar 12 20:02:04 2007
New Revision: 517503

URL: http://svn.apache.org/viewvc?view=rev&rev=517503
Log:
Reimplemented */+/? closures to use STAR/PLUS/MAYBE operations.
Fixed bug #9153.

Modified:
    jakarta/regexp/trunk/docs/RETest.txt
    jakarta/regexp/trunk/docs/changes.html
    jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
    jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
    jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
    jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java
    jakarta/regexp/trunk/xdocs/RETest.txt
    jakarta/regexp/trunk/xdocs/changes.xml

Modified: jakarta/regexp/trunk/docs/RETest.txt
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/RETest.txt?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/docs/RETest.txt (original)
+++ jakarta/regexp/trunk/docs/RETest.txt Mon Mar 12 20:02:04 2007
@@ -1566,3 +1566,8 @@
 YES
 1
 aaabcca
+
+#231
+^a{1,35}$
+aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab
+NO

Modified: jakarta/regexp/trunk/docs/changes.html
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/changes.html?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/docs/changes.html (original)
+++ jakarta/regexp/trunk/docs/changes.html Mon Mar 12 20:02:04 2007
@@ -92,6 +92,9 @@
 <h3>Version 1.5-dev</h3>
 <ul>
 <li>Fixed Bug
+    <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=9153";>9153</a>:
+    {m,n} implementation had exponential complexity (VG)</li>
+<li>Fixed Bug
     <a 
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=27763";>27763</a>:
     RE incorrectly processed reluctant matchers (VG)</li>
 <li>Fixed Bug

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Mon Mar 12 20:02:04 
2007
@@ -817,6 +817,46 @@
 
             switch (opcode)
             {
+                case OP_MAYBE:
+                    {
+                        // Try to match the following subexpr.
+                        // If it succeeds, it will continue matching by itself 
without returning here.
+                        if ((idxNew = matchNodes(node + nodeSize, maxNode, 
idx)) != -1)
+                        {
+                            return idxNew;
+                        }
+
+                        // If failed, just continue with the rest of expression
+                        break;
+                    }
+
+                case OP_PLUS:
+                    {
+                        // Try to match the following subexpr again (and again 
(and ...
+                        if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
+                        {
+                            return idxNew;
+                        }
+
+                        // If failed, just continue with the rest of expression
+                        // Rest is located at the next pointer of the next 
instruction
+                        // which must be OP_CONTINUE.
+                        node = next + instruction[next + offsetNext];
+                        continue;
+                    }
+
+                case OP_STAR:
+                    {
+                        // Try to match the following subexpr (and again (and 
again (and ...
+                        if ((idxNew = matchNodes(node + nodeSize, maxNode, 
idx)) != -1)
+                        {
+                            return idxNew;
+                        }
+
+                        // If failed, just continue with the rest of expression
+                        break;
+                    }
+
                 case OP_RELUCTANTMAYBE:
                     {
                         // Try to match the rest without using the reluctant 
subexpr

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java Mon Mar 12 
20:02:04 2007
@@ -56,13 +56,9 @@
     static final int ESC_CLASS     = 0xffffd;           // Escape represents a 
whole class of characters
 
     // {m,n} stacks
-    int maxBrackets = 10;                               // Maximum number of 
bracket pairs
     static final int bracketUnbounded = -1;             // Unbounded value
-    int brackets = 0;                                   // Number of bracket 
sets
-    int[] bracketStart = null;                          // Starting point
-    int[] bracketEnd = null;                            // Ending point
-    int[] bracketMin = null;                            // Minimum number of 
matches
-    int[] bracketOpt = null;                            // Additional optional 
matches
+    int bracketMin;                                     // Minimum number of 
matches
+    int bracketOpt;                                     // Additional optional 
matches
 
     // Lookup table for POSIX character class names
     static final Hashtable hashPOSIX = new Hashtable();
@@ -236,59 +232,10 @@
     }
 
     /**
-     * Allocate storage for brackets only as needed
-     */
-    void allocBrackets()
-    {
-        // Allocate bracket stacks if not already done
-        if (bracketStart == null)
-        {
-            // Allocate storage
-            bracketStart = new int[maxBrackets];
-            bracketEnd   = new int[maxBrackets];
-            bracketMin   = new int[maxBrackets];
-            bracketOpt   = new int[maxBrackets];
-
-            // Initialize to invalid values
-            for (int i = 0; i < maxBrackets; i++)
-            {
-                bracketStart[i] = bracketEnd[i] = bracketMin[i] = 
bracketOpt[i] = -1;
-            }
-        }
-    }
-
-    /** Enlarge storage for brackets only as needed. */
-    synchronized void reallocBrackets() {
-        // trick the tricky
-        if (bracketStart == null) {
-            allocBrackets();
-        }
-
-        int new_size = maxBrackets * 2;
-        int[] new_bS = new int[new_size];
-        int[] new_bE = new int[new_size];
-        int[] new_bM = new int[new_size];
-        int[] new_bO = new int[new_size];
-        // Initialize to invalid values
-        for (int i=brackets; i<new_size; i++) {
-            new_bS[i] = new_bE[i] = new_bM[i] = new_bO[i] = -1;
-        }
-        System.arraycopy(bracketStart,0, new_bS,0, brackets);
-        System.arraycopy(bracketEnd,0,   new_bE,0, brackets);
-        System.arraycopy(bracketMin,0,   new_bM,0, brackets);
-        System.arraycopy(bracketOpt,0,   new_bO,0, brackets);
-        bracketStart = new_bS;
-        bracketEnd   = new_bE;
-        bracketMin   = new_bM;
-        bracketOpt   = new_bO;
-        maxBrackets  = new_size;
-    }
-
-    /**
      * Match bracket {m,n} expression put results in bracket member variables
      * @exception RESyntaxException Thrown if the regular expression has 
invalid syntax.
      */
-    void bracket(int bracket) throws RESyntaxException
+    void bracket() throws RESyntaxException
     {
         // Current character must be a '{'
         if (idx >= len || pattern.charAt(idx++) != '{')
@@ -310,7 +257,7 @@
         }
         try
         {
-            bracketMin[bracket] = Integer.parseInt(number.toString());
+            bracketMin = Integer.parseInt(number.toString());
         }
         catch (NumberFormatException e)
         {
@@ -327,7 +274,7 @@
         if (pattern.charAt(idx) == '}')
         {
             idx++;
-            bracketOpt[bracket] = 0;
+            bracketOpt = 0;
             return;
         }
 
@@ -347,7 +294,7 @@
         if (pattern.charAt(idx) == '}')
         {
             idx++;
-            bracketOpt[bracket] = bracketUnbounded;
+            bracketOpt = bracketUnbounded;
             return;
         }
 
@@ -365,7 +312,7 @@
         }
         try
         {
-            bracketOpt[bracket] = Integer.parseInt(number.toString()) - 
bracketMin[bracket];
+            bracketOpt = Integer.parseInt(number.toString()) - bracketMin;
         }
         catch (NumberFormatException e)
         {
@@ -373,7 +320,7 @@
         }
 
         // Optional repetitions must be >= 0
-        if (bracketOpt[bracket] < 0)
+        if (bracketOpt < 0)
         {
             syntaxError("Bad range");
         }
@@ -1018,132 +965,92 @@
             {
                 case '{':
                 {
-                    // We look for our bracket in the list
-                    boolean found = false;
-                    int i;
-                    allocBrackets();
-                    for (i = 0; i < brackets; i++)
-                    {
-                        if (bracketStart[i] == idx)
-                        {
-                            // Re-initialize bracketMin, bracketOpt since they 
are all used up by now
-                            if (bracketMin[i] == -1) {
-                                bracket(i);
-                            }
-                            found = true;
-                            break;
-                        }
-                    }
+                    bracket();
+                    int bracketEnd = idx;
+                    int bracketMin = this.bracketMin;
+                    int bracketOpt = this.bracketOpt;
 
-                    // If its not in the list we parse the {m,n}
-                    if (!found)
-                    {
-                        if (brackets >= maxBrackets)
-                        {
-                            reallocBrackets();
-                        }
-                        bracketStart[brackets] = idx;
-                        bracket(brackets);
-                        bracketEnd[brackets] = idx;
-                        i = brackets++;
-                    }
+                    // Pointer to the last terminal
+                    int pos = ret;
 
                     // Process min first
-                    if (bracketMin[i] > 0)
+                    for (int c = 0; c < bracketMin; c++)
                     {
-                        if (--bracketMin[i] > 0 || bracketOpt[i] != 0) {
-                            // Rewind stream and run it through again - more 
matchers coming
-                            for (int j = 0; j < i; j++) {
-                                bracketMin[j] = -1;
-                            }
-                            idx = idxBeforeTerminal;
-                        } else {
-                            // Bug #1030: No optional matches - no need to 
rewind
-                            idx = bracketEnd[i];
-                        }
-                        break;
+                        // Rewind stream and run it through again - more 
matchers coming
+                        idx = idxBeforeTerminal;
+                        setNextOfEnd(pos, pos = terminal(terminalFlags));
                     }
 
                     // Do the right thing for maximum ({m,})
-                    if (bracketOpt[i] == bracketUnbounded)
+                    if (bracketOpt == bracketUnbounded)
                     {
                         // Drop through now and closure expression.
                         // We are done with the {m,} expr, so skip rest
-                        closureType = '*';
-                        bracketOpt[i] = 0;
-                        idx = bracketEnd[i];
+                        idx = bracketEnd;
+                        nodeInsert(RE.OP_STAR, 0, pos);
+                        setNextOfEnd(pos + RE.nodeSize, pos);
+                        break;
                     }
-                    else
-                        if (bracketOpt[i]-- > 0)
+                    else if (bracketOpt > 0)
+                    {
+                        int opt[] = new int[bracketOpt + 1];
+                        // Surround first optional terminal with MAYBE
+                        nodeInsert(RE.OP_MAYBE, 0, pos);
+                        opt[0] = pos;
+
+                        // Add all the rest optional terminals with preceeding 
MAYBEs
+                        for (int c = 1; c < bracketOpt; c++)
                         {
-                            if (bracketOpt[i] > 0) {
-                                // More optional matchers - 'play it again 
sam!'
-                                for (int j = 0; j < i; j++) {
-                                    bracketMin[j] = -1;
-                                }
-                                idx = idxBeforeTerminal;
-                            } else {
-                                // Bug #1030: We are done - this one is last 
and optional
-                                idx = bracketEnd[i];
-                            }
-                            // Drop through to optionally close
-                            closureType = '?';
+                            opt[c] = node(RE.OP_MAYBE, 0);
+                            // Rewind stream and run it through again - more 
matchers coming
+                            idx = idxBeforeTerminal;
+                            terminal(terminalFlags);
                         }
-                        else
-                        {
-                            // Rollback terminal - neither min nor opt 
matchers present
-                            lenInstruction = ret;
-                            node(RE.OP_NOTHING, 0);
 
-                            // We are done. skip the rest of {m,n} expr
-                            idx = bracketEnd[i];
-                            break;
+                        // Tie ends together
+                        int end = opt[bracketOpt] = node(RE.OP_NOTHING, 0);
+                        for (int c = 0; c < bracketOpt; c++)
+                        {
+                            setNextOfEnd(opt[c], end);
+                            setNextOfEnd(opt[c] + RE.nodeSize, opt[c + 1]);
                         }
-                }
-
-                // Fall through!
-
-                case '?':
-                case '*':
-
-                    if (!greedy)
-                    {
-                        break;
                     }
-
-                    if (closureType == '?')
+                    else
                     {
-                        // X? is compiled as (X|)
-                        nodeInsert(RE.OP_BRANCH, 0, ret);                 // 
branch before X
-                        setNextOfEnd(ret, node (RE.OP_BRANCH, 0));        // 
inserted branch to option
-                        int nothing = node (RE.OP_NOTHING, 0);            // 
which is OP_NOTHING
-                        setNextOfEnd(ret, nothing);                       // 
point (second) branch to OP_NOTHING
-                        setNextOfEnd(ret + RE.nodeSize, nothing);         // 
point the end of X to OP_NOTHING node
+                        // Rollback terminal - no opt matchers present
+                        lenInstruction = pos;
+                        node(RE.OP_NOTHING, 0);
                     }
 
-                    if (closureType == '*')
-                    {
-                        // X* is compiled as (X{gotoX}|)
-                        nodeInsert(RE.OP_BRANCH, 0, ret);                      
   // branch before X
-                        setNextOfEnd(ret + RE.nodeSize, node(RE.OP_BRANCH, 
0));   // end of X points to an option
-                        setNextOfEnd(ret + RE.nodeSize, node(RE.OP_GOTO, 0));  
   // to goto
-                        setNextOfEnd(ret + RE.nodeSize, ret);                  
   // the start again
-                        setNextOfEnd(ret, node(RE.OP_BRANCH, 0));              
   // the other option is
-                        setNextOfEnd(ret, node(RE.OP_NOTHING, 0));             
   // OP_NOTHING
-                    }
+                    // We are done. skip the reminder of {m,n} expr
+                    idx = bracketEnd;
                     break;
+                }
+
+                case '?':
+                {
+                    nodeInsert(RE.OP_MAYBE, 0, ret);
+                    int n = node(RE.OP_NOTHING, 0);
+                    setNextOfEnd(ret, n);
+                    setNextOfEnd(ret + RE.nodeSize, n);
+                    break;
+                }
+
+                case '*':
+                {
+                    nodeInsert(RE.OP_STAR, 0, ret);
+                    setNextOfEnd(ret + RE.nodeSize, ret);
+                    break;
+                }
 
                 case '+':
                 {
-                    // X+ is compiled as X({gotoX}|)
-                    int branch;
-                    branch = node(RE.OP_BRANCH, 0);                   // a new 
branch
-                    setNextOfEnd(ret, branch);                        // is 
added to the end of X
-                    setNextOfEnd(node(RE.OP_GOTO, 0), ret);           // one 
option is to go back to the start
-                    setNextOfEnd(branch, node(RE.OP_BRANCH, 0));      // the 
other option
-                    setNextOfEnd(ret, node(RE.OP_NOTHING, 0));        // is 
OP_NOTHING
+                    nodeInsert(RE.OP_CONTINUE, 0, ret);
+                    int n = node(RE.OP_PLUS, 0);
+                    setNextOfEnd(ret + RE.nodeSize, n);
+                    setNextOfEnd(n, ret);
+                    break;
                 }
-                break;
             }
         }
         else
@@ -1177,6 +1084,7 @@
                 }
             }
         }
+
         return ret;
     }
 
@@ -1309,16 +1217,16 @@
 
         // Hook the ends of each branch to the end node
         int currentNode = ret;
-        int nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
+        int nextNodeOffset = instruction[currentNode + RE.offsetNext];
         // while the next node o
-        while ( nextNodeOffset != 0 && currentNode < lenInstruction )
+        while (nextNodeOffset != 0 && currentNode < lenInstruction)
         {
             // If branch, make the end of the branch's operand chain point to 
the end node.
-            if ( instruction[ currentNode + RE.offsetOpcode ] == RE.OP_BRANCH )
+            if (instruction[currentNode /* + RE.offsetOpcode */] == 
RE.OP_BRANCH)
             {
-                setNextOfEnd( currentNode + RE.nodeSize, end );
+                setNextOfEnd(currentNode + RE.nodeSize, end);
             }
-            nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
+            nextNodeOffset = instruction[currentNode + RE.offsetNext];
             currentNode += nextNodeOffset;
         }
 
@@ -1344,7 +1252,6 @@
         idx = 0;                                        // Set parsing index 
to the first character
         lenInstruction = 0;                             // Set emitted 
instruction count to zero
         parens = 1;                                     // Set paren level to 
1 (the implicit outer parens)
-        brackets = 0;                                   // No bracketed 
closures yet
 
         // Initialize pass by reference flags value
         int[] flags = { NODE_TOPLEVEL };

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java 
(original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java Mon 
Mar 12 20:02:04 2007
@@ -49,6 +49,7 @@
         hashOpcode.put(new Integer(RE.OP_MAYBE),            "OP_MAYBE");
         hashOpcode.put(new Integer(RE.OP_NOTHING),          "OP_NOTHING");
         hashOpcode.put(new Integer(RE.OP_GOTO),             "OP_GOTO");
+        hashOpcode.put(new Integer(RE.OP_CONTINUE),         "OP_CONTINUE");
         hashOpcode.put(new Integer(RE.OP_ESCAPE),           "OP_ESCAPE");
         hashOpcode.put(new Integer(RE.OP_OPEN),             "OP_OPEN");
         hashOpcode.put(new Integer(RE.OP_CLOSE),            "OP_CLOSE");

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java Mon Mar 12 
20:02:04 2007
@@ -17,17 +17,16 @@
 package org.apache.regexp;
 
 import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
 import java.io.FileReader;
+import java.io.IOException;
 import java.io.InputStreamReader;
-import java.io.PrintWriter;
-import java.io.File;
-import java.io.ByteArrayOutputStream;
-import java.io.ObjectOutputStream;
-import java.io.ByteArrayInputStream;
 import java.io.ObjectInputStream;
-import java.io.StringBufferInputStream;
+import java.io.ObjectOutputStream;
+import java.io.PrintWriter;
 import java.io.StringReader;
-import java.io.IOException;
 
 /**
  * Data driven (and optionally interactive) testing harness to exercise regular
@@ -760,7 +759,7 @@
                     return;
 
                 log.append("   Match using StreamCharacterIterator\n");
-                if (!tryMatchUsingCI(new StreamCharacterIterator(new 
StringBufferInputStream(toMatch))))
+                if (!tryMatchUsingCI(new StreamCharacterIterator(new 
ByteArrayInputStream(toMatch.getBytes()))))
                     return;
 
                 log.append("   Match using ReaderCharacterIterator\n");

Modified: jakarta/regexp/trunk/xdocs/RETest.txt
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/xdocs/RETest.txt?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/xdocs/RETest.txt (original)
+++ jakarta/regexp/trunk/xdocs/RETest.txt Mon Mar 12 20:02:04 2007
@@ -1566,3 +1566,8 @@
 YES
 1
 aaabcca
+
+#231
+^a{1,30}$
+aaaaaaaaaaaaaaaaaaaaaaaaaaaab
+NO

Modified: jakarta/regexp/trunk/xdocs/changes.xml
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/xdocs/changes.xml?view=diff&rev=517503&r1=517502&r2=517503
==============================================================================
--- jakarta/regexp/trunk/xdocs/changes.xml (original)
+++ jakarta/regexp/trunk/xdocs/changes.xml Mon Mar 12 20:02:04 2007
@@ -35,6 +35,9 @@
 <h3>Version 1.5-dev</h3>
 <ul>
 <li>Fixed Bug
+    <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=9153";>9153</a>:
+    {m,n} implementation had exponential complexity (VG)</li>
+<li>Fixed Bug
     <a 
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=27763";>27763</a>:
     RE incorrectly processed reluctant matchers (VG)</li>
 <li>Fixed Bug



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to