Author: vgritsenko
Date: Mon Mar 12 19:39:06 2007
New Revision: 517501

URL: http://svn.apache.org/viewvc?view=rev&rev=517501
Log:
Fix bug #27763: RE incorrectly processed reluctant matchers

Modified:
    jakarta/regexp/trunk/docs/changes.html
    jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
    jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
    jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
    jakarta/regexp/trunk/xdocs/changes.xml

Modified: jakarta/regexp/trunk/docs/changes.html
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/changes.html?view=diff&rev=517501&r1=517500&r2=517501
==============================================================================
--- jakarta/regexp/trunk/docs/changes.html (original)
+++ jakarta/regexp/trunk/docs/changes.html Mon Mar 12 19:39:06 2007
@@ -92,6 +92,9 @@
 <h3>Version 1.5-dev</h3>
 <ul>
 <li>Fixed Bug
+    <a 
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=27763">27763</a>:
+    RE incorrectly processed reluctant matchers (VG)</li>
+<li>Fixed Bug
     <a 
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=38331">38331</a>:
     RE compiler creates incorrect program if pattern results in large program
     with offsets exceeding capacity of the short (VG)</li>

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=517501&r1=517500&r2=517501
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Mon Mar 12 19:39:06 2007
@@ -346,6 +346,7 @@
     static final char OP_BACKREF          = '#';  // number          reference 
nth already matched parenthesized string
     static final char OP_GOTO             = 'G';  //                 nothing 
but a (back-)pointer
     static final char OP_NOTHING          = 'N';  //                 match 
null string such as in '(a|)'
+    static final char OP_CONTINUE         = 'C';  //                 continue 
to the following command (ignore next)
     static final char OP_RELUCTANTSTAR    = '8';  // none/expr       reluctant 
'*' (mnemonic for char is unshifted '*')
     static final char OP_RELUCTANTPLUS    = '=';  // none/expr       reluctant 
'+' (mnemonic for char is unshifted '+')
     static final char OP_RELUCTANTMAYBE   = '/';  // none/expr       reluctant 
'?' (mnemonic for char is unshifted '?')
@@ -818,41 +819,38 @@
             {
                 case OP_RELUCTANTMAYBE:
                     {
-                        int once = 0;
-                        do
+                        // Try to match the rest without using the reluctant 
subexpr
+                        if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
                         {
-                            // Try to match the rest without using the 
reluctant subexpr
-                            if ((idxNew = matchNodes(next, maxNode, idx)) != 
-1)
-                            {
-                                return idxNew;
-                            }
+                            return idxNew;
                         }
-                        while ((once++ == 0) && (idx = matchNodes(node + 
nodeSize, next, idx)) != -1);
-                        return -1;
+
+                        // Try reluctant subexpr, which continues to the rest 
of the expression
+                        return matchNodes(node + nodeSize, next, idx);
                     }
 
                 case OP_RELUCTANTPLUS:
-                    while ((idx = matchNodes(node + nodeSize, next, idx)) != 
-1)
                     {
-                        // Try to match the rest without using the reluctant 
subexpr
-                        if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
+                        // Continue matching the rest without using the 
reluctant subexpr
+                        if ((idxNew = matchNodes(next + instruction[next + 
offsetNext], maxNode, idx)) != -1)
                         {
                             return idxNew;
                         }
+
+                        // Try to match subexpression again
+                        break;
                     }
-                    return -1;
 
                 case OP_RELUCTANTSTAR:
-                    do
                     {
                         // Try to match the rest without using the reluctant 
subexpr
-                        if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
-                        {
+                        if ((idxNew = matchNodes(next, maxNode, idx)) != -1) {
                             return idxNew;
                         }
+
+                        // Try reluctant subexpr
+                        return matchNodes(node + nodeSize, next, idx);
                     }
-                    while ((idx = matchNodes(node + nodeSize, next, idx)) != 
-1);
-                    return -1;
 
                 case OP_OPEN:
 
@@ -1315,6 +1313,12 @@
 
                     // Just advance to the next node without doing anything
                     break;
+
+                case OP_CONTINUE:
+
+                    // Advance to the following node
+                    node += nodeSize;
+                    continue;
 
                 case OP_END:
 

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java?view=diff&rev=517501&r1=517500&r2=517501
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java Mon Mar 12 19:39:06 2007
@@ -172,7 +172,7 @@
             // FIXME: This is a _hack_ to stop infinite programs.
             // I believe that the implementation of the reluctant matches is 
wrong but
             // have not worked out a better way yet.
-            if ( node == pointTo ) {
+            if (node == pointTo) {
                 pointTo = lenInstruction;
             }
             node += next;
@@ -184,7 +184,7 @@
         if ( node < lenInstruction ) {
             // Some patterns result in very large programs which exceed
             // capacity of the short used for specifying signed offset of the
-            // next instruction. Example: a{1638} 
+            // next instruction. Example: a{1638}
             int offset = pointTo - node;
             if (offset != (short) offset) {
                 throw new RESyntaxException("Exceeded short jump range.");
@@ -1148,27 +1148,34 @@
         }
         else
         {
-            // Add end after closured subexpr
-            setNextOfEnd(ret, node(RE.OP_END, 0));
-
             // Actually do the closure now
             switch (closureType)
             {
                 case '?':
+                {
                     nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret);
+                    int n = node(RE.OP_NOTHING, 0);
+                    setNextOfEnd(ret, n);
+                    setNextOfEnd(ret + RE.nodeSize, n);
                     break;
+                }
 
                 case '*':
+                {
                     nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret);
+                    setNextOfEnd(ret + RE.nodeSize, ret);
                     break;
+                }
 
                 case '+':
-                    nodeInsert(RE.OP_RELUCTANTPLUS, 0, ret);
+                {
+                    nodeInsert(RE.OP_CONTINUE, 0, ret);
+                    int n = node(RE.OP_RELUCTANTPLUS, 0);
+                    setNextOfEnd(n, ret);
+                    setNextOfEnd(ret + RE.nodeSize, n);
                     break;
+                }
             }
-
-            // Point to the expr after the closure
-            setNextOfEnd(ret, lenInstruction);
         }
         return ret;
     }

Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java?view=diff&rev=517501&r1=517500&r2=517501
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java Mon Mar 12 19:39:06 2007
@@ -256,6 +256,8 @@
      */
     public void dumpProgram()
     {
-        dumpProgram(new PrintWriter(System.out));
+        PrintWriter w = new PrintWriter(System.out);
+        dumpProgram(w);
+        w.flush();
     }
 }

Modified: jakarta/regexp/trunk/xdocs/changes.xml
URL: 
http://svn.apache.org/viewvc/jakarta/regexp/trunk/xdocs/changes.xml?view=diff&rev=517501&r1=517500&r2=517501
==============================================================================
--- jakarta/regexp/trunk/xdocs/changes.xml (original)
+++ jakarta/regexp/trunk/xdocs/changes.xml Mon Mar 12 19:39:06 2007
@@ -35,6 +35,9 @@
 <h3>Version 1.5-dev</h3>
 <ul>
 <li>Fixed Bug
+    <a 
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=27763">27763</a>:
+    RE incorrectly processed reluctant matchers (VG)</li>
+<li>Fixed Bug
     <a 
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=38331">38331</a>:
     RE compiler creates incorrect program if pattern results in large program
     with offsets exceeding capacity of the short (VG)</li>



---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to