Author: vgritsenko Date: Mon Mar 12 19:39:06 2007 New Revision: 517501 URL: http://svn.apache.org/viewvc?view=rev&rev=517501 Log: Fix bug #27763: RE incorrectly processed reluctant matchers
Modified: jakarta/regexp/trunk/docs/changes.html jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java jakarta/regexp/trunk/xdocs/changes.xml Modified: jakarta/regexp/trunk/docs/changes.html URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/changes.html?view=diff&rev=517501&r1=517500&r2=517501 ============================================================================== --- jakarta/regexp/trunk/docs/changes.html (original) +++ jakarta/regexp/trunk/docs/changes.html Mon Mar 12 19:39:06 2007 @@ -92,6 +92,9 @@ <h3>Version 1.5-dev</h3> <ul> <li>Fixed Bug + <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=27763">27763</a>: + RE incorrectly processed reluctant matchers (VG)</li> +<li>Fixed Bug <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=38331">38331</a>: RE compiler creates incorrect program if pattern results in large program with offsets exceeding capacity of the short (VG)</li> Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=517501&r1=517500&r2=517501 ============================================================================== --- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original) +++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Mon Mar 12 19:39:06 2007 @@ -346,6 +346,7 @@ static final char OP_BACKREF = '#'; // number reference nth already matched parenthesized string static final char OP_GOTO = 'G'; // nothing but a (back-)pointer static final char OP_NOTHING = 'N'; // match null string such as in '(a|)' + static final char OP_CONTINUE = 'C'; // continue to the following command (ignore next) static final char OP_RELUCTANTSTAR = '8'; // none/expr reluctant '*' (mnemonic for char is unshifted '*') static final char OP_RELUCTANTPLUS = '='; // none/expr reluctant '+' (mnemonic for char is unshifted '+') static final char OP_RELUCTANTMAYBE = '/'; // none/expr reluctant '?' (mnemonic for char is unshifted '?') @@ -818,41 +819,38 @@ { case OP_RELUCTANTMAYBE: { - int once = 0; - do + // Try to match the rest without using the reluctant subexpr + if ((idxNew = matchNodes(next, maxNode, idx)) != -1) { - // Try to match the rest without using the reluctant subexpr - if ((idxNew = matchNodes(next, maxNode, idx)) != -1) - { - return idxNew; - } + return idxNew; } - while ((once++ == 0) && (idx = matchNodes(node + nodeSize, next, idx)) != -1); - return -1; + + // Try reluctant subexpr, which continues to the rest of the expression + return matchNodes(node + nodeSize, next, idx); } case OP_RELUCTANTPLUS: - while ((idx = matchNodes(node + nodeSize, next, idx)) != -1) { - // Try to match the rest without using the reluctant subexpr - if ((idxNew = matchNodes(next, maxNode, idx)) != -1) + // Continue matching the rest without using the reluctant subexpr + if ((idxNew = matchNodes(next + instruction[next + offsetNext], maxNode, idx)) != -1) { return idxNew; } + + // Try to match subexpression again + break; } - return -1; case OP_RELUCTANTSTAR: - do { // Try to match the rest without using the reluctant subexpr - if ((idxNew = matchNodes(next, maxNode, idx)) != -1) - { + if ((idxNew = matchNodes(next, maxNode, idx)) != -1) { return idxNew; } + + // Try reluctant subexpr + return matchNodes(node + nodeSize, next, idx); } - while ((idx = matchNodes(node + nodeSize, next, idx)) != -1); - return -1; case OP_OPEN: @@ -1315,6 +1313,12 @@ // Just advance to the next node without doing anything break; + + case OP_CONTINUE: + + // Advance to the following node + node += nodeSize; + continue; case OP_END: Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java?view=diff&rev=517501&r1=517500&r2=517501 ============================================================================== --- jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java (original) +++ jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java Mon Mar 12 19:39:06 2007 @@ -172,7 +172,7 @@ // FIXME: This is a _hack_ to stop infinite programs. // I believe that the implementation of the reluctant matches is wrong but // have not worked out a better way yet. - if ( node == pointTo ) { + if (node == pointTo) { pointTo = lenInstruction; } node += next; @@ -184,7 +184,7 @@ if ( node < lenInstruction ) { // Some patterns result in very large programs which exceed // capacity of the short used for specifying signed offset of the - // next instruction. Example: a{1638} + // next instruction. Example: a{1638} int offset = pointTo - node; if (offset != (short) offset) { throw new RESyntaxException("Exceeded short jump range."); @@ -1148,27 +1148,34 @@ } else { - // Add end after closured subexpr - setNextOfEnd(ret, node(RE.OP_END, 0)); - // Actually do the closure now switch (closureType) { case '?': + { nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret); + int n = node(RE.OP_NOTHING, 0); + setNextOfEnd(ret, n); + setNextOfEnd(ret + RE.nodeSize, n); break; + } case '*': + { nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret); + setNextOfEnd(ret + RE.nodeSize, ret); break; + } case '+': - nodeInsert(RE.OP_RELUCTANTPLUS, 0, ret); + { + nodeInsert(RE.OP_CONTINUE, 0, ret); + int n = node(RE.OP_RELUCTANTPLUS, 0); + setNextOfEnd(n, ret); + setNextOfEnd(ret + RE.nodeSize, n); break; + } } - - // Point to the expr after the closure - setNextOfEnd(ret, lenInstruction); } return ret; } Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java?view=diff&rev=517501&r1=517500&r2=517501 ============================================================================== --- jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java (original) +++ jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java Mon Mar 12 19:39:06 2007 @@ -256,6 +256,8 @@ */ public void dumpProgram() { - dumpProgram(new PrintWriter(System.out)); + PrintWriter w = new PrintWriter(System.out); + dumpProgram(w); + w.flush(); } } Modified: jakarta/regexp/trunk/xdocs/changes.xml URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/xdocs/changes.xml?view=diff&rev=517501&r1=517500&r2=517501 ============================================================================== --- jakarta/regexp/trunk/xdocs/changes.xml (original) +++ jakarta/regexp/trunk/xdocs/changes.xml Mon Mar 12 19:39:06 2007 @@ -35,6 +35,9 @@ <h3>Version 1.5-dev</h3> <ul> <li>Fixed Bug + <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=27763">27763</a>: + RE incorrectly processed reluctant matchers (VG)</li> +<li>Fixed Bug <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=38331">38331</a>: RE compiler creates incorrect program if pattern results in large program with offsets exceeding capacity of the short (VG)</li> --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]