Author: vgritsenko
Date: Mon Mar 12 19:39:06 2007
New Revision: 517501
URL: http://svn.apache.org/viewvc?view=rev&rev=517501
Log:
Fix bug #27763: RE incorrectly processed reluctant matchers
Modified:
jakarta/regexp/trunk/docs/changes.html
jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
jakarta/regexp/trunk/xdocs/changes.xml
Modified: jakarta/regexp/trunk/docs/changes.html
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/changes.html?view=diff&rev=517501&r1=517500&r2=517501
==============================================================================
--- jakarta/regexp/trunk/docs/changes.html (original)
+++ jakarta/regexp/trunk/docs/changes.html Mon Mar 12 19:39:06 2007
@@ -92,6 +92,9 @@
<h3>Version 1.5-dev</h3>
<ul>
<li>Fixed Bug
+ <a
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=27763">27763</a>:
+ RE incorrectly processed reluctant matchers (VG)</li>
+<li>Fixed Bug
<a
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=38331">38331</a>:
RE compiler creates incorrect program if pattern results in large program
with offsets exceeding capacity of the short (VG)</li>
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=517501&r1=517500&r2=517501
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Mon Mar 12 19:39:06 2007
@@ -346,6 +346,7 @@
static final char OP_BACKREF = '#'; // number reference nth already matched parenthesized string
static final char OP_GOTO = 'G'; // nothing but a (back-)pointer
static final char OP_NOTHING = 'N'; // match null string such as in '(a|)'
+ static final char OP_CONTINUE = 'C'; // continue to the following command (ignore next)
static final char OP_RELUCTANTSTAR = '8'; // none/expr reluctant '*' (mnemonic for char is unshifted '*')
static final char OP_RELUCTANTPLUS = '='; // none/expr reluctant '+' (mnemonic for char is unshifted '+')
static final char OP_RELUCTANTMAYBE = '/'; // none/expr reluctant '?' (mnemonic for char is unshifted '?')
@@ -818,41 +819,38 @@
{
case OP_RELUCTANTMAYBE:
{
- int once = 0;
- do
+ // Try to match the rest without using the reluctant
subexpr
+ if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
{
- // Try to match the rest without using the
reluctant subexpr
- if ((idxNew = matchNodes(next, maxNode, idx)) !=
-1)
- {
- return idxNew;
- }
+ return idxNew;
}
- while ((once++ == 0) && (idx = matchNodes(node +
nodeSize, next, idx)) != -1);
- return -1;
+
+ // Try reluctant subexpr, which continues to the rest
of the expression
+ return matchNodes(node + nodeSize, next, idx);
}
case OP_RELUCTANTPLUS:
- while ((idx = matchNodes(node + nodeSize, next, idx)) !=
-1)
{
- // Try to match the rest without using the reluctant
subexpr
- if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
+ // Continue matching the rest without using the
reluctant subexpr
+ if ((idxNew = matchNodes(next + instruction[next +
offsetNext], maxNode, idx)) != -1)
{
return idxNew;
}
+
+ // Try to match subexpression again
+ break;
}
- return -1;
case OP_RELUCTANTSTAR:
- do
{
// Try to match the rest without using the reluctant
subexpr
- if ((idxNew = matchNodes(next, maxNode, idx)) != -1)
- {
+ if ((idxNew = matchNodes(next, maxNode, idx)) != -1) {
return idxNew;
}
+
+ // Try reluctant subexpr
+ return matchNodes(node + nodeSize, next, idx);
}
- while ((idx = matchNodes(node + nodeSize, next, idx)) !=
-1);
- return -1;
case OP_OPEN:
@@ -1315,6 +1313,12 @@
// Just advance to the next node without doing anything
break;
+
+ case OP_CONTINUE:
+
+ // Advance to the following node
+ node += nodeSize;
+ continue;
case OP_END:
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java?view=diff&rev=517501&r1=517500&r2=517501
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java Mon Mar 12 19:39:06 2007
@@ -172,7 +172,7 @@
// FIXME: This is a _hack_ to stop infinite programs.
// I believe that the implementation of the reluctant matches is wrong but
// have not worked out a better way yet.
- if ( node == pointTo ) {
+ if (node == pointTo) {
pointTo = lenInstruction;
}
node += next;
@@ -184,7 +184,7 @@
if ( node < lenInstruction ) {
// Some patterns result in very large programs which exceed
// capacity of the short used for specifying signed offset of the
- // next instruction. Example: a{1638}
+ // next instruction. Example: a{1638}
int offset = pointTo - node;
if (offset != (short) offset) {
throw new RESyntaxException("Exceeded short jump range.");
@@ -1148,27 +1148,34 @@
}
else
{
- // Add end after closured subexpr
- setNextOfEnd(ret, node(RE.OP_END, 0));
-
// Actually do the closure now
switch (closureType)
{
case '?':
+ {
nodeInsert(RE.OP_RELUCTANTMAYBE, 0, ret);
+ int n = node(RE.OP_NOTHING, 0);
+ setNextOfEnd(ret, n);
+ setNextOfEnd(ret + RE.nodeSize, n);
break;
+ }
case '*':
+ {
nodeInsert(RE.OP_RELUCTANTSTAR, 0, ret);
+ setNextOfEnd(ret + RE.nodeSize, ret);
break;
+ }
case '+':
- nodeInsert(RE.OP_RELUCTANTPLUS, 0, ret);
+ {
+ nodeInsert(RE.OP_CONTINUE, 0, ret);
+ int n = node(RE.OP_RELUCTANTPLUS, 0);
+ setNextOfEnd(n, ret);
+ setNextOfEnd(ret + RE.nodeSize, n);
break;
+ }
}
-
- // Point to the expr after the closure
- setNextOfEnd(ret, lenInstruction);
}
return ret;
}
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java?view=diff&rev=517501&r1=517500&r2=517501
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java Mon Mar 12 19:39:06 2007
@@ -256,6 +256,8 @@
*/
public void dumpProgram()
{
- dumpProgram(new PrintWriter(System.out));
+ PrintWriter w = new PrintWriter(System.out);
+ dumpProgram(w);
+ w.flush();
}
}
Modified: jakarta/regexp/trunk/xdocs/changes.xml
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/xdocs/changes.xml?view=diff&rev=517501&r1=517500&r2=517501
==============================================================================
--- jakarta/regexp/trunk/xdocs/changes.xml (original)
+++ jakarta/regexp/trunk/xdocs/changes.xml Mon Mar 12 19:39:06 2007
@@ -35,6 +35,9 @@
<h3>Version 1.5-dev</h3>
<ul>
<li>Fixed Bug
+ <a
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=27763">27763</a>:
+ RE incorrectly processed reluctant matchers (VG)</li>
+<li>Fixed Bug
<a
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=38331">38331</a>:
RE compiler creates incorrect program if pattern results in large program
with offsets exceeding capacity of the short (VG)</li>
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]