Author: vgritsenko
Date: Wed Mar 7 16:28:21 2007
New Revision: 515865
URL: http://svn.apache.org/viewvc?view=rev&rev=515865
Log:
Fix bug #38331: RE compiler creates an incorrect program if the pattern results
in a large program with offsets exceeding the capacity of a short
Modified:
jakarta/regexp/trunk/docs/changes.html
jakarta/regexp/trunk/docs/jakarta-regexp.jar
jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java
jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java
jakarta/regexp/trunk/xdocs/changes.xml
Modified: jakarta/regexp/trunk/docs/changes.html
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/changes.html?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/docs/changes.html (original)
+++ jakarta/regexp/trunk/docs/changes.html Wed Mar 7 16:28:21 2007
@@ -92,6 +92,10 @@
<h3>Version 1.5-dev</h3>
<ul>
<li>Fixed Bug
+ <a
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=38331">38331</a>:
+ RE compiler creates incorrect program if pattern results in large program
+ with offsets exceeding capacity of the short (VG)</li>
+<li>Fixed Bug
<a
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=37275">37275</a>:
RE incorrectly processes nested {n,m} closures (ex: (A{1}){0,2}) (VG)</li>
<li>Added accessor for REProgram.prefix (VG)</li>
Modified: jakarta/regexp/trunk/docs/jakarta-regexp.jar
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/jakarta-regexp.jar?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
Binary files - no diff available.
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Wed Mar 7 16:28:21 2007
@@ -812,7 +812,7 @@
for (int node = firstNode; node < lastNode; )
{
opcode = instruction[node + offsetOpcode];
- next = node + (short)instruction[node + offsetNext];
+ next = node + (short) instruction[node + offsetNext];
opdata = instruction[node + offsetOpdata];
switch (opcode)
@@ -1292,7 +1292,7 @@
}
// Try all available branches
- short nextBranch;
+ int nextBranch;
do
{
// Try matching the branch against the string
@@ -1302,7 +1302,7 @@
}
// Go to next branch (if any)
- nextBranch = (short)instruction[node + offsetNext];
+ nextBranch = (short) instruction[node + offsetNext];
node += nextBranch;
}
while (nextBranch != 0 && (instruction[node +
offsetOpcode] == OP_BRANCH));
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java Wed Mar 7 16:28:21 2007
@@ -148,7 +148,7 @@
// Move everything from insertAt to the end down nodeSize elements
System.arraycopy(instruction, insertAt, instruction, insertAt +
RE.nodeSize, lenInstruction - insertAt);
instruction[insertAt + RE.offsetOpcode] = opcode;
- instruction[insertAt + RE.offsetOpdata] = (char)opdata;
+ instruction[insertAt + RE.offsetOpdata] = (char) opdata;
instruction[insertAt + RE.offsetNext] = 0;
lenInstruction += RE.nodeSize;
}
@@ -169,20 +169,29 @@
// if the node we are supposed to point to is in the chain then
// point to the end of the program instead.
// Michael McCallum <[EMAIL PROTECTED]>
- // FIXME: // This is a _hack_ to stop infinite programs.
+ // FIXME: This is a _hack_ to stop infinite programs.
// I believe that the implementation of the reluctant matches is wrong but
// have not worked out a better way yet.
if ( node == pointTo ) {
- pointTo = lenInstruction;
+ pointTo = lenInstruction;
}
node += next;
next = instruction[node + RE.offsetNext];
}
+
// if we have reached the end of the program then dont set the pointTo.
// im not sure if this will break any thing but passes all the tests.
if ( node < lenInstruction ) {
+ // Some patterns result in very large programs which exceed
+ // capacity of the short used for specifying signed offset of the
+ // next instruction. Example: a{1638}
+ int offset = pointTo - node;
+ if (offset != (short) offset) {
+ throw new RESyntaxException("Exceeded short jump range.");
+ }
+
// Point the last node in the chain to pointTo.
- instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
+ instruction[node + RE.offsetNext] = (char) (short) offset;
}
}
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/REDebugCompiler.java Wed Mar 7 16:28:21 2007
@@ -150,9 +150,9 @@
for (int i = 0; i < lenInstruction; )
{
// Get opcode, opdata and next fields of current program node
- char opcode = instruction[i + RE.offsetOpcode];
- char opdata = instruction[i + RE.offsetOpdata];
- short next = (short)instruction[i + RE.offsetNext];
+ char opcode = instruction[i + RE.offsetOpcode];
+ char opdata = instruction[i + RE.offsetOpdata];
+ int next = (short) instruction[i + RE.offsetNext];
// Display the current program node
p.print(i + ". " + nodeToString(i) + ", next = ");
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/REProgram.java Wed Mar 7 16:28:21 2007
@@ -117,7 +117,7 @@
if (lenInstruction >= RE.nodeSize && instruction[0 +
RE.offsetOpcode] == RE.OP_BRANCH)
{
// to the end node
- char next = instruction[0 + RE.offsetNext];
+ int next = (short) instruction[0 + RE.offsetNext];
if (instruction[next + RE.offsetOpcode] == RE.OP_END &&
lenInstruction >= (RE.nodeSize * 2))
{
final char nextOp = instruction[RE.nodeSize +
RE.offsetOpcode];
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RETest.java Wed Mar 7 16:28:21 2007
@@ -425,6 +425,14 @@
if (r.match("a\u2029b")) {
fail("\"a\\u2029b\" matches \"^a.*b$\"");
}
+
+ // Bug 38331: Large program
+ try {
+ new RE("a{8192}");
+ fail("a{8192} should fail to compile.");
+ } catch (RESyntaxException e) {
+ // expected
+ }
}
private void testPrecompiledRE()
@@ -631,7 +639,7 @@
final String matchAgainst = br.readLine();
final boolean badPattern = "ERR".equals(matchAgainst);
boolean shouldMatch = false;
- int expectedParenCount = 0;
+ int expectedParenCount;
String[] expectedParens = null;
if (!badPattern) {
Modified: jakarta/regexp/trunk/xdocs/changes.xml
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/xdocs/changes.xml?view=diff&rev=515865&r1=515864&r2=515865
==============================================================================
--- jakarta/regexp/trunk/xdocs/changes.xml (original)
+++ jakarta/regexp/trunk/xdocs/changes.xml Wed Mar 7 16:28:21 2007
@@ -35,6 +35,10 @@
<h3>Version 1.5-dev</h3>
<ul>
<li>Fixed Bug
+ <a
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=38331">38331</a>:
+ RE compiler creates incorrect program if pattern results in large program
+ with offsets exceeding capacity of the short (VG)</li>
+<li>Fixed Bug
<a
href="http://issues.apache.org/bugzilla/show_bug.cgi?id=37275">37275</a>:
RE incorrectly processes nested {n,m} closures (ex: (A{1}){0,2}) (VG)</li>
<li>Added accessor for REProgram.prefix (VG)</li>
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]