Author: vgritsenko
Date: Tue Mar 13 17:42:08 2007
New Revision: 517955
URL: http://svn.apache.org/viewvc?view=rev&rev=517955
Log:
compiler optimization: omit unnecessary branch operations
Modified:
jakarta/regexp/trunk/docs/changes.html
jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
jakarta/regexp/trunk/xdocs/changes.xml
Modified: jakarta/regexp/trunk/docs/changes.html
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/changes.html?view=diff&rev=517955&r1=517954&r2=517955
==============================================================================
--- jakarta/regexp/trunk/docs/changes.html (original)
+++ jakarta/regexp/trunk/docs/changes.html Tue Mar 13 17:42:08 2007
@@ -91,6 +91,8 @@
<h3>Version 1.5-dev</h3>
<ul>
+<li>Implemented optimization: RE compiler omits branch instruction if only one
+ branch is present (VG)</li>
<li>Fixed Bug
<a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=9153">9153</a>:
{m,n} implementation had exponential complexity (VG)</li>
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=517955&r1=517954&r2=517955
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Tue Mar 13 17:42:08
2007
@@ -483,7 +483,7 @@
*/
public RE()
{
- this((REProgram)null, MATCH_NORMAL);
+ this((REProgram) null, MATCH_NORMAL);
}
/**
Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java?view=diff&rev=517955&r1=517954&r2=517955
==============================================================================
--- jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java (original)
+++ jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java Tue Mar 13
17:42:08 2007
@@ -1092,16 +1092,17 @@
}
/**
- * Compile one branch of an or operator (implements concatenation)
+ * Compile body of one branch of an or operator (implements concatenation)
+ *
* @param flags Flags passed by reference
- * @return Pointer to branch node
+ * @return Pointer to first node in the branch
* @exception RESyntaxException Thrown if the regular expression has
invalid syntax.
*/
int branch(int[] flags) throws RESyntaxException
{
// Get each possibly closured piece and concat
int node;
- int ret = node(RE.OP_BRANCH, 0);
+ int ret = -1;
int chain = -1;
int[] closureFlags = new int[1];
boolean nullable = true;
@@ -1123,12 +1124,15 @@
// Chain starts at current
chain = node;
+ if (ret == -1) {
+ ret = node;
+ }
}
// If we don't run loop, make a nothing node
- if (chain == -1)
+ if (ret == -1)
{
- node(RE.OP_NOTHING, 0);
+ ret = node(RE.OP_NOTHING, 0);
}
// Set nullable flag for this branch
@@ -1136,12 +1140,14 @@
{
flags[0] |= NODE_NULLABLE;
}
+
return ret;
}
/**
* Compile an expression with possible parens around it. Paren matching
* is done at this level so we can tie the branch tails together.
+ *
* @param flags Flag value passed by reference
* @return Node index of expression in instruction array
* @exception RESyntaxException Thrown if the regular expression has
invalid syntax.
@@ -1155,11 +1161,11 @@
if ((flags[0] & NODE_TOPLEVEL) == 0 && pattern.charAt(idx) == '(')
{
// if its a cluster ( rather than a proper subexpression ie with
backrefs )
- if ( idx + 2 < len && pattern.charAt( idx + 1 ) == '?' &&
pattern.charAt( idx + 2 ) == ':' )
+ if (idx + 2 < len && pattern.charAt(idx + 1) == '?' &&
pattern.charAt(idx + 2) == ':')
{
paren = 2;
idx += 3;
- ret = node( RE.OP_OPEN_CLUSTER, 0 );
+ ret = node(RE.OP_OPEN_CLUSTER, 0);
}
else
{
@@ -1170,7 +1176,8 @@
}
flags[0] &= ~NODE_TOPLEVEL;
- // Create a branch node
+ // Process contents of first branch node
+ boolean open = false;
int branch = branch(flags);
if (ret == -1)
{
@@ -1184,14 +1191,20 @@
// Loop through branches
while (idx < len && pattern.charAt(idx) == '|')
{
+ // Now open the first branch since there are more than one
+ if (!open) {
+ nodeInsert(RE.OP_BRANCH, 0, branch);
+ open = true;
+ }
+
idx++;
- branch = branch(flags);
- setNextOfEnd(ret, branch);
+ setNextOfEnd(branch, branch = node(RE.OP_BRANCH, 0));
+ branch(flags);
}
// Create an ending node (either a close paren or an OP_END)
int end;
- if ( paren > 0 )
+ if (paren > 0)
{
if (idx < len && pattern.charAt(idx) == ')')
{
@@ -1201,13 +1214,13 @@
{
syntaxError("Missing close paren");
}
- if ( paren == 1 )
+ if (paren == 1)
{
end = node(RE.OP_CLOSE, closeParens);
}
else
{
- end = node( RE.OP_CLOSE_CLUSTER, 0 );
+ end = node(RE.OP_CLOSE_CLUSTER, 0);
}
}
else
Modified: jakarta/regexp/trunk/xdocs/changes.xml
URL:
http://svn.apache.org/viewvc/jakarta/regexp/trunk/xdocs/changes.xml?view=diff&rev=517955&r1=517954&r2=517955
==============================================================================
--- jakarta/regexp/trunk/xdocs/changes.xml (original)
+++ jakarta/regexp/trunk/xdocs/changes.xml Tue Mar 13 17:42:08 2007
@@ -34,6 +34,8 @@
<h3>Version 1.5-dev</h3>
<ul>
+<li>Implemented optimization: RE compiler omits branch instruction if only one
+ branch is present (VG)</li>
<li>Fixed Bug
<a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=9153">9153</a>:
{m,n} implementation had exponential complexity (VG)</li>
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]