Author: vgritsenko Date: Tue Mar 13 17:42:08 2007 New Revision: 517955 URL: http://svn.apache.org/viewvc?view=rev&rev=517955 Log: compiler optimization: omit unnecessary branch operations
Modified: jakarta/regexp/trunk/docs/changes.html jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java jakarta/regexp/trunk/xdocs/changes.xml Modified: jakarta/regexp/trunk/docs/changes.html URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/docs/changes.html?view=diff&rev=517955&r1=517954&r2=517955 ============================================================================== --- jakarta/regexp/trunk/docs/changes.html (original) +++ jakarta/regexp/trunk/docs/changes.html Tue Mar 13 17:42:08 2007 @@ -91,6 +91,8 @@ <h3>Version 1.5-dev</h3> <ul> +<li>Implemented optimization: RE compiler omits branch instruction if only one + branch is present (VG)</li> <li>Fixed Bug <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=9153">9153</a>: {m,n} implementation had exponential complexity (VG)</li> Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java?view=diff&rev=517955&r1=517954&r2=517955 ============================================================================== --- jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java (original) +++ jakarta/regexp/trunk/src/java/org/apache/regexp/RE.java Tue Mar 13 17:42:08 2007 @@ -483,7 +483,7 @@ */ public RE() { - this((REProgram)null, MATCH_NORMAL); + this((REProgram) null, MATCH_NORMAL); } /** Modified: jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java?view=diff&rev=517955&r1=517954&r2=517955 ============================================================================== --- jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java (original) +++ jakarta/regexp/trunk/src/java/org/apache/regexp/RECompiler.java Tue Mar 13 17:42:08 2007 @@ -1092,16 +1092,17 @@ } /** - * Compile one branch of an or operator (implements concatenation) + * Compile body of one branch of an or operator (implements concatenation) + * * @param flags Flags passed by reference - * @return Pointer to branch node + * @return Pointer to first node in the branch * @exception RESyntaxException Thrown if the regular expression has invalid syntax. */ int branch(int[] flags) throws RESyntaxException { // Get each possibly closured piece and concat int node; - int ret = node(RE.OP_BRANCH, 0); + int ret = -1; int chain = -1; int[] closureFlags = new int[1]; boolean nullable = true; @@ -1123,12 +1124,15 @@ // Chain starts at current chain = node; + if (ret == -1) { + ret = node; + } } // If we don't run loop, make a nothing node - if (chain == -1) + if (ret == -1) { - node(RE.OP_NOTHING, 0); + ret = node(RE.OP_NOTHING, 0); } // Set nullable flag for this branch @@ -1136,12 +1140,14 @@ { flags[0] |= NODE_NULLABLE; } + return ret; } /** * Compile an expression with possible parens around it. Paren matching * is done at this level so we can tie the branch tails together. + * * @param flags Flag value passed by reference * @return Node index of expression in instruction array * @exception RESyntaxException Thrown if the regular expression has invalid syntax. @@ -1155,11 +1161,11 @@ if ((flags[0] & NODE_TOPLEVEL) == 0 && pattern.charAt(idx) == '(') { // if its a cluster ( rather than a proper subexpression ie with backrefs ) - if ( idx + 2 < len && pattern.charAt( idx + 1 ) == '?' && pattern.charAt( idx + 2 ) == ':' ) + if (idx + 2 < len && pattern.charAt(idx + 1) == '?' && pattern.charAt(idx + 2) == ':') { paren = 2; idx += 3; - ret = node( RE.OP_OPEN_CLUSTER, 0 ); + ret = node(RE.OP_OPEN_CLUSTER, 0); } else { @@ -1170,7 +1176,8 @@ } flags[0] &= ~NODE_TOPLEVEL; - // Create a branch node + // Process contents of first branch node + boolean open = false; int branch = branch(flags); if (ret == -1) { @@ -1184,14 +1191,20 @@ // Loop through branches while (idx < len && pattern.charAt(idx) == '|') { + // Now open the first branch since there are more than one + if (!open) { + nodeInsert(RE.OP_BRANCH, 0, branch); + open = true; + } + idx++; - branch = branch(flags); - setNextOfEnd(ret, branch); + setNextOfEnd(branch, branch = node(RE.OP_BRANCH, 0)); + branch(flags); } // Create an ending node (either a close paren or an OP_END) int end; - if ( paren > 0 ) + if (paren > 0) { if (idx < len && pattern.charAt(idx) == ')') { @@ -1201,13 +1214,13 @@ { syntaxError("Missing close paren"); } - if ( paren == 1 ) + if (paren == 1) { end = node(RE.OP_CLOSE, closeParens); } else { - end = node( RE.OP_CLOSE_CLUSTER, 0 ); + end = node(RE.OP_CLOSE_CLUSTER, 0); } } else Modified: jakarta/regexp/trunk/xdocs/changes.xml URL: http://svn.apache.org/viewvc/jakarta/regexp/trunk/xdocs/changes.xml?view=diff&rev=517955&r1=517954&r2=517955 ============================================================================== --- jakarta/regexp/trunk/xdocs/changes.xml (original) +++ jakarta/regexp/trunk/xdocs/changes.xml Tue Mar 13 17:42:08 2007 @@ -34,6 +34,8 @@ <h3>Version 1.5-dev</h3> <ul> +<li>Implemented optimization: RE compiler omits branch instruction if only one + branch is present (VG)</li> <li>Fixed Bug <a href="http://issues.apache.org/bugzilla/show_bug.cgi?id=9153">9153</a>: {m,n} implementation had exponential complexity (VG)</li> --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]