dfs 02/02/18 20:54:29
Modified: . CHANGES CONTRIBUTORS
src/java/examples jdfix.java
src/java/org/apache/oro/text/perl Perl5Util.java
src/java/org/apache/oro/text/regex Util.java
Log:
Added missing int Perl5Util.substitute(...) method and fixed some documentation.
Revision Changes Path
1.24 +6 -1 jakarta-oro/CHANGES
Index: CHANGES
===================================================================
RCS file: /home/cvs/jakarta-oro/CHANGES,v
retrieving revision 1.23
retrieving revision 1.24
diff -u -r1.23 -r1.24
--- CHANGES 1 Feb 2002 09:38:19 -0000 1.23
+++ CHANGES 19 Feb 2002 04:54:29 -0000 1.24
@@ -1,6 +1,11 @@
-$Id: CHANGES,v 1.23 2002/02/01 09:38:19 dfs Exp $
+$Id: CHANGES,v 1.24 2002/02/19 04:54:29 dfs Exp $
Version 2.0.x
+
+o Removed some leftover references to OROMatcher in the Perl5Util javadocs.
+
+o Added an int substitute(...) method to Perl5Util to correspond to
+ the similar method added to org.apache.oro.text.regex.Util in v2.0.3
o Removed ant and support jars from distribution and moved build.xml to
top level directory. From now on, you must have ant installed on your
1.5 +4 -1 jakarta-oro/CONTRIBUTORS
Index: CONTRIBUTORS
===================================================================
RCS file: /home/cvs/jakarta-oro/CONTRIBUTORS,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- CONTRIBUTORS 17 May 2001 00:44:41 -0000 1.4
+++ CONTRIBUTORS 19 Feb 2002 04:54:29 -0000 1.5
@@ -1,4 +1,4 @@
-$Id: CONTRIBUTORS,v 1.4 2001/05/17 00:44:41 dfs Exp $
+$Id: CONTRIBUTORS,v 1.5 2002/02/19 04:54:29 dfs Exp $
Daniel Savarese <dfs at savarese.org> is the original author of the
OROMatcher, PerlTools, AwkTools, and TextTools packages that became
@@ -14,3 +14,6 @@
Mark Murphy <markm at tyrell.com> has contributed performance
improvements to Perl5Substitution as well as adding support for
\UuLlE and escaping of $.
+
+Michael Davey <michael.davey at sun.com> fixed some documentation and
+added a missing int substitute(...) method to Perl5Util.
1.5 +17 -10 jakarta-oro/src/java/examples/jdfix.java
Index: jdfix.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/examples/jdfix.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- jdfix.java 20 May 2001 23:55:15 -0000 1.4
+++ jdfix.java 19 Feb 2002 04:54:29 -0000 1.5
@@ -56,7 +56,7 @@
*/
/*
- * $Id: jdfix.java,v 1.4 2001/05/20 23:55:15 dfs Exp $
+ * $Id: jdfix.java,v 1.5 2002/02/19 04:54:29 dfs Exp $
*/
import java.io.*;
@@ -77,8 +77,10 @@
* Notice that the Java program is only so much longer because of all
* of the I/O exception handling and InputStream creation. The core
* while loop is EXACTLY the same length as the while loop in the Perl
- * script. This program uses DataInputStream, readLine(), and
- * PrintStream for JDK 1.0.2 compatibility.
+ * script. The number of substitutions performed is printed to standard
+ * output as additional information. Note, this is not an efficient way
+ * to do this job; it is better to first read the entire file into a
+ * character array.
* <p>
* This is a simple program that takes a javadoc generated HTML file as
* input and produces as output the same HTML file, except with a white
@@ -106,10 +108,12 @@
BufferedReader input = null;
PrintWriter output = null;
Perl5Util perl;
+ StringBuffer result = new StringBuffer();
+ int numSubs = 0;
if(args.length < 2) {
System.err.println("Usage: jdfix input output");
- System.exit(1);
+ return;
}
try {
@@ -118,7 +122,7 @@
} catch(IOException e) {
System.err.println("Error opening input file: " + args[0]);
e.printStackTrace();
- System.exit(1);
+ return;
}
try {
@@ -127,20 +131,23 @@
} catch(IOException e) {
System.err.println("Error opening output file: " + args[1]);
e.printStackTrace();
- System.exit(1);
+ return;
}
perl = new Perl5Util();
try {
while((line = input.readLine()) != null) {
- line = perl.substitute("s/<body>/<body bgcolor=\"#ffffff\">/", line);
- output.println(line);
+ numSubs+=perl.substitute(result,
+ "s/<body>/<body bgcolor=\"#ffffff\">/", line);
+ result.append('\n');
}
+ output.print(result.toString());
+ System.out.println("Substitutions made: " + numSubs);
} catch(IOException e) {
System.err.println("Error reading from input: " + args[1]);
e.printStackTrace();
- System.exit(1);
+ return;
} finally {
try {
input.close();
@@ -148,7 +155,7 @@
} catch(IOException e) {
System.err.println("Error closing files.");
e.printStackTrace();
- System.exit(1);
+ return;
}
}
}
1.11 +88 -48 jakarta-oro/src/java/org/apache/oro/text/perl/Perl5Util.java
Index: Perl5Util.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/perl/Perl5Util.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- Perl5Util.java 8 Dec 2001 03:15:25 -0000 1.10
+++ Perl5Util.java 19 Feb 2002 04:54:29 -0000 1.11
@@ -58,11 +58,12 @@
*/
/*
- * $Id: Perl5Util.java,v 1.10 2001/12/08 03:15:25 dfs Exp $
+ * $Id: Perl5Util.java,v 1.11 2002/02/19 04:54:29 dfs Exp $
*/
import java.util.*;
-import org.apache.oro.text.regex.*;
+
import org.apache.oro.text.*;
+import org.apache.oro.text.regex.*;
import org.apache.oro.util.*;
/**
@@ -77,7 +78,7 @@
* the slashes.
* <p>
* The objective of the class is to minimize the amount of code a Java
- * programmer using OROMatcher<font size="-2"><sup>TM</sup></font>
+ * programmer using Jakarta-ORO
* has to write to achieve the same results as Perl by
* transparently handling regular expression compilation, caching, and
* matching. A second objective is to use the same Perl pattern matching
@@ -86,7 +87,7 @@
* All the state affecting methods are synchronized to avoid
* the maintenance of explicit locks in multithreaded programs. This
* philosophy differs from the
- * OROMatcher<font size="-2"><sup>TM</sup></font> package, where
+ * {@link org.apache.oro.text.regex} package, where
* you are expected to either maintain explicit locks, or more preferably
* create separate compiler and matcher instances for each thread.
* <p>
@@ -123,10 +124,12 @@
* <p>
* A couple of things to remember when using this class are that the
* {@link #match match()} methods have the same meaning as
- * contains() in OROMatcher<font size="-2"><sup>TM</sup></font>
+ * {@link org.apache.oro.text.regex.Perl5Matcher#contains
+ * Perl5Matcher.contains()}
* and <code>=~ m/pattern/</code> in Perl. The methods are named match
* to more closely associate them with Perl and to differentiate them
- * from matches() in OROMatcher<font size="-2"><sup>TM</sup></font>.
+ * from {@link org.apache.oro.text.regex.Perl5Matcher#matches
+ * Perl5Matcher.matches()}.
* A further thing to keep in mind is that the
* {@link MalformedPerl5PatternException} class is derived from
* RuntimeException which means you DON'T have to catch it. The reasoning
@@ -143,12 +146,11 @@
* programs to be robust.
* <p>
* Finally, as a convenience Perl5Util implements
- * the org.apache.oro.text.regex.MatchResult interface found in the
- * OROMatcher<font size="-2"><sup>TM</sup></font> package. The methods
- * are merely wrappers which call the corresponding method of the last
- * MatchResult found (which can be accessed with
- * {@link #getMatch()} by a match or substitution
- * (or even a split, but this isn't particularly useful).
+ * the {@link org.apache.oro.text.regex.MatchResult MatchResult} interface.
+ * The methods are merely wrappers which call the corresponding method of
+ * the last {@link org.apache.oro.text.regex.MatchResult MatchResult}
+ * found (which can be accessed with {@link #getMatch()}) by a match or
+ * substitution (or even a split, but this isn't particularly useful).
*
* @author <a href="mailto:[EMAIL PROTECTED]">Daniel F. Savarese</a>
* @version @version@
@@ -384,7 +386,8 @@
* As with Perl, any non-alphanumeric character can be used in lieu of
* the slashes.
* <p>
- * If the input contains the pattern, the org.apache.oro.text.regex.MatchResult
+ * If the input contains the pattern, the
+ * {@link org.apache.oro.text.regex.MatchResult MatchResult}
* can be obtained by calling {@link #getMatch()}.
* However, Perl5Util implements the MatchResult interface as a wrapper
* around the last MatchResult found, so you can call its methods to
@@ -423,7 +426,8 @@
* As with Perl, any non-alphanumeric character can be used in lieu of
* the slashes.
* <p>
- * If the input contains the pattern, the org.apache.oro.text.regex.MatchResult
+ * If the input contains the pattern, the
+ * {@link org.apache.oro.text.regex.MatchResult MatchResult}
* can be obtained by calling {@link #getMatch()}.
* However, Perl5Util implements the MatchResult interface as a wrapper
* around the last MatchResult found, so you can call its methods to
@@ -431,7 +435,8 @@
* After the call to this method, the PatternMatcherInput current offset
* is advanced to the end of the match, so you can use it to repeatedly
* search for expressions in the entire input using a while loop as
- * explained in the OROMatcher<font size="-2"><sup>TM</sup></font> package.
+ * explained in the {@link org.apache.oro.text.regex.PatternMatcherInput
+ * PatternMatcherInput} documentation.
* <p>
* @param pattern The pattern to search for.
* @param input The PatternMatcherInput to search.
@@ -489,13 +494,14 @@
* <dt> m <dd> treat the input as consisting of multiple lines
* <dt> o <dd> If variable interopolation is used, only evaluate the
* interpolation once (the first time). This is equivalent
- * to using a numInterpolations argument of 1 in the
- * OROMatcher<font size="-2"><sup>TM</sup></font>
- * Util.substitute() method. The default is to compute
- * each interpolation independently. See the
- * OROMatcher<font size="-2"><sup>TM</sup></font>
- * Util.substitute() method for more details on variable
- * interpolation in substitutions.
+ * to using a numInterpolations argument of 1 in
+ * {@link org.apache.oro.text.regex.Util#substitute Util.substitute()}.
+ * The default is to compute each interpolation independently.
+ * See
+ * {@link org.apache.oro.text.regex.Util#substitute Util.substitute()}
+ * and {@link org.apache.oro.text.regex.Perl5Substitution Perl5Substitution}
+ * for more details on variable interpolation in
+ * substitutions.
* <dt> s <dd> treat the input as consisting of a single line
* <dt> x <dd> enable extended expression syntax incorporating whitespace
* and comments
@@ -504,11 +510,11 @@
* the slashes. This is helpful to avoid backslashing. For example,
* using slashes you would have to do:
* <blockquote><pre>
- * result = util.substitute("s/foo\\/bar/goo\\/\\/baz/", input);
+ * numSubs = util.substitute(result, "s/foo\\/bar/goo\\/\\/baz/", input);
* </pre></blockquote>
* when you could more easily write:
* <blockquote><pre>
- * result = util.substitute("s#foo/bar#goo//baz#", input);
+ * numSubs = util.substitute(result, "s#foo/bar#goo//baz#", input);
* </pre></blockquote>
* where the hashmarks are used instead of slashes.
* <p>
@@ -535,25 +541,28 @@
* used to be an invalid expression and require a special-case extra
* backslash, will now replace all instances of / with \:
* <blockquote><pre>
- * result = util.substitute("s#/#\\#g", input);
+ * numSubs = util.substitute(result, "s#/#\\#g", input);
* </pre></blockquote>
* <p>
- * @param expression The substitution expression.
- * @param input The input.
- * @return The input after substitutions have been performed.
+ * @param result The StringBuffer in which to store the result of the
+ * substitutions. The buffer is only appended to.
+ * @param expression The Perl5 substitution regular expression.
+ * @param input The input on which to perform substitutions.
+ * @return The number of substitutions made.
* @exception MalformedPerl5PatternException If there is an error in
* the expression. You are not forced to catch this exception
* because it is derived from RuntimeException.
+ * @since 2.0.6
*/
// Expression parsing will have to be moved into a separate method if
// there are going to be variations of this method.
- public synchronized String substitute(String expression, String input)
+ public synchronized int substitute(StringBuffer result, String expression,
+ String input)
throws MalformedPerl5PatternException
{
boolean backslash, finalDelimiter;
int index, compileOptions, numSubstitutions, numInterpolations;
- int firstOffset, secondOffset, thirdOffset;
- String result;
+ int firstOffset, secondOffset, thirdOffset, subCount;
StringBuffer replacement;
Pattern compiledPattern;
char exp[], delimiter;
@@ -575,12 +584,14 @@
break __nullTest;
}
- result = Util.substitute(__matcher, entry._pattern, entry._substitution,
- input, entry._numSubstitutions);
+
+ subCount =
+ Util.substitute(result, __matcher, entry._pattern, entry._substitution,
+ input, entry._numSubstitutions);
__lastMatch = __matcher.getMatch();
- return result;
+ return subCount;
}
exp = expression.toCharArray();
@@ -680,15 +691,46 @@
numSubstitutions);
__expressionCache.addElement(expression, entry);
- result = Util.substitute(__matcher, compiledPattern, substitution,
- input, numSubstitutions);
+ subCount =
+ Util.substitute(result, __matcher, compiledPattern, substitution,
+ input, numSubstitutions);
__lastMatch = __matcher.getMatch();
- return result;
+ return subCount;
}
-
+ /**
+ * Substitutes a pattern in a given input with a replacement string.
+ * The substitution expression is specified in Perl5 native format.
+ * <dl compact>
+ * <dt>Calling this method is the same as:</dt>
+ * <dd>
+ * <blockquote><pre>
+ * String result;
+ * StringBuffer buffer = new StringBuffer();
+ * perl.substitute(buffer, expression, input);
+ * result = buffer.toString();
+ * </pre></blockquote>
+ * </dd>
+ * </dl>
+ * @param expression The Perl5 substitution regular expression.
+ * @param input The input on which to perform substitutions.
+ * @return The input as a String after substitutions have been performed.
+ * @exception MalformedPerl5PatternException If there is an error in
+ * the expression. You are not forced to catch this exception
+ * because it is derived from RuntimeException.
+ * @since 1.0
+ * @see #substitute
+ */
+ public synchronized String substitute(String expression, String input)
+ throws MalformedPerl5PatternException
+ {
+ StringBuffer result = new StringBuffer();
+ substitute(result, expression, input);
+ return result.toString();
+ }
+
/**
* Splits a String into strings that are appended to a List, but no more
* than a specified limit. The String is split using a regular expression
@@ -724,9 +766,8 @@
* <blockquote><pre>
* { "8", "-", "12", ",", "15", ",", "18" }
* </pre></blockquote>
- * The Util.split() method in the
- * OROMatcher<font size="-2"><sup>TM</sup></font> package does NOT
- * implement this particular behavior because it is intended to
+ * The {@link org.apache.oro.text.regex.Util#split Util.split()} method
+ * does NOT implement this particular behavior because it is intended to
* be usable with Pattern instances other than Perl5Pattern.
* <p>
* @param results
@@ -848,9 +889,8 @@
* <blockquote><pre>
* { "8", "-", "12", ",", "15", ",", "18" }
* </pre></blockquote>
- * The Util.split() method in the
- * OROMatcher<font size="-2"><sup>TM</sup></font> package does NOT
- * implement this particular behavior because it is intended to
+ * The {@link org.apache.oro.text.regex.Util#split Util.split()} method
+ * does NOT implement this particular behavior because it is intended to
* be usable with Pattern instances other than Perl5Pattern.
* <p>
* @deprecated Use
@@ -1023,7 +1063,7 @@
/**
- * Returns the part of the input preceding that last match found.
+ * Returns the part of the input preceding the last match found.
* <p>
* @return The part of the input following the last match found.
*/
@@ -1067,7 +1107,7 @@
/**
- * Returns the part of the input following that last match found.
+ * Returns the part of the input following the last match found.
* <p>
* @return The part of the input following the last match found.
*/
@@ -1110,7 +1150,7 @@
/**
- * Returns the part of the input preceding that last match found as a
+ * Returns the part of the input preceding the last match found as a
* char array. This method eliminates the extra
* buffer copying caused by preMatch().toCharArray().
* <p>
@@ -1161,7 +1201,7 @@
/**
- * Returns the part of the input following that last match found as a char
+ * Returns the part of the input following the last match found as a char
* array. This method eliminates the extra buffer copying caused by
* preMatch().toCharArray().
* <p>
1.10 +20 -20 jakarta-oro/src/java/org/apache/oro/text/regex/Util.java
Index: Util.java
===================================================================
RCS file: /home/cvs/jakarta-oro/src/java/org/apache/oro/text/regex/Util.java,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- Util.java 17 Jan 2002 19:29:15 -0000 1.9
+++ Util.java 19 Feb 2002 04:54:29 -0000 1.10
@@ -58,7 +58,7 @@
*/
/*
- * $Id: Util.java,v 1.9 2002/01/17 19:29:15 dfs Exp $
+ * $Id: Util.java,v 1.10 2002/02/19 04:54:29 dfs Exp $
*/
import java.util.*;
@@ -151,7 +151,7 @@
* regular expression will be used to split the input.
* @param matcher The regular expression matcher to execute the split.
* @param pattern The regular expression to use as a split delimiter.
- * @param input The <code>String</code> to split.
+ * @param input The <code>String</code> to split.
* @param limit The limit on the number of resulting split elements.
* Values <= 0 produce the same behavior as using the
* <b>SPLIT_ALL</b> constant which causes the limit to be
@@ -214,7 +214,7 @@
* occurences are appended.
* @param matcher The regular expression matcher to execute the split.
* @param pattern The regular expression to use as a split delimiter.
- * @param input The <code>String</code> to split.
+ * @param input The <code>String</code> to split.
* @since 2.0
*/
public static void split(Collection results, PatternMatcher matcher,
@@ -309,7 +309,7 @@
* {@link #split(Collection, PatternMatcher, Pattern, String)} instead.
* @param matcher The regular expression matcher to execute the split.
* @param pattern The regular expression to use as a split delimiter.
- * @param input The <code>String</code> to split.
+ * @param input The <code>String</code> to split.
* @return A <code>Vector</code> containing all the substrings of the input
* that occur between the regular expression delimiter occurences.
* @since 1.0
@@ -333,11 +333,11 @@
* @param pattern The regular expression to search for and substitute
* occurrences of.
* @param sub The Substitution used to substitute pattern occurences.
- * @param input The <code>String</code> on which to perform substitutions.
- * @param numSubs The number of substitutions to perform. Only the
- * first <b> numSubs </b> patterns encountered are
- * substituted. If you want to substitute all occurences
- * set this parameter to <b> SUBSTITUTE_ALL </b>.
+ * @param input The <code>String</code> on which to perform substitutions.
+ * @param numSubs The number of substitutions to perform. Only the
+ * first <b> numSubs </b> patterns encountered are
+ * substituted. If you want to substitute all occurences
+ * set this parameter to <b> SUBSTITUTE_ALL </b>.
* @return A String comprising the input string with the substitutions,
* if any, made. If no substitutions are made, the returned String
* is the original input String.
@@ -372,7 +372,7 @@
* @param pattern The regular expression to search for and substitute
* occurrences of.
* @param sub The Substitution used to substitute pattern occurences.
- * @param input The <code>String</code> on which to perform substitutions.
+ * @param input The <code>String</code> on which to perform substitutions.
* @return A String comprising the input string with the substitutions,
* if any, made. If no substitutions are made, the returned String
* is the original input String.
@@ -399,11 +399,11 @@
* @param pattern The regular expression to search for and substitute
* occurrences of.
* @param sub The Substitution used to substitute pattern occurences.
- * @param input The input on which to perform substitutions.
- * @param numSubs The number of substitutions to perform. Only the
- * first <b> numSubs </b> patterns encountered are
- * substituted. If you want to substitute all occurences
- * set this parameter to <b> SUBSTITUTE_ALL </b>.
+ * @param input The input on which to perform substitutions.
+ * @param numSubs The number of substitutions to perform. Only the
+ * first <b> numSubs </b> patterns encountered are
+ * substituted. If you want to substitute all occurences
+ * set this parameter to <b> SUBSTITUTE_ALL </b>.
* @return The number of substitutions made.
* @since 2.0.5
*/
@@ -431,11 +431,11 @@
* @param pattern The regular expression to search for and substitute
* occurrences of.
* @param sub The Substitution used to substitute pattern occurences.
- * @param input The input on which to perform substitutions.
- * @param numSubs The number of substitutions to perform. Only the
- * first <b> numSubs </b> patterns encountered are
- * substituted. If you want to substitute all occurences
- * set this parameter to <b> SUBSTITUTE_ALL </b>.
+ * @param input The input on which to perform substitutions.
+ * @param numSubs The number of substitutions to perform. Only the
+ * first <b> numSubs </b> patterns encountered are
+ * substituted. If you want to substitute all occurences
+ * set this parameter to <b> SUBSTITUTE_ALL </b>.
* @return The number of substitutions made.
* @since 2.0.3
*/
--
To unsubscribe, e-mail: <mailto:[EMAIL PROTECTED]>
For additional commands, e-mail: <mailto:[EMAIL PROTECTED]>