jsalvata 2003/11/24 08:52:51
Modified: src/protocol/http/org/apache/jmeter/protocol/http/sampler
ParseRegexp.java
Log:
Use java.util.regex instead of ORO classes. Performance is similar, but some memory
savings are easier and it's much simpler.
Revision Changes Path
1.3 +55 -60
jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/sampler/ParseRegexp.java
Index: ParseRegexp.java
===================================================================
RCS file:
/home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/sampler/ParseRegexp.java,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- ParseRegexp.java 24 Nov 2003 02:03:19 -0000 1.2
+++ ParseRegexp.java 24 Nov 2003 16:52:51 -0000 1.3
@@ -54,11 +54,6 @@
*/
package org.apache.jmeter.protocol.http.sampler;
-import java.io.ByteArrayInputStream;
-import java.io.FileOutputStream;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Set;
@@ -67,19 +62,13 @@
import junit.framework.TestCase;
-import org.apache.jmeter.samplers.Entry;
import org.apache.jmeter.samplers.SampleResult;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;
-// NOTE: Also looked at using Java 1.4 regexp instead of ORO. The change was
-// trivial. Performance did not improve -- at least not significantly. [Jordi]
-import org.apache.oro.text.regex.MatchResult;
-import org.apache.oro.text.regex.Pattern;
-import org.apache.oro.text.regex.PatternMatcherInput;
-import org.apache.oro.text.regex.Perl5Compiler;
-import org.apache.oro.text.regex.Perl5Matcher;
-import org.apache.oro.text.regex.MalformedPatternException;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
+import java.util.regex.PatternSyntaxException;
/**
* Parser class using regular expressions to scan HTML documents for images etc.
@@ -91,37 +80,24 @@
* <li><img src=<b>url</b> ... >
* <li><script src=<b>url</b> ... >
* <li><applet code=<b>url</b> ... >
+ * <li><applet ... codebase=<b>url</b> ... >
* <li><input type=image src=<b>url</b> ... >
* <li><body background=<b>url</b> ... >
* <li><table background=<b>url</b> ... >
* <li><td background=<b>url</b> ... >
* <li><tr background=<b>url</b> ... >
- * <li><applet ... codebase=<b>url</b> ... >
* <li><embed src=<b>url</b> ... >
* <li><embed codebase=<b>url</b> ... >
- * <li><object codebase=<b>url</b> ... >
* </ul>
*
* Note that files that are duplicated within the enclosing document will
* only be downloaded once.
+ * <p>
+ * This parser takes into account the following tag:
* <ul>
* <li><base href=<b>url</b>>
* </ul>
*
- * But not the following:
- * <ul>
- * <li>< ... codebase=<b>url</b> ... >
- * </ul>
- *
- * The following parameters are not accounted for either (as the textbooks
- * say, they are left as an exercise for the interested reader):
- * <ul>
- * <li><area href=<b>url</b> ... >
- * </ul>
- *
- * <p>
- * Finally, this class does not process <b>Style Sheets</b> either.
- *
* @author Jordi Salvat i Alabart <[EMAIL PROTECTED]>
* @version $Id$
*/
@@ -152,20 +128,46 @@
{
protected Object initialValue()
{
- return new Perl5Matcher();
+ return pattern.matcher("");
}
};
/**
- * Thread-local input:
+ * CharSequence backed by a byte array. Works on the assumption that
+ * the character encoding is ISO-Latin1 -- which is not necessarily the case,
+ * but is probably good enough for the purposes of this ParseRegexp class.
*/
- private static ThreadLocal localInput = new ThreadLocal()
- {
- protected Object initialValue()
- {
- return new PatternMatcherInput(new char[0]);
+ private static class ByteArrayCharSequence implements CharSequence {
+ // Backing bytes; shared with sub-sequences, never copied.
+ byte[] input;
+ // Window [start, end) into the input array, as absolute array indexes.
+ int start, end;
+ /** Wraps the whole of the given array. */
+ public ByteArrayCharSequence(byte[] input) {
+ super();
+ setInput(input);
+ }
+ /** Wraps input[start..end); both bounds are absolute array indexes. */
+ public ByteArrayCharSequence(byte[] input, int start, int end) {
+ super();
+ this.input= input;
+ this.start= start;
+ this.end= end;
+ }
+ /** Re-targets this sequence at the whole of the given array. */
+ public void setInput(byte[] input) {
+ this.input= input;
+ start= 0;
+ end= input.length;
+ }
+ /**
+ * Returns the byte at the given position as an ISO-Latin1 character.
+ * No range check is made against length().
+ */
+ public char charAt(int index) {
+ // Mask to 0..255: a plain cast sign-extends bytes >= 0x80 to 0xFF80..0xFFFF.
+ return (char)(input[start+index] & 0xff);
}
- };
+ public int length() {
+ return end-start;
+ }
+ /**
+ * Returns a view of the given range sharing the same byte array.
+ * @throws IndexOutOfBoundsException if the range is invalid
+ */
+ public CharSequence subSequence(int start, int end) {
+ if (start<0 || end>length() || start>end) {
+ throw new IndexOutOfBoundsException(
+ "start="+start+", end="+end+", length="+length());
+ }
+ // The arguments are relative to this sequence, while the constructor
+ // takes absolute array indexes: shift both by this.start.
+ return new ByteArrayCharSequence(input, this.start+start, this.start+end);
+ }
+ /**
+ * Decodes the bytes one-to-one (ISO-Latin1), in agreement with charAt(),
+ * instead of going through the platform default charset.
+ */
+ public String toString() {
+ char[] chars= new char[end-start];
+ for (int i= 0; i<chars.length; i++) {
+ chars[i]= (char)(input[start+i] & 0xff);
+ }
+ return new String(chars);
+ }
+ }
/** Used to store the Logger (used for debug and error messages). */
transient private static Logger log = LoggingManager.getLoggerForClass();
@@ -176,17 +178,15 @@
static {
// Compile the regular expression:
try {
- Perl5Compiler c= new Perl5Compiler();
- pattern= c.compile(REGEXP,
- c.CASE_INSENSITIVE_MASK
- |c.SINGLELINE_MASK
- |c.READ_ONLY_MASK);
+ // DOTALL takes over from ORO's SINGLELINE_MASK; READ_ONLY_MASK has no
+ // counterpart in this call and is simply dropped.
+ pattern= Pattern.compile(REGEXP,
+ Pattern.CASE_INSENSITIVE
+ |Pattern.DOTALL);
}
- catch(MalformedPatternException mpe)
+ catch(PatternSyntaxException e)
{
+ // A syntax error in REGEXP is treated as an internal error: log it and
+ // abort class initialisation by throwing an Error.
log.error("Internal error compiling regular expression in ParseRegexp.");
- log.error("MalformedPatterException - " + mpe);
- throw new Error(mpe);
+ log.error(e.toString());
+ throw new Error(e);
}
}
@@ -223,18 +223,13 @@
Set uniqueRLs = new LinkedHashSet();
// Look for unique RLs to be sampled.
- Perl5Matcher matcher = (Perl5Matcher) localMatcher.get();
- PatternMatcherInput input = (PatternMatcherInput) localInput.get();
- // TODO: find a way to avoid the cost of creating a String here --
- // probably a new PatternMatcherInput working on a byte[] would do
- // better.
- input.setInput(new String(res.getResponseData()));
- while (matcher.contains(input, pattern)) {
- MatchResult match= matcher.getMatch();
+ Matcher matcher = (Matcher) localMatcher.get();
+ matcher.reset(new ByteArrayCharSequence(res.getResponseData()));
+ while (matcher.find()) {
String s;
- if (log.isDebugEnabled()) log.debug("match groups "+match.groups());
+ if (log.isDebugEnabled()) log.debug("match groups "+matcher.groupCount());
// Check for a BASE HREF:
- s= match.group(1);
+ s= matcher.group(1);
if (s!=null) {
try {
baseUrl= new URL(baseUrl, s);
@@ -251,9 +246,9 @@
return res;
}
}
- for (int g= 2; g < match.groups(); g++) {
- s= match.group(g);
- if (log.isDebugEnabled()) log.debug("group "+g+" - "+match.group(g));
+ // Matcher.groupCount() does not count group 0 (ORO's groups() did), so
+ // the last capturing group has index groupCount() and the bound must
+ // be inclusive.
+ for (int g= 2; g <= matcher.groupCount(); g++) {
+ s= matcher.group(g);
+ if (log.isDebugEnabled()) log.debug("group "+g+" - "+s);
if (s!=null) uniqueRLs.add(s);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]