jsalvata 2003/11/25 01:43:17
Modified: src/protocol/http/org/apache/jmeter/protocol/http/sampler
ParseRegexp.java
Log:
Rolling back to ORO as per Stefan Bodewig's comments. Cleaned some unused imports.
Revision Changes Path
1.4 +56 -54
jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/sampler/ParseRegexp.java
Index: ParseRegexp.java
===================================================================
RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/sampler/ParseRegexp.java,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -r1.3 -r1.4
--- ParseRegexp.java 24 Nov 2003 16:52:51 -0000 1.3
+++ ParseRegexp.java 25 Nov 2003 09:43:17 -0000 1.4
@@ -62,13 +62,21 @@
import junit.framework.TestCase;
+import org.apache.jmeter.samplers.Entry;
import org.apache.jmeter.samplers.SampleResult;
import org.apache.jorphan.logging.LoggingManager;
import org.apache.log.Logger;
-import java.util.regex.Pattern;
-import java.util.regex.Matcher;
-import java.util.regex.PatternSyntaxException;
+// NOTE: Also looked at using Java 1.4 regexp instead of ORO. The change was
+// trivial. Performance did not improve -- at least not significantly.
+// Finally decided for ORO following advise from Stefan Bodewig (message
+// to jmeter-dev dated 25 Nov 2003 8:52 CET) [Jordi]
+import org.apache.oro.text.regex.MatchResult;
+import org.apache.oro.text.regex.Pattern;
+import org.apache.oro.text.regex.PatternMatcherInput;
+import org.apache.oro.text.regex.Perl5Compiler;
+import org.apache.oro.text.regex.Perl5Matcher;
+import org.apache.oro.text.regex.MalformedPatternException;
/**
* Parser class using regular expressions to scan HTML documents for images etc.
@@ -80,24 +88,37 @@
* <li><img src=<b>url</b> ... >
* <li><script src=<b>url</b> ... >
* <li><applet code=<b>url</b> ... >
- * <li><applet ... codebase=<b>url</b> ... >
* <li><input type=image src=<b>url</b> ... >
* <li><body background=<b>url</b> ... >
* <li><table background=<b>url</b> ... >
* <li><td background=<b>url</b> ... >
* <li><tr background=<b>url</b> ... >
+ * <li><applet ... codebase=<b>url</b> ... >
* <li><embed src=<b>url</b> ... >
* <li><embed codebase=<b>url</b> ... >
+ * <li><object codebase=<b>url</b> ... >
* </ul>
*
* Note that files that are duplicated within the enclosing document will
* only be downloaded once.
- * <p>
- * This parser takes into account the following tag:
* <ul>
* <li><base href=<b>url</b>>
* </ul>
*
+ * But not the following:
+ * <ul>
+ * <li>< ... codebase=<b>url</b> ... >
+ * </ul>
+ *
+ * The following parameters are not accounted for either (as the textbooks
+ * say, they are left as an exercise for the interested reader):
+ * <ul>
+ * <li><area href=<b>url</b> ... >
+ * </ul>
+ *
+ * <p>
+ * Finally, this class does not process <b>Style Sheets</b> either.
+ *
* @author Jordi Salvat i Alabart <[EMAIL PROTECTED]>
* @version $Id$
*/
@@ -128,46 +149,20 @@
{
protected Object initialValue()
{
- return pattern.matcher("");
+ return new Perl5Matcher();
}
};
/**
- * CharSequence supported by a byte array. Works on the assumption that
- * the character encoding is ISO-Latin1 -- which is not necessarily the case
- * but probably OK for the purpose of this ParseRegexp class.
+ * Thread-local input:
*/
- private static class ByteArrayCharSequence implements CharSequence {
- byte[] input;
- int start, end;
- public ByteArrayCharSequence(byte[] input) {
- super();
- setInput(input);
- }
- public ByteArrayCharSequence(byte[] input, int start, int end) {
- super();
- this.input= input;
- this.start= start;
- this.end= end;
- }
- public void setInput(byte[] input) {
- this.input= input;
- start= 0;
- end= input.length;
- }
- public char charAt(int index) {
- return (char)input[start+index];
- }
- public int length() {
- return end-start;
- }
- public CharSequence subSequence(int start, int end) {
- return new ByteArrayCharSequence(input, start, end);
- }
- public String toString() {
- return new String(input, start, end-start);
+ private static ThreadLocal localInput = new ThreadLocal()
+ {
+ protected Object initialValue()
+ {
+ return new PatternMatcherInput(new char[0]);
}
- }
+ };
/** Used to store the Logger (used for debug and error messages). */
transient private static Logger log = LoggingManager.getLoggerForClass();
@@ -178,15 +173,17 @@
static {
// Compile the regular expression:
try {
- pattern= Pattern.compile(REGEXP,
- Pattern.CASE_INSENSITIVE
- |Pattern.DOTALL);
+ Perl5Compiler c= new Perl5Compiler();
+ pattern= c.compile(REGEXP,
+ c.CASE_INSENSITIVE_MASK
+ |c.SINGLELINE_MASK
+ |c.READ_ONLY_MASK);
}
- catch(PatternSyntaxException e)
+ catch(MalformedPatternException mpe)
{
log.error("Internal error compiling regular expression in ParseRegexp.");
- log.error(e.toString());
- throw new Error(e);
+ log.error("MalformedPatterException - " + mpe);
+ throw new Error(mpe);
}
}
@@ -223,13 +220,18 @@
Set uniqueRLs = new LinkedHashSet();
// Look for unique RLs to be sampled.
- Matcher matcher = (Matcher) localMatcher.get();
- matcher.reset(new ByteArrayCharSequence(res.getResponseData()));
- while (matcher.find()) {
+ Perl5Matcher matcher = (Perl5Matcher) localMatcher.get();
+ PatternMatcherInput input = (PatternMatcherInput) localInput.get();
+ // TODO: find a way to avoid the cost of creating a String here --
+ // probably a new PatternMatcherInput working on a byte[] would do
+ // better.
+ input.setInput(new String(res.getResponseData()));
+ while (matcher.contains(input, pattern)) {
+ MatchResult match= matcher.getMatch();
String s;
- if (log.isDebugEnabled()) log.debug("match groups "+matcher.groupCount());
+ if (log.isDebugEnabled()) log.debug("match groups "+match.groups());
// Check for a BASE HREF:
- s= matcher.group(1);
+ s= match.group(1);
if (s!=null) {
try {
baseUrl= new URL(baseUrl, s);
@@ -246,9 +248,9 @@
return res;
}
}
- for (int g= 2; g < matcher.groupCount(); g++) {
- s= matcher.group(g);
- if (log.isDebugEnabled()) log.debug("group "+g+" - "+s);
+ for (int g= 2; g < match.groups(); g++) {
+ s= match.group(g);
+ if (log.isDebugEnabled()) log.debug("group "+g+" - "+match.group(g));
if (s!=null) uniqueRLs.add(s);
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]