sampler ParseRegexp.java

jsalvata Tue, 25 Nov 2003 02:46:35 -0800

jsalvata    2003/11/25 01:43:17

  Modified:    src/protocol/http/org/apache/jmeter/protocol/http/sampler
                        ParseRegexp.java
  Log:
  Rolling back to ORO as per Stefan Bodewig's comments. Cleaned some unused imports.
  
  Revision  Changes    Path
  1.4       +56 -54    jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/sampler/ParseRegexp.java
  
  Index: ParseRegexp.java
  ===================================================================
  RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/sampler/ParseRegexp.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- ParseRegexp.java  24 Nov 2003 16:52:51 -0000      1.3
  +++ ParseRegexp.java  25 Nov 2003 09:43:17 -0000      1.4
  @@ -62,13 +62,21 @@
   
   import junit.framework.TestCase;
   
  +import org.apache.jmeter.samplers.Entry;
   import org.apache.jmeter.samplers.SampleResult;
   import org.apache.jorphan.logging.LoggingManager;
   import org.apache.log.Logger;
   
  -import java.util.regex.Pattern;
  -import java.util.regex.Matcher;
  -import java.util.regex.PatternSyntaxException;
  +// NOTE: Also looked at using Java 1.4 regexp instead of ORO. The change was
  +// trivial. Performance did not improve -- at least not significantly.
  +// Finally decided for ORO following advice from Stefan Bodewig (message
  +// to jmeter-dev dated 25 Nov 2003 8:52 CET) [Jordi]
  +import org.apache.oro.text.regex.MatchResult;
  +import org.apache.oro.text.regex.Pattern;
  +import org.apache.oro.text.regex.PatternMatcherInput;
  +import org.apache.oro.text.regex.Perl5Compiler;
  +import org.apache.oro.text.regex.Perl5Matcher;
  +import org.apache.oro.text.regex.MalformedPatternException;
   
   /**
    * Parser class using regular expressions to scan HTML documents for images etc.
  @@ -80,24 +88,37 @@
    *  <li>&lt;img src=<b>url</b> ... &gt;
    *  <li>&lt;script src=<b>url</b> ... &gt;
    *  <li>&lt;applet code=<b>url</b> ... &gt;
  - *  <li>&lt;applet ... codebase=<b>url</b> ... &gt;
    *  <li>&lt;input type=image src=<b>url</b> ... &gt;
    *  <li>&lt;body background=<b>url</b> ... &gt;
    *  <li>&lt;table background=<b>url</b> ... &gt;
    *  <li>&lt;td background=<b>url</b> ... &gt;
    *  <li>&lt;tr background=<b>url</b> ... &gt;
  + *  <li>&lt;applet ... codebase=<b>url</b> ... &gt;
    *  <li>&lt;embed src=<b>url</b> ... &gt;
    *  <li>&lt;embed codebase=<b>url</b> ... &gt;
  + *  <li>&lt;object codebase=<b>url</b> ... &gt;
    * </ul>
    *
    * Note that files that are duplicated within the enclosing document will
    * only be downloaded once.
  - * <p>
  - * This parser takes into account the following tag:
    * <ul>
    *  <li>&lt;base href=<b>url</b>&gt;
    * </ul>
    *
  + * But not the following:
  + * <ul>
  + *  <li>&lt; ... codebase=<b>url</b> ... &gt;
  + * </ul>
  + *
  + * The following parameters are not accounted for either (as the textbooks
  + * say, they are left as an exercise for the interested reader):
  + * <ul>
  + *  <li>&lt;area href=<b>url</b> ... &gt;
  + * </ul>
  + *
  + * <p>
  + * Finally, this class does not process <b>Style Sheets</b> either.
  + *
    * @author Jordi Salvat i Alabart <[EMAIL PROTECTED]>
    * @version $Id$
    */
  @@ -128,46 +149,20 @@
       {
           protected Object initialValue()
           {
  -            return pattern.matcher("");
  +            return new Perl5Matcher();
           }
       };
   
       /**
  -     * CharSequence supported by a byte array. Works on the assumption that
  -     * the character encoding is ISO-Latin1 -- which is not necessarily the case
  -     * but probably OK for the purpose of this ParseRegexp class.
  +     * Thread-local input:
        */
  -    private static class ByteArrayCharSequence implements CharSequence {
  -        byte[] input;
  -        int start, end;
  -        public ByteArrayCharSequence(byte[] input) {
  -            super();
  -            setInput(input);
  -        }
  -        public ByteArrayCharSequence(byte[] input, int start, int end) {
  -            super();
  -            this.input= input;
  -            this.start= start;
  -            this.end= end;
  -        }
  -        public void setInput(byte[] input) {
  -            this.input= input;
  -            start= 0;
  -            end= input.length;
  -        }
  -        public char charAt(int index) {
  -            return (char)input[start+index];
  -        }
  -        public int length() {
  -            return end-start;
  -        }
  -        public CharSequence subSequence(int start, int end) {
  -            return new ByteArrayCharSequence(input, start, end);
  -        }
  -        public String toString() {
  -            return new String(input, start, end-start);
  +    private static ThreadLocal localInput = new ThreadLocal()
  +    {
  +        protected Object initialValue()
  +        {
  +            return new PatternMatcherInput(new char[0]);
           }
  -    }
  +    };
   
       /** Used to store the Logger (used for debug and error messages). */
       transient private static Logger log = LoggingManager.getLoggerForClass();
  @@ -178,15 +173,17 @@
       static {
           // Compile the regular expression:
           try {
  -            pattern= Pattern.compile(REGEXP,
  -                    Pattern.CASE_INSENSITIVE
  -                    |Pattern.DOTALL);
  +            Perl5Compiler c= new Perl5Compiler();
  +            pattern= c.compile(REGEXP,
  +                    c.CASE_INSENSITIVE_MASK
  +                    |c.SINGLELINE_MASK
  +                    |c.READ_ONLY_MASK);
           }
  -        catch(PatternSyntaxException e)
  +        catch(MalformedPatternException mpe)
           {
               log.error("Internal error compiling regular expression in ParseRegexp.");
  -            log.error(e.toString());
  -            throw new Error(e);
  +            log.error("MalformedPatterException - " + mpe);
  +            throw new Error(mpe);
           }
       }
   
  @@ -223,13 +220,18 @@
           Set uniqueRLs = new LinkedHashSet();
           
           // Look for unique RLs to be sampled.
  -        Matcher matcher = (Matcher) localMatcher.get();
  -        matcher.reset(new ByteArrayCharSequence(res.getResponseData()));
  -        while (matcher.find()) {
  +        Perl5Matcher matcher = (Perl5Matcher) localMatcher.get();
  +        PatternMatcherInput input = (PatternMatcherInput) localInput.get();
  +        // TODO: find a way to avoid the cost of creating a String here --
  +        // probably a new PatternMatcherInput working on a byte[] would do
  +        // better.
  +        input.setInput(new String(res.getResponseData()));
  +        while (matcher.contains(input, pattern)) {
  +            MatchResult match= matcher.getMatch();
               String s;
  -            if (log.isDebugEnabled()) log.debug("match groups "+matcher.groupCount());
  +            if (log.isDebugEnabled()) log.debug("match groups "+match.groups());
               // Check for a BASE HREF:
  -            s= matcher.group(1);
  +            s= match.group(1);
               if (s!=null) {
                   try {
                       baseUrl= new URL(baseUrl, s);
  @@ -246,9 +248,9 @@
                       return res;
                   }
               }
  -            for (int g= 2; g < matcher.groupCount(); g++) {
  -                s= matcher.group(g);
  -                if (log.isDebugEnabled()) log.debug("group "+g+" - "+s);
  +            for (int g= 2; g < match.groups(); g++) {
  +                s= match.group(g);
  +                if (log.isDebugEnabled()) log.debug("group "+g+" - "+match.group(g));
                   if (s!=null) uniqueRLs.add(s);
               }
           }


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

cvs commit: jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/sampler ParseRegexp.java

Reply via email to