woolfel 2003/10/21 05:58:26
Added: src/protocol/http/org/apache/jmeter/protocol/http/sampler
NewHTTPSamplerFull.java
Log:
the new sampler using htmlparser to get the images from the html
source.
Revision Changes Path
1.1
jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/sampler/NewHTTPSamplerFull.java
Index: NewHTTPSamplerFull.java
===================================================================
/*
* ====================================================================
* The Apache Software License, Version 1.1
*
* Copyright (c) 2001-2003 The Apache Software Foundation. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. The end-user documentation included with the redistribution,
* if any, must include the following acknowledgment:
* "This product includes software developed by the
* Apache Software Foundation (http://www.apache.org/)."
* Alternately, this acknowledgment may appear in the software itself,
* if and wherever such third-party acknowledgments normally appear.
*
* 4. The names "Apache" and "Apache Software Foundation" and
* "Apache JMeter" must not be used to endorse or promote products
* derived from this software without prior written permission. For
* written permission, please contact [EMAIL PROTECTED]
*
* 5. Products derived from this software may not be called "Apache",
* "Apache JMeter", nor may "Apache" appear in their name, without
* prior written permission of the Apache Software Foundation.
*
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*/
package org.apache.jmeter.protocol.http.sampler;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.StringReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.HashSet;
import java.util.Set;
import junit.framework.TestCase;
import org.apache.jmeter.samplers.Entry;
import org.apache.jmeter.samplers.SampleResult;
import org.apache.log.Hierarchy;
import org.apache.log.Logger;
import org.htmlparser.*;
import org.htmlparser.util.*;
import org.htmlparser.scanners.*;
import org.htmlparser.tags.*;
/**
* A sampler that downloads downloadable components such as images, applets, etc.
* <p>
* For HTML files, this class will download binary files specified in the
* following ways (where <b>url</b> represents the binary file to be downloaded):
* <ul>
* <li><img src=<b>url</b> ... >
* <li><applet code=<b>url</b> ... >
* <li><input type=image src=<b>url</b> ... >
* <li><body background=<b>url</b> ... >
* </ul>
*
* Note that files that are duplicated within the enclosing document will
* only be downloaded once. Also, the processing does not take account of the
* following parameters:
* <ul>
* <li><base href=<b>url</b>>
* <li>< ... codebase=<b>url</b> ... >
* </ul>
*
* The following parameters are not accounted for either (as the textbooks
* say, they are left as an exercise for the interested reader):
* <ul>
* <li><applet ... codebase=<b>url</b> ... >
* <li><area href=<b>url</b> ... >
* <li><embed src=<b>url</b> ... >
* <li><embed codebase=<b>url</b> ... >
* <li><object codebase=<b>url</b> ... >
* <li><table background=<b>url</b> ... >
* <li><td background=<b>url</b> ... >
* <li><tr background=<b>url</b> ... >
* </ul>
*
* Due to the recent refactoring of this class, these might not be as difficult
* as they once might have been.
* <p>
* Finally, this class does not process <b>Style Sheets</b> either.
*
* @author Khor Soon Hin,
* <a href="mailto:[EMAIL PROTECTED]">Martin Ramshaw</a>
* @version $Id: NewHTTPSamplerFull.java,v 1.1 2003/10/21 12:58:26 woolfel Exp $
* @created $Date: 2003/10/21 12:58:26 $
*/
public class NewHTTPSamplerFull
{
/** Used to store the Logger (used for debug and error messages). */
transient private static Logger log =
Hierarchy.getDefaultHierarchy().getLoggerFor("jmeter.protocol.http");
/**
* Used to store the UTF encoding name (which is version dependent).
* @see #getUTFEncodingName
*/
protected static String utfEncodingName;
/**
* Used to store the base URL. It should probably be updated to
* respect the following parameter:
* <pre>
* <base href=<b>url</b>>
* </pre>
*/
protected URL baseUrl;
protected HTTPSampler Sampler = null;
/**
* HtmlParser is used instead of Tidy
*/
protected Parser HtmlParser = null;
/**
* This is the only Constructor.
*/
public NewHTTPSamplerFull()
{
super();
log.debug("Start : NewHTTPSamplerFull");
log.debug("End : NewHTTPSamplerFull");
}
/**
* Samples the <code>Entry</code> passed in and stores the result in
* <code>SampleResult</code>. The original file (which is assumed to be
* an HTML file) is parsed into a DOM tree and examined for embedded binary
* files.
* <p>
* Note that files that are duplicated within the enclosing document will
* only be downloaded once.
*
* @param entry an entry to be sampled
* @return results of the sampling
*/
public SampleResult sample(HTTPSampler sampler)
{
Sampler = sampler;
// Sample the container page.
log.debug("Start : NewHTTPSamplerFull sample");
SampleResult res = sampler.sample(new Entry());
if(log.isDebugEnabled())
{
log.debug("Main page loading time - " + res.getTime());
}
// Now parse the HTML page
return parseForImages(res,sampler);
}
/**
* This method is called by HTTPSampler to get the images
* from the HTML and retrieve it from the server. It is
* the entry point for parsing the HTML.
* @param res
* @param sampler
* @return
*/
protected SampleResult parseForImages(SampleResult res,HTTPSampler sampler)
{
String displayName = res.getSampleLabel();
try {
String contents = new String(res.getResponseData());
StringReader reader = new StringReader(contents);
NodeReader nreader = new NodeReader(reader,contents.length());
HtmlParser = new Parser(nreader,new DefaultParserFeedback ());
addTagListeners(HtmlParser);
} catch (Exception e){
log.error("IOException: had problems reading the InputStream");
}
try
{
baseUrl = sampler.getUrl();
if(log.isDebugEnabled())
{
log.error("baseUrl - " + baseUrl.toString());
}
}
catch(MalformedURLException mfue)
{
log.error("Error creating URL '" + displayName + "'");
log.error("MalformedURLException - " + mfue);
res.setResponseData(mfue.toString().getBytes());
res.setResponseCode(HTTPSampler.NON_HTTP_RESPONSE_CODE);
res.setResponseMessage(HTTPSampler.NON_HTTP_RESPONSE_MESSAGE);
res.setSuccessful(false);
mfue.printStackTrace();
return res;
}
// System.out.println("got the baseURL");
// Now parse the DOM tree
// This is used to ignore duplicated binary files.
Set uniqueURLs = new HashSet();
// look for applets
// This will only work with an Applet .class file.
// Ideally, this should be upgraded to work with Objects (IE)
// and archives (.jar and .zip) files as well.
parseNodes(HtmlParser, uniqueURLs, res,sampler);
// Okay, we're all done now
if(log.isDebugEnabled())
{
log.debug("Total time - " + res.getTime());
}
log.debug("End : NewHTTPSamplerFull sample");
return res;
}
/**
* User HTMLParser to get the relevant nodes. This simplifies
* the process and should provide significant performance
* improvements.
*
* @param parser HTMLParser
* @param uniques used to ensure that binary files are only downloaded once
* @param res <code>SampleResult</code> to store sampling results
*/
protected void parseNodes(Parser parser, Set uniques,
SampleResult res,HTTPSampler sampler)
{
// Okay, we're all done now
if(log.isDebugEnabled())
{
log.debug("Start : NewHTTPSamplerFull");
}
boolean uniqueBinary;
SampleResult binRes = null;
try {
// we start to iterate through the elements
for(NodeIterator e = parser.elements(); e.hasMoreNodes();)
{
uniqueBinary = true;
Node node = e.nextNode();
String binUrlStr = null;
// first we check to see if body tag has a
// background set and we set the NodeIterator
// to the child elements inside the body
if (node instanceof BodyTag){
BodyTag body = (BodyTag)node;
binUrlStr = body.getAttribute("background");
// if the body tag exists, we get the elements
// within the body tag. if we don't we won't
// see the body of the page. The only catch
// with this is if there are images after the
// closing body tag, it won't get parsed. If
// someone puts it outside the body tag, it
// is probably a mistake. Plus it's bad to
// have important content after the closing
// body tag. Peter Lin 10-9-03
e = body.elements();
} else if (node instanceof ImageTag){
ImageTag image = (ImageTag)node;
binUrlStr = image.getImageURL();
} else if (node instanceof AppletTag){
AppletTag applet = (AppletTag)node;
binUrlStr = applet.getAppletClass();
} else if (node instanceof InputTag){
InputTag input = (InputTag)node;
// we check the input tag type for image
String strType = input.getAttribute("type");
if(strType != null &&
strType.equalsIgnoreCase("image"))
{
// then we need to download the binary
binUrlStr = input.getAttribute("src");
}
}
binRes = new SampleResult();
if(binUrlStr == null)
{
continue;
}
// set the baseUrl and binUrl so that if error occurs
// due to MalformedException then at least the values
will be
// visible to the user to aid correction
binRes.setSampleLabel(baseUrl + "," + binUrlStr);
// download the binary
URL binUrl = null;
try
{
binUrl = new URL(baseUrl, binUrlStr);
}
catch(MalformedURLException mfue)
{
log.error("Error creating URL '" + baseUrl +
" , " + binUrlStr + "'");
log.error("MalformedURLException - " + mfue);
binRes.setResponseData(mfue.toString().getBytes());
binRes.setResponseCode(HTTPSampler.NON_HTTP_RESPONSE_CODE);
binRes.setResponseMessage(HTTPSampler.NON_HTTP_RESPONSE_MESSAGE);
binRes.setSuccessful(false);
res.addSubResult(binRes);
break;
}
if(log.isDebugEnabled())
{
log.debug("Binary url - " + binUrlStr);
log.debug("Full Binary url - " + binUrl);
}
binRes.setSampleLabel(binUrl.toString());
uniqueBinary = uniques.add(binUrl.toString());
if (uniqueBinary)
{
// a browser should be smart enough to *not*
download
// a binary file that it already has in its
cache.
try
{
loadBinary(binUrl, binRes,sampler);
}
catch(Exception ioe)
{
log.error("Error reading from URL - "
+ ioe);
binRes.setResponseData(ioe.toString().getBytes());
binRes.setResponseCode(HTTPSampler.NON_HTTP_RESPONSE_CODE);
binRes.setResponseMessage(HTTPSampler.NON_HTTP_RESPONSE_MESSAGE);
binRes.setSuccessful(false);
}
log.debug("Adding result");
res.addSubResult(binRes);
res.setTime(res.getTime() + binRes.getTime());
}
else
{
if(log.isDebugEnabled())
{
log.debug("Skipping duplicate - " +
binUrl);
}
}
}
log.debug("End : NewHTTPSamplerFull parseNodes");
}
catch (ParserException e){
}
}
/**
* Download the binary file from the given <code>URL</code>.
*
* @param url <code>URL</code> from where binary is to be downloaded
* @param res <code>SampleResult</code> to store sampling results
* @return binary downloaded
*
* @throws IOException indicates a problem reading from the URL
*/
protected byte[] loadBinary(URL url, SampleResult res,HTTPSampler sampler)
throws Exception
{
log.debug("Start : loadBinary");
byte[] ret = new byte[0];
res.setSamplerData(new HTTPSampler(url).toString());
HttpURLConnection conn;
try
{
conn = sampler.setupConnection(url, HTTPSampler.GET,res);
sampler.connect();
}
catch(Exception ioe)
{
// don't do anything 'cos presumably the connection will return the
// correct http response codes
if(log.isDebugEnabled())
{
log.debug("loadBinary : error in setupConnection " + ioe);
}
throw ioe;
}
try
{
long time = System.currentTimeMillis();
if(log.isDebugEnabled())
{
log.debug("loadBinary : start time - " + time);
}
int errorLevel = getErrorLevel(conn, res);
if (errorLevel == 2)
{
ret = sampler.readResponse(conn);
res.setSuccessful(true);
long endTime = System.currentTimeMillis();
if(log.isDebugEnabled())
{
log.debug("loadBinary : end time - " + endTime);
}
res.setTime(endTime - time);
}
else
{
res.setSuccessful(false);
int responseCode =
((HttpURLConnection)conn).getResponseCode();
String responseMsg =
((HttpURLConnection)conn).getResponseMessage();
log.error("loadBinary : failed code - " + responseCode);
log.error("loadBinary : failed message - " + responseMsg);
}
if(log.isDebugEnabled())
{
log.debug("loadBinary : binary - " + ret[0]+ret[1]);
log.debug("loadBinary : loadTime - " + res.getTime());
}
log.debug("End : loadBinary");
res.setResponseData(ret);
res.setDataType(SampleResult.BINARY);
return ret;
}
finally
{
try
{
// the server can request that the connection be closed,
// but if we requested that the server close the connection
// the server should echo back our 'close' request.
// Otherwise, let the server disconnect the connection
// when its timeout period is reached.
sampler.disconnect(conn);
}
catch(Exception e)
{
}
}
}
/**
* Get the response code of the URL connection and divide it by 100 thus
* returning 2 (for 2xx response codes), 3 (for 3xx reponse codes), etc.
*
* @param conn <code>HttpURLConnection</code> of URL request
* @param res where all results of sampling will be stored
* @return HTTP response code divided by 100
*/
protected int getErrorLevel(HttpURLConnection conn, SampleResult res)
{
log.debug("Start : getErrorLevel");
int errorLevel = 2;
try
{
int responseCode =
((HttpURLConnection) conn).getResponseCode();
String responseMessage =
((HttpURLConnection) conn).getResponseMessage();
errorLevel = responseCode/100;
res.setResponseCode(String.valueOf(responseCode));
res.setResponseMessage(responseMessage);
if(log.isDebugEnabled())
{
log.debug("getErrorLevel : responseCode - " +
responseCode);
log.debug("getErrorLevel : responseMessage - " +
responseMessage);
}
}
catch (Exception e2)
{
log.error("getErrorLevel : " + conn.getHeaderField(0));
log.error("getErrorLevel : " + conn.getHeaderFieldKey(0));
log.error("getErrorLevel : " +
"Error getting response code for HttpUrlConnection - ",e2);
res.setResponseData(e2.toString().getBytes());
res.setResponseCode(HTTPSampler.NON_HTTP_RESPONSE_CODE);
res.setResponseMessage(HTTPSampler.NON_HTTP_RESPONSE_MESSAGE);
res.setSuccessful(false);
}
log.debug("End : getErrorLevel");
return errorLevel;
}
/**
* Returns a node representing a whole xml given an xml document.
*
* @param text an xml document
* @return a node representing a whole xml
*
* @throws SAXException indicates an error parsing the xml document
*/
protected static void addTagListeners(Parser parser)
{
log.debug("Start : addTagListeners");
// add body tag scanner
parser.addScanner(new BodyScanner());
// add ImageTag scanner
LinkScanner linkScanner = new LinkScanner(LinkTag.LINK_TAG_FILTER);
// parser.addScanner(linkScanner);
parser.addScanner(linkScanner.createImageScanner(ImageTag.IMAGE_TAG_FILTER));
// add input tag scanner
parser.addScanner(new InputTagScanner());
// add applet tag scanner
parser.addScanner(new AppletScanner());
}
/**
* Returns the encoding type which is different for different jdks
* even though they mean the same thing (for example, UTF8 or UTF-8).
*
* @return either UTF8 or UTF-8 depending on the jdk version
* @see #utfEncodingName
*/
protected static String getUTFEncodingName()
{
log.debug("Start : getUTFEncodingName");
if (utfEncodingName == null)
{
String versionNum = System.getProperty( "java.version" );
if(log.isDebugEnabled())
{
log.debug("getUTFEncodingName : version = " + versionNum);
}
if (versionNum.startsWith( "1.1" ))
{
utfEncodingName = "UTF8";
}
else
{
utfEncodingName = "UTF-8";
}
}
if(log.isDebugEnabled())
{
log.debug("getUTFEncodingName : Encoding = " + utfEncodingName);
}
log.debug("End : getUTFEncodingName");
return utfEncodingName;
}
public static class Test extends TestCase
{
private HTTPSampler hsf;
transient private static Logger log =
Hierarchy.getDefaultHierarchy().getLoggerFor("jmeter.test");
public Test(String name)
{
super(name);
}
protected void setUp()
{
log.debug("Start : setUp1");
hsf = new HTTPSampler();
hsf.setMethod(HTTPSampler.GET);
hsf.setProtocol("file");
hsf.setPath("HTTPSamplerFullTestFile.txt");
hsf.setImageParser(true);
log.debug("End : setUp1");
}
public void testGetUTFEncodingName()
{
log.debug("Start : testGetUTFEncodingName");
String javaVersion = System.getProperty("java.version");
System.setProperty("java.version", "1.1");
assertEquals("UTF8", NewHTTPSamplerFull.getUTFEncodingName());
// need to clear utfEncodingName variable first 'cos
// getUTFEncodingName checks to see if it's null
utfEncodingName = null;
System.setProperty("java.version", "1.2");
assertEquals("UTF-8", NewHTTPSamplerFull.getUTFEncodingName());
System.setProperty("java.version", javaVersion);
log.debug("End : testGetUTFEncodingName");
}
public void testGetUrlConfig()
{
log.debug("Start : testGetUrlConfig");
assertEquals(HTTPSampler.GET, hsf.getMethod());
assertEquals("file", hsf.getProtocol());
assertEquals("HTTPSamplerFullTestFile.txt", hsf.getPath());
log.debug("End : testGetUrlConfig");
}
// Can't think of a self-contained way to test this 'cos it requires
// http server. Tried to use file:// but HTTPSampler's sample
// specifically requires http.
public void testSampleMain()
{
log.debug("Start : testSampleMain");
// !ToDo : Have to wait till the day SampleResult is extended to
// store results of all downloaded stuff e.g. images, applets etc
String fileInput = "<html>\n\n" +
"<title>\n" +
" A simple applet\n" +
"</title>\n" +
"<body background=\"back.jpg\" vlink=\"#dd0000\" "+
"link=\"#0000ff\">\n" +
"<center>\n" +
"<h2> A simple applet\n" +
"</h2>\n" +
"<br>\n" +
"<br>\n" +
"<table>\n" +
"<td width = 20>\n" +
"<td width = 500 align = left>\n" +
"<img src=\"/tomcat.gif\">\n" +
"<img src=\"/tomcat.gif\">\n" +
"<a href=\"NervousText.java\"> Read my code <a>\n" +
"<p><applet code=NervousText.class width=400 height=200>\n" +
"</applet>\n" +
"<p><applet code=NervousText.class width=400 height=200>\n" +
"</applet>\n" +
"</table>\n" +
"<form>\n" +
" <input type=\"image\" src=\"/tomcat-power.gif\">\n" +
"</form>\n" +
"<form>\n" +
" <input type=\"image\" src=\"/tomcat-power.gif\">\n" +
"</form>\n" +
"</body>\n" +
"</html>\n";
byte[] bytes = fileInput.getBytes();
try
{
FileOutputStream fos =
new FileOutputStream("HTTPSamplerFullTestFile.txt");
fos.write(bytes);
fos.close();
}
catch(IOException ioe)
{
fail("Cannot create HTTPSamplerFullTestFile.txt in current directory
" +
"for testing - " + ioe);
}
// !ToDo
// hsf.sample(entry);
assertNull("Cannot think of way to test sample", null);
log.debug("End : testSampleMain");
}
protected void tearDown()
{
log.debug("Start : tearDown");
hsf = null;
log.debug("End : tearDown");
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]