sebb 2004/02/08 15:52:37
Modified: src/protocol/http/org/apache/jmeter/protocol/http/parser
JTidyHTMLParser.java HtmlParserHTMLParser.java
RegexpHTMLParser.java HTMLParser.java
Log:
Convert parsers to use new URLString/URLCollection classes
Revision Changes Path
1.9 +8 -35
jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java
Index: JTidyHTMLParser.java
===================================================================
RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/JTidyHTMLParser.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- JTidyHTMLParser.java 12 Dec 2003 02:48:47 -0000 1.8
+++ JTidyHTMLParser.java 8 Feb 2004 23:52:37 -0000 1.9
@@ -61,7 +61,6 @@
import java.io.ByteArrayInputStream;
import java.net.MalformedURLException;
import java.net.URL;
-import java.util.Collection;
import java.util.Iterator;
import org.apache.jorphan.logging.LoggingManager;
@@ -94,7 +93,7 @@
/* (non-Javadoc)
* @see org.apache.jmeter.protocol.http.parser.HTMLParser#getEmbeddedResourceURLs(byte[], java.net.URL)
*/
- public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, Collection urls)
+ public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, URLCollection urls)
throws HTMLParseException
{
Document dom = null;
@@ -122,7 +121,7 @@
*
* @return new base URL
*/
- private URL scanNodes(Node node, Collection urls, URL baseUrl) throws HTMLParseException
+ private URL scanNodes(Node node, URLCollection urls, URL baseUrl) throws HTMLParseException
{
if ( node == null ) {
return baseUrl;
@@ -157,13 +156,13 @@
if (name.equalsIgnoreCase("img"))
{
- addURL(urls,getValue(attrs,"src"),baseUrl);
+ urls.addURL(getValue(attrs,"src"),baseUrl);
break;
}
if (name.equalsIgnoreCase("applet"))
{
- addURL(urls,getValue(attrs,"code"),baseUrl);
+ urls.addURL(getValue(attrs,"code"),baseUrl);
break;
}
if (name.equalsIgnoreCase("input"))
@@ -171,18 +170,18 @@
String src=getValue(attrs,"src");
String typ=getValue(attrs,"type");
if ((src!=null) &&(typ.equalsIgnoreCase("image")) ){
- addURL(urls,src,baseUrl);
+ urls.addURL(src,baseUrl);
}
break;
}
if (name.equalsIgnoreCase("link"))
{
- addURL(urls,getValue(attrs,"href"),baseUrl);
+ urls.addURL(getValue(attrs,"href"),baseUrl);
break;
}
String back=getValue(attrs,"background");
if (back != null){
- addURL(urls,back,baseUrl);
+ urls.addURL(back,baseUrl);
break;
}
@@ -218,32 +217,6 @@
return v;
}
- /*
- * Helper method to create and add a URL, if non-null
- * @param urls - set
- * @param url - may be null
- * @param baseUrl
- */
- private void addURL(Collection urls, String url, URL baseUrl)
- {
- if (url == null) return;
- boolean b=false;
- try
- {
- b=urls.add(new URL(baseUrl, url));
- }
- catch(MalformedURLException mfue)
- {
- // Can't build the URL. May be a site error: return
- // the string.
- b=urls.add(url);
- }
- if (b) {
- log.debug("Added "+url);
- } else {
- log.debug("Skipped "+url);
- }
- }
/**
* Returns <code>tidy</code> as HTML parser.
*
1.9 +3 -13
jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java
Index: HtmlParserHTMLParser.java
===================================================================
RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HtmlParserHTMLParser.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- HtmlParserHTMLParser.java 12 Dec 2003 02:48:47 -0000 1.8
+++ HtmlParserHTMLParser.java 8 Feb 2004 23:52:37 -0000 1.9
@@ -61,7 +61,6 @@
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
-import java.util.Collection;
import java.util.Iterator;
import org.apache.jorphan.logging.LoggingManager;
@@ -92,7 +91,7 @@
/* (non-Javadoc)
* @see org.apache.jmeter.protocol.http.parser.HtmlParser#getEmbeddedResourceURLs(byte[], java.net.URL)
*/
- public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, Collection urls)
+ public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, URLCollection urls)
throws HTMLParseException
{
Parser htmlParser= null;
@@ -188,16 +187,7 @@
continue;
}
- try
- {
- urls.add(new URL(baseUrl, binUrlStr));
- }
- catch (MalformedURLException mfue)
- {
- // Can't build the URL? May be a site error: return the
- // string.
- urls.add(binUrlStr);
- }
+ urls.addURL(binUrlStr,baseUrl);
}
log.debug("End : parseNodes");
}
1.14 +3 -21
jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java
Index: RegexpHTMLParser.java
===================================================================
RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/RegexpHTMLParser.java,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- RegexpHTMLParser.java 7 Jan 2004 00:42:41 -0000 1.13
+++ RegexpHTMLParser.java 8 Feb 2004 23:52:37 -0000 1.14
@@ -59,7 +59,6 @@
import java.net.MalformedURLException;
import java.net.URL;
-import java.util.Collection;
import java.util.Iterator;
import org.apache.jorphan.logging.LoggingManager;
@@ -218,7 +217,7 @@
/* (non-Javadoc)
* @see org.apache.jmeter.protocol.http.parser.HtmlParser#getEmbeddedResourceURLs(byte[], java.net.URL)
*/
- public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, Collection urls)
+ public Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl, URLCollection urls)
{
Perl5Matcher matcher= (Perl5Matcher)localMatcher.get();
@@ -272,24 +271,7 @@
}
if (s != null)
{
- try
- {
- urls.add(new URL(baseUrl, s));
- }
- catch (MalformedURLException e)
- {
- // Doesn't even look like a URL? It may be a site
- // error: return the string.
- if (log.isDebugEnabled())
- {
- log.debug(
- "Can't build URL from RL "
- + s
- + " in page "
- + baseUrl);
- }
- urls.add(s);
- }
+ urls.addURL(s,baseUrl);
}
}
}
1.19 +7 -5
jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java
Index: HTMLParser.java
===================================================================
RCS file: /home/cvs/jakarta-jmeter/src/protocol/http/org/apache/jmeter/protocol/http/parser/HTMLParser.java,v
retrieving revision 1.18
retrieving revision 1.19
diff -u -r1.18 -r1.19
--- HTMLParser.java 8 Feb 2004 17:28:39 -0000 1.18
+++ HTMLParser.java 8 Feb 2004 23:52:37 -0000 1.19
@@ -182,7 +182,7 @@
col = new java.util.HashSet(); //TODO: improve JDK1.3 solution
}
- return getEmbeddedResourceURLs(html, baseUrl,col);
+ return getEmbeddedResourceURLs(html, baseUrl,new URLCollection(col));
// An additional note on using HashSets to store URLs: I just
// discovered that obtaining the hashCode of a java.net.URL implies
@@ -192,7 +192,9 @@
// thought I'd keep a note just in case...
// BTW, note that using a Vector and removing duplicates via scan
// would not help, since URL.equals requires name resolution too.
- // TODO: maybe change the API to return URL Strings instead of java.net.URLs?
+ // The above problem has now been addressed with the URLString and
+ // URLCollection classes.
+
}
// See whether we can use LinkedHashSet or not:
@@ -229,7 +231,7 @@
* @return an Iterator for the resource URLs
*/
public abstract Iterator getEmbeddedResourceURLs(byte[] html, URL baseUrl,
- Collection coll)
+ URLCollection coll)
throws HTMLParseException;
@@ -446,7 +448,7 @@
if (c == null) {
result = p.getEmbeddedResourceURLs(buffer,new URL(url));
} else {
- result = p.getEmbeddedResourceURLs(buffer,new URL(url),c);
+ result = p.getEmbeddedResourceURLs(buffer,new URL(url),new URLCollection(c));
}
/*
* TODO:
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]