vgritsenko 2002/08/16 21:10:12
Modified: src/java/org/apache/cocoon/components/crawler Tag:
cocoon_2_0_3_branch SimpleCocoonCrawlerImpl.java
Log:
sync with head (fix NPE, close reader)
Revision Changes Path
No revision
No revision
1.9.2.2 +35 -20
xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java
Index: SimpleCocoonCrawlerImpl.java
===================================================================
RCS file:
/home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java,v
retrieving revision 1.9.2.1
retrieving revision 1.9.2.2
diff -u -r1.9.2.1 -r1.9.2.2
--- SimpleCocoonCrawlerImpl.java 7 Aug 2002 10:52:44 -0000 1.9.2.1
+++ SimpleCocoonCrawlerImpl.java 17 Aug 2002 04:10:12 -0000 1.9.2.2
@@ -85,8 +85,7 @@
* @version CVS $Id$
*/
public class SimpleCocoonCrawlerImpl extends AbstractLoggable
- implements CocoonCrawler, Configurable, Disposable, Recyclable
-{
+ implements CocoonCrawler, Configurable, Disposable, Recyclable {
/**
* Config element name specifying expected link content-typ.
@@ -162,7 +161,7 @@
/**
* Default value of <code>user-agent</code> configuration value.
* @see Constants#COMPLETE_NAME
- *
+ *
* @since
*/
public final static String USER_AGENT_DEFAULT = Constants.COMPLETE_NAME;
@@ -234,7 +233,7 @@
* @since
*/
public void configure(Configuration configuration)
- throws ConfigurationException {
+ throws ConfigurationException {
Configuration[] children;
children = configuration.getChildren(INCLUDE_CONFIG);
@@ -250,7 +249,7 @@
}
} catch (RESyntaxException rese) {
getLogger().error("Cannot create including regular-expression for " +
- pattern, rese);
+ pattern, rese);
}
}
} else {
@@ -272,7 +271,7 @@
}
} catch (RESyntaxException rese) {
getLogger().error("Cannot create excluding regular-expression for " +
- pattern, rese);
+ pattern, rese);
}
}
} else {
@@ -414,12 +413,12 @@
*/
private void setDefaultExcludeFromCrawling() {
String[] EXCLUDE_FROM_CRAWLING_DEFAULT = {
- ".*\\.gif(\\?.*)?$",
- ".*\\.png(\\?.*)?$",
- ".*\\.jpe?g(\\?.*)?$",
- ".*\\.js(\\?.*)?$",
- ".*\\.css(\\?.*)?$"
- };
+ ".*\\.gif(\\?.*)?$",
+ ".*\\.png(\\?.*)?$",
+ ".*\\.jpe?g(\\?.*)?$",
+ ".*\\.js(\\?.*)?$",
+ ".*\\.css(\\?.*)?$"
+ };
for (int i = 0; i < EXCLUDE_FROM_CRAWLING_DEFAULT.length; i++) {
String pattern = EXCLUDE_FROM_CRAWLING_DEFAULT[i];
@@ -464,18 +463,27 @@
if (getLogger().isDebugEnabled()) {
getLogger().debug("Getting links of URL " + sURL);
}
+ BufferedReader br = null;
try {
sURL = url.getFile();
URL links = new URL(url, sURL
- + ((sURL.indexOf("?") == -1) ? "?" : "&")
- + linkViewQuery);
+ + ((sURL.indexOf("?") == -1) ? "?" : "&")
+ + linkViewQuery);
URLConnection links_url_connection = links.openConnection();
InputStream is = links_url_connection.getInputStream();
- BufferedReader br = new BufferedReader(new InputStreamReader(is));
+ br = new BufferedReader(new InputStreamReader(is));
String contentType = links_url_connection.getContentType();
+ if (contentType == null) {
+ if (getLogger().isDebugEnabled()) {
+ getLogger().debug("Ignoring " + sURL + " (no content type)");
+ }
+ // there is a check on null in the calling method
+ return null;
+ }
+
int index = contentType.indexOf(';');
- if (contentType != null && index != -1) {
+ if (index != -1) {
contentType = contentType.substring(0, index);
}
if (getLogger().isDebugEnabled()) {
@@ -521,6 +529,14 @@
}
} catch (IOException ioe) {
getLogger().warn("Problems get links of " + url, ioe);
+ } finally {
+ if (br != null) {
+ try {
+ br.close();
+ br = null;
+ } catch (IOException ignored) {
+ }
+ }
}
return url_links;
}
@@ -598,8 +614,7 @@
* @author <a href="mailto:[EMAIL PROTECTED]">Bernhard Huber</a>
* @version $Id$
*/
- public static class CocoonCrawlerIterator implements Iterator
- {
+ public static class CocoonCrawlerIterator implements Iterator {
private SimpleCocoonCrawlerImpl cocoonCrawler;
----------------------------------------------------------------------
In case of troubles, e-mail: [EMAIL PROTECTED]
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]