Author: lewismc
Date: Tue Jan 13 19:46:37 2015
New Revision: 1651455
URL: http://svn.apache.org/r1651455
Log:
NUTCH-1912 Dump tool -mimetype parameter needs to be optional to prevent NPE
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1651455&r1=1651454&r2=1651455&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jan 13 19:46:37 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.10-SNAPSHOT
+* NUTCH-1912 Dump tool -mimetype parameter needs to be optional to prevent NPE (Tyler Palsulich via lewismc)
+
* NUTCH-1881 ant target resolve-default to keep test libs (snagel)
* NUTCH-1660 Index filter for Page's latitude and longitude (Yasin Kılınç, lewismc)
Modified: nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java?rev=1651455&r1=1651454&r2=1651455&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/FileDumper.java Tue Jan 13 19:46:37 2015
@@ -113,6 +113,7 @@ public class FileDumper {
* @throws Exception
*/
public void dump(File outputDir, File segmentRootDir, String[] mimeTypes)
throws Exception {
+ if (mimeTypes == null) LOG.info("Accepting all mimetypes.");
//total file counts
Map<String, Integer> typeCounts = new HashMap<String, Integer>();
//filtered file counts
@@ -128,6 +129,10 @@ public class FileDumper {
return file.canRead() && file.isDirectory();
}
});
+ if (segmentDirs == null) {
+ System.err.println("No segment directories found in [" + segmentRootDir.getAbsolutePath() + "]");
+ return;
+ }
for (File segment : segmentDirs) {
LOG.info("Processing segment: [" + segment.getAbsolutePath() + "]");
@@ -153,7 +158,7 @@ public class FileDumper {
String url = key.toString();
String baseName = FilenameUtils.getBaseName(url);
String extension = FilenameUtils.getExtension(url);
- if (extension == null || (extension != null &&
+ if (extension == null || (extension != null &&
extension.equals(""))){
extension = "html";
}
@@ -166,7 +171,7 @@ public class FileDumper {
String mimeType = new Tika().detect(content.getContent());
collectStats(typeCounts, mimeType);
if (mimeType != null) {
- if (Arrays.asList(mimeTypes).contains(mimeType)) {
+ if (mimeTypes == null || Arrays.asList(mimeTypes).contains(mimeType)) {
collectStats(filteredCounts, mimeType);
filter = true;
}
@@ -182,7 +187,6 @@ public class FileDumper {
bas.close();
}
catch(Exception ignore){}
- bas = null;
}
}
@@ -198,7 +202,6 @@ public class FileDumper {
LOG.info("Skipping writing: ["
+ outputFullPath + "]: file already exists");
}
- content = null;
}
}
reader.close();
@@ -237,7 +240,7 @@ public class FileDumper {
.create("segment");
@SuppressWarnings("static-access")
Option mimeOpt = OptionBuilder.withArgName("mimetype")
- .hasArgs().withDescription("an optional list of mimetypes to dump, excluding all others")
+ .hasArgs().withDescription("an optional list of mimetypes to dump, excluding all others. Defaults to all.")
.create("mimetype");
//create the options
@@ -272,6 +275,7 @@ public class FileDumper {
}
catch(Exception e) {
LOG.error("FileDumper: " + StringUtils.stringifyException(e));
+ e.printStackTrace();
return;
}
}