Author: snagel
Date: Mon May 11 21:04:59 2015
New Revision: 1678824
URL: http://svn.apache.org/r1678824
Log:
NUTCH-1998 Add support for user-defined file extension to
CommonCrawlDataDumper: fix unit test
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1678824&r1=1678823&r2=1678824&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Mon May 11 21:04:59 2015
@@ -2,7 +2,7 @@ Nutch Change Log
Nutch Current Development 1.11-SNAPSHOT
-* NUTCH-1988 Add support for user-defined file extension to
CommonCrawlDataDumper (totaro via mattmann)
+* NUTCH-1998 Add support for user-defined file extension to
CommonCrawlDataDumper (totaro via mattmann)
* NUTCH-1873 Solr IndexWriter/Job to report number of docs indexed. (snagel
via lewismc)
@@ -54,8 +54,6 @@ Release Report: http://s.apache.org/nutc
* NUTCH-1989 Handling invalid URLs in CommonCrawlDataDumper (Giuseppe Totaro
via mattmann)
-* NUTCH-1988 Make nested output directory dump optional (Michael Joyce via
mattmann)
-
* NUTCH-1927 Create a whitelist of IPs/hostnames to allow skipping of
RobotRules parsing (mattmann, snagel)
* NUTCH-1986 Clarify Elastic Search Indexer Plugin Settings (Michael Joyce via
mattmann)
Modified:
nutch/trunk/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java?rev=1678824&r1=1678823&r2=1678824&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/tools/TestCommonCrawlDataDumper.java Mon May 11 21:04:59 2015
@@ -101,20 +101,16 @@ public class TestCommonCrawlDataDumper {
CommonCrawlDataDumper dumper = new CommonCrawlDataDumper(
new CommonCrawlConfig());
- dumper.dump(tempDir, sampleSegmentDir, false, null, false);
+ dumper.dump(tempDir, sampleSegmentDir, false, null, false, "");
Collection<File> tempFiles = FileUtils.listFiles(tempDir,
FileFilterUtils.fileFileFilter(),
FileFilterUtils.directoryFileFilter());
- boolean hasAll = true;
for (String expectedFileName : crawledFiles) {
- if (!hasFile(expectedFileName, tempFiles)) {
- hasAll = false;
- break;
- }
+ assertTrue("Missed file " + expectedFileName + " in dump",
+ hasFile(expectedFileName, tempFiles));
}
- assertTrue(hasAll);
}