Author: markus
Date: Sun Jul 17 14:01:51 2011
New Revision: 1147615
URL: http://svn.apache.org/viewvc?rev=1147615&view=rev
Log:
NUTCH-1029 ReadDB throws EOFException
Modified:
nutch/branches/branch-1.4/CHANGES.txt
nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReader.java
Modified: nutch/branches/branch-1.4/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1147615&r1=1147614&r2=1147615&view=diff
==============================================================================
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Sun Jul 17 14:01:51 2011
@@ -2,6 +2,8 @@ Nutch Change Log
Release 1.4 - Current development
+* NUTCH-1029 Readdb throws EOFException (markus)
+
* NUTCH-1036 Solr jobs should increment counters in Reporter (markus)
* NUTCH-987 Support HTTP auth for Solr communication (markus)
Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReader.java
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=1147615&r1=1147614&r2=1147615&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReader.java (original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/crawl/CrawlDbReader.java Sun Jul 17 14:01:51 2011
@@ -313,6 +313,9 @@ public class CrawlDbReader implements Cl
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
+ // https://issues.apache.org/jira/browse/NUTCH-1029
+ job.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
+
JobClient.runJob(job);
// reading the result
@@ -464,7 +467,7 @@ public class CrawlDbReader implements Cl
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(1); // create a single file.
-
+
JobClient.runJob(job);
FileSystem fs = FileSystem.get(config);
fs.delete(tempDir, true);