Author: ab
Date: Sun Apr 30 16:33:45 2006
New Revision: 398462
URL: http://svn.apache.org/viewcvs?rev=398462&view=rev
Log:
Temporary workaround for a situation where we may end up with a
lone STATUS_SIGNATURE. The real reason for this error is
unknown at this moment; please report it if you encounter it.
Reported by Michael Stack.
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=398462&r1=398461&r2=398462&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Sun Apr 30 16:33:45 2006
@@ -19,11 +19,16 @@
import java.util.Iterator;
import java.io.IOException;
+import java.util.logging.*;
+
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.*;
+import org.apache.hadoop.util.LogFormatter;
/** Merge new page entries with existing entries. */
public class CrawlDbReducer implements Reducer {
+ public static final Logger LOG =
+ LogFormatter.getLogger("org.apache.nutch.crawl.CrawlDbReducer");
private int retryMax;
private CrawlDatum result = new CrawlDatum();
@@ -102,6 +107,9 @@
result.setNextFetchTime();
break;
+ case CrawlDatum.STATUS_SIGNATURE:
+ LOG.warning("Lone CrawlDatum.STATUS_SIGNATURE: " + key);
+ return;
case CrawlDatum.STATUS_FETCH_RETRY: // temporary failure
if (old != null)
result.setSignature(old.getSignature()); // use old signature
@@ -119,7 +127,7 @@
break;
default:
- throw new RuntimeException("Unknown status: "+highest.getStatus());
+ throw new RuntimeException("Unknown status: " + highest.getStatus() + " " + key);
}
result.setScore(result.getScore() + scoreIncrement);