Author: ab Date: Sun Apr 30 16:33:45 2006 New Revision: 398462 URL: http://svn.apache.org/viewcvs?rev=398462&view=rev Log: Temporary workaround for a situation where we may end up with a lone STATUS_SIGNATURE. The real reason for this error is unknown at the moment; please report it if you encounter it.
Reported by Michael Stack. Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=398462&r1=398461&r2=398462&view=diff ============================================================================== --- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original) +++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Sun Apr 30 16:33:45 2006 @@ -19,11 +19,16 @@ import java.util.Iterator; import java.io.IOException; +import java.util.logging.*; + import org.apache.hadoop.io.*; import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.LogFormatter; /** Merge new page entries with existing entries. */ public class CrawlDbReducer implements Reducer { + public static final Logger LOG = + LogFormatter.getLogger("org.apache.nutch.crawl.CrawlDbReducer"); private int retryMax; private CrawlDatum result = new CrawlDatum(); @@ -102,6 +107,9 @@ result.setNextFetchTime(); break; + case CrawlDatum.STATUS_SIGNATURE: + LOG.warning("Lone CrawlDatum.STATUS_SIGNATURE: " + key); + return; case CrawlDatum.STATUS_FETCH_RETRY: // temporary failure if (old != null) result.setSignature(old.getSignature()); // use old signature @@ -119,7 +127,7 @@ break; default: - throw new RuntimeException("Unknown status: "+highest.getStatus()); + throw new RuntimeException("Unknown status: " + highest.getStatus() + " " + key); } result.setScore(result.getScore() + scoreIncrement); ------------------------------------------------------- Using Tomcat but need to do more? Need to support web services, security? 
Get stuff done quickly with pre-integrated technology to make your job easier. Download IBM WebSphere Application Server v.1.0.1, based on Apache Geronimo: http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 _______________________________________________ Nutch-cvs mailing list Nutch-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/nutch-cvs