Author: cutting
Date: Mon Nov  7 09:55:59 2005
New Revision: 331556

URL: http://svn.apache.org/viewcvs?rev=331556&view=rev
Log:
Fix: only attempt to parse content from successful fetches.  Also, log the
number of fetcher threads in the task process rather than in the controller,
since the thread count may be overridden by nutch-site.xml.

Modified:
    lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java

Modified: lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java?rev=331556&r1=331555&r2=331556&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java Mon Nov  7 09:55:59 2005
@@ -202,7 +202,7 @@
         (SCORE_KEY, Float.toString(datum.getScore()));
 
       Parse parse = null;
-      if (parsing) {
+      if (parsing && status == CrawlDatum.STATUS_FETCH_SUCCESS) {
         ParseStatus parseStatus;
         try {
           parse = ParseUtil.parse(content);
@@ -280,6 +280,8 @@
     this.maxRedirect = getConf().getInt("http.redirect.max", 3);
     
     int threadCount = getConf().getInt("fetcher.threads.fetch", 10);
+    LOG.info("Fetcher: threads: " + threadCount);
+
     for (int i = 0; i < threadCount; i++) {       // spawn threads
       new FetcherThread().start();
     }
@@ -311,8 +313,6 @@
 
     LOG.info("Fetcher: starting");
     LOG.info("Fetcher: segment: " + segment);
-    LOG.info("Fetcher: threads: " + threads);
-
 
     JobConf job = new JobConf(getConf());
 


Reply via email to