Author: cutting
Date: Mon Nov 7 09:55:59 2005
New Revision: 331556
URL: http://svn.apache.org/viewcvs?rev=331556&view=rev
Log:
Fix: only attempt to parse content when the fetch succeeded. Also log the number
of fetcher threads in the task process rather than in the controller, since the
thread count may be overridden by nutch-site.xml.
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java?rev=331556&r1=331555&r2=331556&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Fetcher.java Mon Nov 7 09:55:59 2005
@@ -202,7 +202,7 @@
(SCORE_KEY, Float.toString(datum.getScore()));
Parse parse = null;
- if (parsing) {
+ if (parsing && status == CrawlDatum.STATUS_FETCH_SUCCESS) {
ParseStatus parseStatus;
try {
parse = ParseUtil.parse(content);
@@ -280,6 +280,8 @@
this.maxRedirect = getConf().getInt("http.redirect.max", 3);
int threadCount = getConf().getInt("fetcher.threads.fetch", 10);
+ LOG.info("Fetcher: threads: " + threadCount);
+
for (int i = 0; i < threadCount; i++) { // spawn threads
new FetcherThread().start();
}
@@ -311,8 +313,6 @@
LOG.info("Fetcher: starting");
LOG.info("Fetcher: segment: " + segment);
- LOG.info("Fetcher: threads: " + threads);
-
JobConf job = new JobConf(getConf());