Author: mattmann
Date: Tue Jun 2 04:29:05 2015
New Revision: 1683039
URL: http://svn.apache.org/r1683039
Log:
- Fix for NUTCH-2015: Make FetchNodeDb optional (off by default) if NutchServer
is not used. Contributed by Sujen Shah; this closes #25.
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherThread.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1683039&r1=1683038&r2=1683039&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 2 04:29:05 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.11-SNAPSHOT
+* NUTCH-2015 Make FetchNodeDb optional (off by default) if NutchServer is not used (Sujen Shah via mattmann)
+
* NUTCH-208 http: proxy exception list: (Matthias Günter, siren, markus,
lewismc)
* NUTCH-2007 add test libs to classpath of bin/nutch junit (snagel)
Modified: nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherThread.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherThread.java?rev=1683039&r1=1683038&r2=1683039&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherThread.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherThread.java Tue Jun 2 04:29:05 2015
@@ -57,6 +57,7 @@ import org.apache.nutch.protocol.Protoco
import org.apache.nutch.protocol.ProtocolStatus;
import org.apache.nutch.scoring.ScoringFilterException;
import org.apache.nutch.scoring.ScoringFilters;
+import org.apache.nutch.service.NutchServer;
import org.apache.nutch.util.StringUtil;
import org.apache.nutch.util.URLUtil;
import org.slf4j.Logger;
@@ -126,6 +127,7 @@ public class FetcherThread extends Threa
//Used by the REST service
private FetchNode fetchNode;
+ private boolean reportToNutchServer;
public FetcherThread(Configuration conf, AtomicInteger activeThreads,
FetchItemQueues fetchQueues,
QueueFeeder feeder, AtomicInteger spinWaiting, AtomicLong
lastRequestStart, Reporter reporter,
@@ -189,11 +191,17 @@ public class FetcherThread extends Threa
FetchItem fit = null;
try {
-
+ // checking for the server to be running and fetcher.parse to be true
+ if (parsing && NutchServer.getInstance().isRunning())
+ reportToNutchServer = true;
+
while (true) {
// creating FetchNode for storing in FetchNodeDb
- this.fetchNode = new FetchNode();
-
+ if (reportToNutchServer)
+ this.fetchNode = new FetchNode();
+ else
+ this.fetchNode = null;
+
// check whether must be stopped
if (isHalted()) {
LOG.debug(getName() + " set to halted");
@@ -289,10 +297,12 @@ public class FetcherThread extends Threa
String urlString = fit.url.toString();
- //used for FetchNode
- fetchNode.setStatus(status.getCode());
- fetchNode.setFetchTime(System.currentTimeMillis());
- fetchNode.setUrl(fit.url);
+ // used for FetchNode
+ if (fetchNode != null) {
+ fetchNode.setStatus(status.getCode());
+ fetchNode.setFetchTime(System.currentTimeMillis());
+ fetchNode.setUrl(fit.url);
+ }
reporter.incrCounter("FetcherStatus", status.getName(), 1);
@@ -620,11 +630,12 @@ public class FetcherThread extends Threa
fromHost = null;
}
- //used by fetchNode
- fetchNode.setOutlinks(links);
- fetchNode.setTitle(parseData.getTitle());
- FetchNodeDb.getInstance().put(fetchNode.getUrl().toString(), fetchNode);
-
+ //used by fetchNode
+ if(fetchNode!=null){
+ fetchNode.setOutlinks(links);
+ fetchNode.setTitle(parseData.getTitle());
+ FetchNodeDb.getInstance().put(fetchNode.getUrl().toString(), fetchNode);
+ }
int validCount = 0;
// Process all outlinks, normalize, filter and deduplicate