Author: snagel
Date: Wed Oct 7 19:02:42 2015
New Revision: 1707360
URL: http://svn.apache.org/viewvc?rev=1707360&view=rev
Log:
NUTCH-2124 Fetcher following same redirect again and again
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherThread.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1707360&r1=1707359&r2=1707360&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Oct 7 19:02:42 2015
@@ -2,6 +2,8 @@ Nutch Change Log
Nutch Current Development 1.11-SNAPSHOT
+* NUTCH-2124 Fetcher following same redirect again and again (Yogendra Kumar Soni via snagel)
+
* NUTCH-2123 Seed List REST API returns Text but headers indicate/require JSON
(Aron Ahmadia, Sujen Shah via mattmann)
Modified: nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherThread.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherThread.java?rev=1707360&r1=1707359&r2=1707360&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherThread.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/fetcher/FetcherThread.java Wed Oct 7 19:02:42 2015
@@ -325,7 +325,7 @@ public class FetcherThread extends Threa
newUrl, refreshTime < Fetcher.PERM_REFRESH_TIME,
Fetcher.CONTENT_REDIR);
if (redirUrl != null) {
- queueRedirect(redirUrl, fit);
+ fit = queueRedirect(redirUrl, fit);
}
}
break;
@@ -346,7 +346,7 @@ public class FetcherThread extends Threa
Text redirUrl = handleRedirect(fit.url, fit.datum, urlString,
newUrl, temp, Fetcher.PROTOCOL_REDIR);
if (redirUrl != null) {
- queueRedirect(redirUrl, fit);
+ fit = queueRedirect(redirUrl, fit);
} else {
// stop redirecting
redirecting = false;
@@ -485,7 +485,7 @@ public class FetcherThread extends Threa
}
}
- private void queueRedirect(Text redirUrl, FetchItem fit)
+ private FetchItem queueRedirect(Text redirUrl, FetchItem fit)
throws ScoringFilterException {
CrawlDatum newDatum = new CrawlDatum(CrawlDatum.STATUS_DB_UNFETCHED,
fit.datum.getFetchInterval(), fit.datum.getScore());
@@ -506,6 +506,7 @@ public class FetcherThread extends Threa
reporter.incrCounter("FetcherStatus", "FetchItem.notCreated.redirect",
1);
}
+ return fit;
}
private void logError(Text url, String message) {