This is an automated email from the ASF dual-hosted git repository. snagel pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
commit 524a59480a3e258a0363faf343fa57875f8f9ea8
Author: Sebastian Nagel <sna...@apache.org>
AuthorDate: Mon Oct 8 14:50:51 2018 +0200

    NUTCH-2630 Fetcher to log skipped records by robots.txt
    - change required log level to INFO (default) for messages
      reporting skipped URLs because of robots.txt rules
      (disallow or crawl delay larger than fetcher.max.crawl.delay)
---
 src/java/org/apache/nutch/fetcher/FetcherThread.java | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/java/org/apache/nutch/fetcher/FetcherThread.java b/src/java/org/apache/nutch/fetcher/FetcherThread.java
index bfcc374..6ba920e 100644
--- a/src/java/org/apache/nutch/fetcher/FetcherThread.java
+++ b/src/java/org/apache/nutch/fetcher/FetcherThread.java
@@ -302,9 +302,7 @@ public class FetcherThread extends Thread {
             if (!rules.isAllowed(fit.url.toString())) {
               // unblock
               ((FetchItemQueues) fetchQueues).finishFetchItem(fit, true);
-              if (LOG.isDebugEnabled()) {
-                LOG.debug("Denied by robots.txt: {}", fit.url);
-              }
+              LOG.info("Denied by robots.txt: {}", fit.url);
               output(fit.url, fit.datum, null,
                   ProtocolStatus.STATUS_ROBOTS_DENIED,
                   CrawlDatum.STATUS_FETCH_GONE);
@@ -315,7 +313,7 @@ public class FetcherThread extends Thread {
               if (rules.getCrawlDelay() > maxCrawlDelay && maxCrawlDelay >= 0) {
                 // unblock
                 ((FetchItemQueues) fetchQueues).finishFetchItem(fit, true);
-                LOG.debug("Crawl-Delay for {} too long ({}), skipping", fit.url,
+                LOG.info("Crawl-Delay for {} too long ({}), skipping", fit.url,
                     rules.getCrawlDelay());
                 output(fit.url, fit.datum, null,
                     ProtocolStatus.STATUS_ROBOTS_DENIED,