Author: snagel
Date: Fri Aug 22 21:23:32 2014
New Revision: 1619934
URL: http://svn.apache.org/r1619934
Log:
NUTCH-1409 remove deprecated properties db.{default,max}.fetch.interval,
generate.max.per.host.by.ip
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
nutch/trunk/CHANGES.txt
nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1619934&r1=1619933&r2=1619934&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri Aug 22 21:23:32 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1409 remove deprecated properties db.{default,max}.fetch.interval, generate.max.per.host.by.ip (Matthias Agethle via snagel)
+
* NUTCH-1819 batchId in GeneratorJob ( Fjodor Vershinin via lewismc)
* NUTCH-1708 use same id when indexing and deleting redirects (snagel)
Modified:
nutch/branches/2.x/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java?rev=1619934&r1=1619933&r2=1619934&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java Fri Aug 22 21:23:32 2014
@@ -60,12 +60,8 @@ implements FetchSchedule {
public void setConf(Configuration conf) {
super.setConf(conf);
if (conf == null) return;
- int oldDefaultInterval = conf.getInt("db.default.fetch.interval", 0);
defaultInterval = conf.getInt("db.fetch.interval.default", 0);
- if (oldDefaultInterval > 0 && defaultInterval == 0) defaultInterval = oldDefaultInterval * SECONDS_PER_DAY;
- int oldMaxInterval = conf.getInt("db.max.fetch.interval", 0);
maxInterval = conf.getInt("db.fetch.interval.max", 0 );
- if (oldMaxInterval > 0 && maxInterval == 0) maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
LOG.info("defaultInterval=" + defaultInterval);
LOG.info("maxInterval=" + maxInterval);
}
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1619934&r1=1619933&r2=1619934&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri Aug 22 21:23:32 2014
@@ -2,7 +2,7 @@ Nutch Change Log
Nutch Current Development
-* NUTCH-XX
+* NUTCH-1409 remove deprecated properties db.{default,max}.fetch.interval, generate.max.per.host.by.ip (Matthias Agethle via snagel)
Nutch 1.9 Release Change Log - 12/08/2014 (dd/mm/yyyy)
Release Report - http://s.apache.org/1.9-release
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=1619934&r1=1619933&r2=1619934&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Fri Aug 22 21:23:32 2014
@@ -50,9 +50,7 @@ public class CrawlDbReducer implements R
retryMax = job.getInt("db.fetch.retry.max", 3);
scfilters = new ScoringFilters(job);
additionsAllowed = job.getBoolean(CrawlDb.CRAWLDB_ADDITIONS_ALLOWED, true);
- int oldMaxInterval = job.getInt("db.max.fetch.interval", 0);
maxInterval = job.getInt("db.fetch.interval.max", 0 );
- if (oldMaxInterval > 0 && maxInterval == 0) maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
schedule = FetchScheduleFactory.getFetchSchedule(job);
int maxLinks = job.getInt("db.update.max.inlinks", 10000);
linked = new InlinkPriorityQueue(maxLinks);
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=1619934&r1=1619933&r2=1619934&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Fri Aug 22 21:23:32 2014
@@ -74,9 +74,6 @@ public class Generator extends Configure
public static final String GENERATOR_DELAY = "crawl.gen.delay";
public static final String GENERATOR_MAX_NUM_SEGMENTS =
"generate.max.num.segments";
- // deprecated parameters
- public static final String GENERATE_MAX_PER_HOST_BY_IP = "generate.max.per.host.by.ip";
-
public static class SelectorEntry implements Writable {
public Text url;
public CrawlDatum datum;
@@ -505,10 +502,6 @@ public class Generator extends Configure
LOG.info("Generator: topN: " + topN);
}
- if ("true".equals(getConf().get(GENERATE_MAX_PER_HOST_BY_IP))){
- LOG.info("Generator: GENERATE_MAX_PER_HOST_BY_IP will be ignored, use partition.url.mode instead");
- }
-
// map to inverted subset due for fetch, sort by score
JobConf job = new NutchJob(getConf());
job.setJobName("generate: select from " + dbDir);