Author: tejasp
Date: Fri May 3 19:27:39 2013
New Revision: 1478939
URL: http://svn.apache.org/r1478939
Log:
NUTCH-1514 Phase out the deprecated configuration properties (if possible)
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/conf/nutch-default.xml
nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
Modified: nutch/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1478939&r1=1478938&r2=1478939&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Fri May 3 19:27:39 2013
@@ -2,6 +2,8 @@ Nutch Change Log
(trunk): Current Development
+* NUTCH-1514 Phase out the deprecated configuration properties (if possible) (tejasp)
+
* NUTCH-1334 NPE in FetcherOutputFormat (jnioche via tejasp)
* NUTCH-1549 Fix deprecated use of Tika MimeType API in o.a.n.util.MimeUtil
(tejasp)
Modified: nutch/trunk/conf/nutch-default.xml
URL:
http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1478939&r1=1478938&r2=1478939&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Fri May 3 19:27:39 2013
@@ -325,13 +325,6 @@
<!-- web db properties -->
<property>
- <name>db.default.fetch.interval</name>
- <value>30</value>
- <description>(DEPRECATED) The default number of days between re-fetches of a page.
- </description>
-</property>
-
-<property>
<name>db.fetch.interval.default</name>
<value>2592000</value>
<description>The default number of seconds between re-fetches of a page (30
days).
@@ -611,14 +604,6 @@
</property>
<property>
- <name>generate.max.per.host</name>
- <value>-1</value>
- <description>(Deprecated). Use generate.max.count and generate.count.mode instead.
- The maximum number of urls per host in a single
- fetchlist. -1 if unlimited.</description>
-</property>
-
-<property>
<name>generate.min.score</name>
<value>0</value>
<description>Select only entries with a score larger than
@@ -698,8 +683,7 @@
<name>fetcher.threads.per.queue</name>
<value>1</value>
<description>This number is the maximum number of threads that
- should be allowed to access a queue at one time. Replaces
- deprecated parameter 'fetcher.threads.per.host'.
+ should be allowed to access a queue at one time.
</description>
</property>
@@ -707,8 +691,7 @@
<name>fetcher.queue.mode</name>
<value>byHost</value>
<description>Determines how to put URLs into queues. Default value is
'byHost',
- also takes 'byDomain' or 'byIP'. Replaces the deprecated parameter
- 'fetcher.threads.per.host.by.ip'.
+ also takes 'byDomain' or 'byIP'.
</description>
</property>
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java?rev=1478939&r1=1478938&r2=1478939&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java Fri May 3 19:27:39 2013
@@ -47,12 +47,8 @@ public abstract class AbstractFetchSched
public void setConf(Configuration conf) {
super.setConf(conf);
if (conf == null) return;
- int oldDefaultInterval = conf.getInt("db.default.fetch.interval", 0);
defaultInterval = conf.getInt("db.fetch.interval.default", 0);
- if (oldDefaultInterval > 0 && defaultInterval == 0) defaultInterval = oldDefaultInterval * SECONDS_PER_DAY;
- int oldMaxInterval = conf.getInt("db.max.fetch.interval", 0);
maxInterval = conf.getInt("db.fetch.interval.max", 0 );
- if (oldMaxInterval > 0 && maxInterval == 0) maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY;
LOG.info("defaultInterval=" + defaultInterval);
LOG.info("maxInterval=" + maxInterval);
}
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL:
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=1478939&r1=1478938&r2=1478939&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Fri May 3 19:27:39 2013
@@ -76,7 +76,6 @@ public class Generator extends Configure
// deprecated parameters
public static final String GENERATE_MAX_PER_HOST_BY_IP =
"generate.max.per.host.by.ip";
- public static final String GENERATE_MAX_PER_HOST = "generate.max.per.host";
public static class SelectorEntry implements Writable {
public Text url;
@@ -140,10 +139,7 @@ public class Generator extends Configure
curTime = job.getLong(GENERATOR_CUR_TIME, System.currentTimeMillis());
limit = job.getLong(GENERATOR_TOP_N, Long.MAX_VALUE) / job.getNumReduceTasks();
maxCount = job.getInt(GENERATOR_MAX_COUNT, -1);
- // back compatibility with old param
- int oldMaxPerHost = job.getInt(GENERATE_MAX_PER_HOST, -1);
- if (maxCount==-1 && oldMaxPerHost!=-1){
- maxCount = oldMaxPerHost;
+ if (maxCount==-1){
byDomain = false;
}
if (GENERATOR_COUNT_VALUE_DOMAIN.equals(job.get(GENERATOR_COUNT_MODE)))
byDomain = true;