Author: snagel Date: Fri Aug 22 21:23:32 2014 New Revision: 1619934 URL: http://svn.apache.org/r1619934 Log: NUTCH-1409 remove deprecated properties db.{default,max}.fetch.interval, generate.max.per.host.by.ip
Modified: nutch/branches/2.x/CHANGES.txt nutch/branches/2.x/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java nutch/trunk/CHANGES.txt nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Modified: nutch/branches/2.x/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1619934&r1=1619933&r2=1619934&view=diff ============================================================================== --- nutch/branches/2.x/CHANGES.txt (original) +++ nutch/branches/2.x/CHANGES.txt Fri Aug 22 21:23:32 2014 @@ -2,6 +2,8 @@ Nutch Change Log Current Development +* NUTCH-1409 remove deprecated properties db.{default,max}.fetch.interval, generate.max.per.host.by.ip (Matthias Agethle via snagel) + * NUTCH-1819 batchId in GeneratorJob ( Fjodor Vershinin via lewismc) * NUTCH-1708 use same id when indexing and deleting redirects (snagel) Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java?rev=1619934&r1=1619933&r2=1619934&view=diff ============================================================================== --- nutch/branches/2.x/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java (original) +++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java Fri Aug 22 21:23:32 2014 @@ -60,12 +60,8 @@ implements FetchSchedule { public void setConf(Configuration conf) { super.setConf(conf); if (conf == null) return; - int oldDefaultInterval = conf.getInt("db.default.fetch.interval", 0); defaultInterval = conf.getInt("db.fetch.interval.default", 0); - if (oldDefaultInterval > 0 && defaultInterval == 0) defaultInterval = oldDefaultInterval * SECONDS_PER_DAY; - int oldMaxInterval = conf.getInt("db.max.fetch.interval", 0); maxInterval = conf.getInt("db.fetch.interval.max", 0 ); - if (oldMaxInterval > 0 && maxInterval == 0) maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY; LOG.info("defaultInterval=" + defaultInterval); LOG.info("maxInterval=" + maxInterval); } Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1619934&r1=1619933&r2=1619934&view=diff ============================================================================== --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Fri Aug 22 21:23:32 2014 @@ -2,7 +2,7 @@ Nutch Change Log Nutch Current Development -* NUTCH-XX +* NUTCH-1409 remove deprecated properties db.{default,max}.fetch.interval, generate.max.per.host.by.ip (Matthias Agethle via snagel) Nutch 1.9 Release Change Log - 12/08/2014 (dd/mm/yyyy) Release Report - http://s.apache.org/1.9-release Modified: nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java?rev=1619934&r1=1619933&r2=1619934&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java (original) +++ nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReducer.java Fri Aug 22 21:23:32 2014 @@ -50,9 +50,7 @@ public class CrawlDbReducer implements R retryMax = job.getInt("db.fetch.retry.max", 3); scfilters = new ScoringFilters(job); additionsAllowed = job.getBoolean(CrawlDb.CRAWLDB_ADDITIONS_ALLOWED, true); - int oldMaxInterval = job.getInt("db.max.fetch.interval", 0); maxInterval = job.getInt("db.fetch.interval.max", 0 ); - if (oldMaxInterval > 0 && maxInterval == 0) maxInterval = oldMaxInterval * FetchSchedule.SECONDS_PER_DAY; schedule = FetchScheduleFactory.getFetchSchedule(job); int maxLinks = job.getInt("db.update.max.inlinks", 10000); linked = new InlinkPriorityQueue(maxLinks); Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=1619934&r1=1619933&r2=1619934&view=diff ============================================================================== --- nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original) +++ nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Fri Aug 22 21:23:32 2014 @@ -74,9 +74,6 @@ public class Generator extends Configure public static final String GENERATOR_DELAY = "crawl.gen.delay"; public static final String GENERATOR_MAX_NUM_SEGMENTS = "generate.max.num.segments"; - // deprecated parameters - public static final String GENERATE_MAX_PER_HOST_BY_IP = "generate.max.per.host.by.ip"; - public static class SelectorEntry implements Writable { public Text url; public CrawlDatum datum; @@ -505,10 +502,6 @@ public class Generator extends Configure LOG.info("Generator: topN: " + topN); } - if ("true".equals(getConf().get(GENERATE_MAX_PER_HOST_BY_IP))){ - LOG.info("Generator: GENERATE_MAX_PER_HOST_BY_IP will be ignored, use partition.url.mode instead"); - } - // map to inverted subset due for fetch, sort by score JobConf job = new NutchJob(getConf()); job.setJobName("generate: select from " + dbDir);