Author: snagel Date: Sun Mar 30 19:58:59 2014 New Revision: 1583193 URL: http://svn.apache.org/r1583193 Log: NUTCH-1645 Junit Test Case for Adaptive Fetch Schedule class
Added: nutch/trunk/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java (with props) Modified: nutch/trunk/CHANGES.txt Modified: nutch/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1583193&r1=1583192&r2=1583193&view=diff ============================================================================== --- nutch/trunk/CHANGES.txt (original) +++ nutch/trunk/CHANGES.txt Sun Mar 30 19:58:59 2014 @@ -2,6 +2,8 @@ Nutch Change Log Nutch Current Development +* NUTCH-1645 Junit Test Case for Adaptive Fetch Schedule class (Yasin Kılınç, lufeng, Sertac TURKEL via snagel) + * NUTCH-1737 Upgrade to recent JUnit 4.x (lewismc) * NUTCH-1733 parse-html to support HTML5 charset definitions (snagel) Added: nutch/trunk/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java?rev=1583193&view=auto ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java (added) +++ nutch/trunk/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java Sun Mar 30 19:58:59 2014 @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.crawl;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Test cases for AdaptiveFetchSchedule.
+ *
+ * A single CrawlDatum is passed through setFetchSchedule() once for each of
+ * the three change states (unknown, modified, not modified) and the fetch
+ * interval stored in the datum afterwards is compared against the value
+ * derived from the configured increment / decrement rates.
+ */
+public class TestAdaptiveFetchSchedule extends TestCase {
+
+  /** Value of db.fetch.schedule.adaptive.inc_rate (0.2 if not configured). */
+  private float incRate;
+  /** Value of db.fetch.schedule.adaptive.dec_rate (0.2 if not configured). */
+  private float decRate;
+  private Configuration conf;
+  /** Fetch / modification times handed to the schedule; both are 0 here. */
+  private long curTime;
+  private long lastModified;
+  /** Base fetch interval the CrawlDatum starts from. */
+  private int interval;
+
+  /**
+   * Loads the Nutch configuration and resets the fixture values used by the
+   * test.
+   */
+  @Before
+  public void setUp() throws Exception {
+    super.setUp();
+    conf = NutchConfiguration.create();
+    incRate = conf.getFloat("db.fetch.schedule.adaptive.inc_rate", 0.2f);
+    decRate = conf.getFloat("db.fetch.schedule.adaptive.dec_rate", 0.2f);
+    interval = 100;
+    // Spelled out instead of relying on the field default.
+    curTime = 0;
+    lastModified = 0;
+  }
+
+  /**
+   * Test the core functionality of AdaptiveFetchSchedule: the interval is
+   * checked after a call with each of STATUS_UNKNOWN, STATUS_MODIFIED and
+   * STATUS_NOTMODIFIED.
+   */
+  @Test
+  public void testAdaptiveFetchSchedule() {
+
+    FetchSchedule fs = new AdaptiveFetchSchedule();
+    fs.setConf(conf);
+
+    CrawlDatum p = prepareCrawlDatum();
+    Text url = new Text("http://www.example.com");
+
+    int changed = FetchSchedule.STATUS_UNKNOWN;
+    fs.setFetchSchedule(url, p, p.getFetchTime(),
+        p.getModifiedTime(), curTime, lastModified, changed);
+    validateFetchInterval(changed, p.getFetchInterval());
+
+    changed = FetchSchedule.STATUS_MODIFIED;
+    fs.setFetchSchedule(url, p, p.getFetchTime(),
+        p.getModifiedTime(), curTime, lastModified, changed);
+    validateFetchInterval(changed, p.getFetchInterval());
+    // Restore the base interval so the next expectation is computed from
+    // the same starting point rather than from the interval just adjusted.
+    p.setFetchInterval(interval);
+
+    changed = FetchSchedule.STATUS_NOTMODIFIED;
+    fs.setFetchSchedule(url, p, p.getFetchTime(),
+        p.getModifiedTime(), curTime, lastModified, changed);
+    validateFetchInterval(changed, p.getFetchInterval());
+
+  }
+
+  /**
+   * Prepare a CrawlDatum (STATUS_DB_UNFETCHED) to test AdaptiveFetchSchedule.
+   *
+   * @return CrawlDatum with the base fetch interval, score 1.0 and fetch
+   *         time 0
+   */
+  public CrawlDatum prepareCrawlDatum() {
+    CrawlDatum p = new CrawlDatum();
+    p.setStatus(CrawlDatum.STATUS_DB_UNFETCHED);
+    p.setFetchInterval(interval);
+    p.setScore(1.0f);
+    p.setFetchTime(0);
+    return p;
+  }
+
+  /**
+   * Validates the interval stored in the CrawlDatum against the value
+   * expected for the given change state. Any state other than the three
+   * handled below is not checked.
+   *
+   * @param changed
+   *          change state that was passed to setFetchSchedule()
+   * @param actualInterval
+   *          fetch interval read back from the CrawlDatum after the call
+   */
+  private void validateFetchInterval(int changed, int actualInterval) {
+
+    // JUnit's assertEquals takes (message, expected, actual); keeping that
+    // order makes a failure report name the right value as "expected".
+    if (changed == FetchSchedule.STATUS_UNKNOWN) {
+      assertEquals("fetch interval after STATUS_UNKNOWN", interval,
+          actualInterval);
+
+    } else if (changed == FetchSchedule.STATUS_MODIFIED) {
+      int expected = (int) (interval - (interval * decRate));
+      assertEquals("fetch interval after STATUS_MODIFIED", expected,
+          actualInterval);
+
+    } else if (changed == FetchSchedule.STATUS_NOTMODIFIED) {
+      int expected = (int) (interval + (interval * incRate));
+      assertEquals("fetch interval after STATUS_NOTMODIFIED", expected,
+          actualInterval);
+    }
+
+  }
+
+}

Propchange: nutch/trunk/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java
------------------------------------------------------------------------------
    svn:eol-style = native