Modified: nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbStates.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbStates.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbStates.java (original) +++ nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbStates.java Thu Jan 29 05:38:59 2015 @@ -55,26 +55,26 @@ import org.slf4j.LoggerFactory; * <li>configuration properties</li> * <li>(additional) CrawlDatums of status linked (stemming from inlinks)</li> * </ul> - * </li> - * </ul> + * </li> </ul> */ public class TestCrawlDbStates { - private static final Logger LOG = LoggerFactory.getLogger(TestCrawlDbStates.class); + private static final Logger LOG = LoggerFactory + .getLogger(TestCrawlDbStates.class); protected static final byte[][] fetchDbStatusPairs = { - { -1, STATUS_DB_UNFETCHED }, - { STATUS_FETCH_SUCCESS, STATUS_DB_FETCHED }, - { STATUS_FETCH_GONE, STATUS_DB_GONE }, - { STATUS_FETCH_REDIR_TEMP, STATUS_DB_REDIR_TEMP }, - { STATUS_FETCH_REDIR_PERM, STATUS_DB_REDIR_PERM }, + { -1, STATUS_DB_UNFETCHED }, { STATUS_FETCH_SUCCESS, STATUS_DB_FETCHED }, + { STATUS_FETCH_GONE, STATUS_DB_GONE }, + { STATUS_FETCH_REDIR_TEMP, STATUS_DB_REDIR_TEMP }, + { STATUS_FETCH_REDIR_PERM, STATUS_DB_REDIR_PERM }, { STATUS_FETCH_NOTMODIFIED, STATUS_DB_NOTMODIFIED }, - { STATUS_FETCH_RETRY, -1 }, // fetch_retry does not have a CrawlDb counter-part - { -1, STATUS_DB_DUPLICATE }, - }; + { STATUS_FETCH_RETRY, -1 }, // fetch_retry does not have a CrawlDb + // counter-part + { -1, STATUS_DB_DUPLICATE }, }; /** tested {@link FetchSchedule} implementations */ - protected String[] schedules = {"DefaultFetchSchedule", "AdaptiveFetchSchedule"}; + protected String[] schedules = { "DefaultFetchSchedule", + "AdaptiveFetchSchedule" }; /** CrawlDatum as result of a link */ protected final CrawlDatum linked = new CrawlDatum(STATUS_LINKED, @@ -101,7 +101,7 @@ public class TestCrawlDbStates { int retryMax = conf.getInt("db.fetch.retry.max", 3); for (String sched : schedules) { LOG.info("Testing state transitions with " + sched); - conf.set("db.fetch.schedule.class", "org.apache.nutch.crawl."+sched); + conf.set("db.fetch.schedule.class", "org.apache.nutch.crawl." + sched); FetchSchedule schedule = FetchScheduleFactory .getFetchSchedule(new JobConf(conf)); for (int i = 0; i < fetchDbStatusPairs.length; i++) { @@ -138,8 +138,8 @@ public class TestCrawlDbStates { } String fromDbStatusName = (fromDbStatus == -1 ? "<not in CrawlDb>" : getStatusName(fromDbStatus)); - String fetchStatusName = (fetchStatus == -1 ? "<only inlinks>" : CrawlDatum - .getStatusName(fetchStatus)); + String fetchStatusName = (fetchStatus == -1 ? "<only inlinks>" + : CrawlDatum.getStatusName(fetchStatus)); LOG.info(fromDbStatusName + " + " + fetchStatusName + " => " + getStatusName(toDbStatus)); List<CrawlDatum> values = new ArrayList<CrawlDatum>(); @@ -147,7 +147,8 @@ public class TestCrawlDbStates { CrawlDatum fetch = null; if (fetchStatus == -1) { // nothing fetched, need at least one in-link - if (l == 0) continue; + if (l == 0) + continue; } else { fetch = new CrawlDatum(); if (fromDb != null) { @@ -183,7 +184,7 @@ public class TestCrawlDbStates { } } } - } + } /** * Test states after inject: inject must not modify the status of CrawlDatums @@ -199,7 +200,7 @@ public class TestCrawlDbStates { ScoringFilters scfilters = new ScoringFilters(conf); for (String sched : schedules) { LOG.info("Testing inject with " + sched); - conf.set("db.fetch.schedule.class", "org.apache.nutch.crawl."+sched); + conf.set("db.fetch.schedule.class", "org.apache.nutch.crawl." + sched); FetchSchedule schedule = FetchScheduleFactory .getFetchSchedule(new JobConf(conf)); List<CrawlDatum> values = new ArrayList<CrawlDatum>(); @@ -219,8 +220,8 @@ public class TestCrawlDbStates { .getStatusName(fromDbStatus)) + " + " + getStatusName(STATUS_INJECTED) + " => " + getStatusName(toDbStatus)); - CrawlDatum injected = new CrawlDatum(STATUS_INJECTED, - conf.getInt("db.fetch.interval.default", 2592000), 0.1f); + CrawlDatum injected = new CrawlDatum(STATUS_INJECTED, conf.getInt( + "db.fetch.interval.default", 2592000), 0.1f); schedule.initializeSchedule(CrawlDbUpdateUtil.dummyURL, injected); try { scfilters.injectedScore(CrawlDbUpdateUtil.dummyURL, injected); @@ -236,10 +237,10 @@ public class TestCrawlDbStates { byte status = res.get(0).getStatus(); if (status != toDbStatus) { fail("Inject for " - + (fromDbStatus == -1 ? "" : getStatusName(fromDbStatus) + " and ") - + getStatusName(STATUS_INJECTED) - + " results in " + getStatusName(status) - + " (expected: " + getStatusName(toDbStatus) + ")"); + + (fromDbStatus == -1 ? "" : getStatusName(fromDbStatus) + + " and ") + getStatusName(STATUS_INJECTED) + " results in " + + getStatusName(status) + " (expected: " + + getStatusName(toDbStatus) + ")"); } values.clear(); } @@ -258,8 +259,7 @@ public class TestCrawlDbStates { * <li>modified time is set</li> * <li>re-fetch is triggered after a certain time to force the fetched content * to be in a recent segment (old segments are deleted, see comments in - * {@link CrawlDbReducer#reduce(Text, Iterator, OutputCollector, Reporter)} - * </li> + * {@link CrawlDbReducer#reduce(Text, Iterator, OutputCollector, Reporter)}</li> * </ul> */ @Test @@ -270,7 +270,7 @@ public class TestCrawlDbStates { for (String sched : schedules) { String desc = "test notmodified by signature comparison + " + sched; LOG.info(desc); - conf.set("db.fetch.schedule.class", "org.apache.nutch.crawl."+sched); + conf.set("db.fetch.schedule.class", "org.apache.nutch.crawl." + sched); ContinuousCrawlTestUtil crawlUtil = new CrawlTestFetchNotModified(conf); if (!crawlUtil.run(20)) { fail("failed: " + desc); @@ -280,8 +280,9 @@ public class TestCrawlDbStates { for (String sched : schedules) { String desc = "test notmodified by HTTP 304 + " + sched; LOG.info(desc); - conf.set("db.fetch.schedule.class", "org.apache.nutch.crawl."+sched); - ContinuousCrawlTestUtil crawlUtil = new CrawlTestFetchNotModifiedHttp304(conf); + conf.set("db.fetch.schedule.class", "org.apache.nutch.crawl." + sched); + ContinuousCrawlTestUtil crawlUtil = new CrawlTestFetchNotModifiedHttp304( + conf); if (!crawlUtil.run(20)) { fail("failed: " + desc); } @@ -294,7 +295,9 @@ public class TestCrawlDbStates { protected long currFetchTime; /** time the last fetch took place */ protected long lastFetchTime; - /** time the document was fetched first (at all or after it has been changed) */ + /** + * time the document was fetched first (at all or after it has been changed) + */ protected long firstFetchTime; /** state in CrawlDb before the last fetch */ protected byte previousDbState; @@ -304,18 +307,21 @@ public class TestCrawlDbStates { private long maxFetchInterval; private FetchSchedule schedule; - CrawlTestFetchNotModified(Configuration conf) { super(conf); - maxFetchInterval = conf.getLong("db.fetch.interval.max", 7776000); // default = 90 days - maxFetchInterval += (24*60*60); // but take one day more to avoid false alarms - maxFetchInterval *= 1000; // in milli-seconds + maxFetchInterval = conf.getLong("db.fetch.interval.max", 7776000); // default + // = 90 + // days + maxFetchInterval += (24 * 60 * 60); // but take one day more to avoid + // false alarms + maxFetchInterval *= 1000; // in milli-seconds schedule = FetchScheduleFactory.getFetchSchedule(new JobConf(conf)); } @Override protected boolean check(CrawlDatum result) { - if (lastFetchTime > 0 && (currFetchTime - lastFetchTime) > maxFetchInterval) { + if (lastFetchTime > 0 + && (currFetchTime - lastFetchTime) > maxFetchInterval) { LOG.error("last effective fetch (HTTP 200, not HTTP 304), at " + new Date(lastFetchTime) + ", took place more than db.fetch.interval.max time, " @@ -377,7 +383,6 @@ public class TestCrawlDbStates { return false; } - // test modified time private boolean checkModifiedTime(CrawlDatum result, long modifiedTime) { if (result.getModifiedTime() == 0) { @@ -402,7 +407,7 @@ public class TestCrawlDbStates { datum = super.fetch(datum, currentTime); if (firstFetchTime == 0) { firstFetchTime = currFetchTime; - } else if ((currFetchTime - firstFetchTime) > (duration/2)) { + } else if ((currFetchTime - firstFetchTime) > (duration / 2)) { // simulate a modification after "one year" changeContent(); firstFetchTime = currFetchTime; @@ -411,7 +416,8 @@ public class TestCrawlDbStates { } } - protected class CrawlTestFetchNotModifiedHttp304 extends CrawlTestFetchNotModified { + protected class CrawlTestFetchNotModifiedHttp304 extends + CrawlTestFetchNotModified { CrawlTestFetchNotModifiedHttp304(Configuration conf) { super(conf); @@ -424,13 +430,13 @@ public class TestCrawlDbStates { previousDbState = datum.getStatus(); lastSignature = datum.getSignature(); int httpCode; - /* document is "really" fetched (no HTTP 304) - * - if last-modified time or signature are unset - * (page has not been fetched before or fetch is forced) - * - for test purposes, we simulate a modified after "one year" + /* + * document is "really" fetched (no HTTP 304) - if last-modified time or + * signature are unset (page has not been fetched before or fetch is + * forced) - for test purposes, we simulate a modified after "one year" */ if (datum.getModifiedTime() == 0 && datum.getSignature() == null - || (currFetchTime - firstFetchTime) > (duration/2)) { + || (currFetchTime - firstFetchTime) > (duration / 2)) { firstFetchTime = currFetchTime; httpCode = 200; datum.setStatus(STATUS_FETCH_SUCCESS); @@ -450,8 +456,8 @@ public class TestCrawlDbStates { /** * NUTCH-1245: a fetch_gone should always result in a db_gone. * <p> - * Even in a long-running continuous crawl, when a gone page is - * re-fetched several times over time. + * Even in a long-running continuous crawl, when a gone page is re-fetched + * several times over time. * </p> */ @Test @@ -476,8 +482,7 @@ public class TestCrawlDbStates { LOG.info("NUTCH-1245 (misconfiguration): test with db.fetch.interval.default > (1.5 * db.fetch.interval.max)"); Configuration conf = CrawlDBTestUtil.createConfiguration(); int fetchIntervalMax = conf.getInt("db.fetch.interval.max", 0); - conf.setInt("db.fetch.interval.default", - 3 + (int) (fetchIntervalMax * 1.5)); + conf.setInt("db.fetch.interval.default", 3 + (int) (fetchIntervalMax * 1.5)); ContinuousCrawlTestUtil crawlUtil = new ContinuousCrawlTestUtil(conf, STATUS_FETCH_GONE, STATUS_DB_GONE); if (!crawlUtil.run(0)) { @@ -485,14 +490,12 @@ public class TestCrawlDbStates { } } - /** - * Test whether signatures are reset for "content-less" states - * (gone, redirect, etc.): otherwise, if this state is temporary - * and the document appears again with the old content, it may - * get marked as not_modified in CrawlDb just after the redirect - * state. In this case we cannot expect content in segments. - * Cf. NUTCH-1422: reset signature for redirects. + * Test whether signatures are reset for "content-less" states (gone, + * redirect, etc.): otherwise, if this state is temporary and the document + * appears again with the old content, it may get marked as not_modified in + * CrawlDb just after the redirect state. In this case we cannot expect + * content in segments. Cf. NUTCH-1422: reset signature for redirects. */ // TODO: can only test if solution is done in CrawlDbReducer @Test @@ -501,7 +504,7 @@ public class TestCrawlDbStates { Configuration conf = CrawlDBTestUtil.createConfiguration(); for (String sched : schedules) { LOG.info("Testing reset signature with " + sched); - conf.set("db.fetch.schedule.class", "org.apache.nutch.crawl."+sched); + conf.set("db.fetch.schedule.class", "org.apache.nutch.crawl." + sched); ContinuousCrawlTestUtil crawlUtil = new CrawlTestSignatureReset(conf); if (!crawlUtil.run(20)) { fail("failed: signature not reset"); @@ -511,8 +514,7 @@ public class TestCrawlDbStates { private class CrawlTestSignatureReset extends ContinuousCrawlTestUtil { - byte[][] noContentStates = { - { STATUS_FETCH_GONE, STATUS_DB_GONE }, + byte[][] noContentStates = { { STATUS_FETCH_GONE, STATUS_DB_GONE }, { STATUS_FETCH_REDIR_TEMP, STATUS_DB_REDIR_TEMP }, { STATUS_FETCH_REDIR_PERM, STATUS_DB_REDIR_PERM } }; @@ -528,15 +530,15 @@ public class TestCrawlDbStates { datum = super.fetch(datum, currentTime); counter++; // flip-flopping between successful fetch and one of content-less states - if (counter%2 == 1) { + if (counter % 2 == 1) { fetchState = STATUS_FETCH_SUCCESS; } else { - fetchState = noContentStates[(counter%6)/2][0]; + fetchState = noContentStates[(counter % 6) / 2][0]; } LOG.info("Step " + counter + ": fetched with " + getStatusName(fetchState)); datum.setStatus(fetchState); - return datum; + return datum; } @Override @@ -560,6 +562,4 @@ public class TestCrawlDbStates { } - } -
Modified: nutch/trunk/src/test/org/apache/nutch/crawl/TestGenerator.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/TestGenerator.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/crawl/TestGenerator.java (original) +++ nutch/trunk/src/test/org/apache/nutch/crawl/TestGenerator.java Thu Jan 29 05:38:59 2015 @@ -36,7 +36,7 @@ import org.junit.Test; * Basic generator test. 1. Insert entries in crawldb 2. Generates entries to * fetch 3. Verifies that number of generated urls match 4. Verifies that * highest scoring urls are generated - * + * */ public class TestGenerator { @@ -71,7 +71,7 @@ public class TestGenerator { /** * Test that generator generates fetchlish ordered by score (desc). - * + * * @throws Exception */ @Test @@ -82,8 +82,7 @@ public class TestGenerator { ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>(); for (int i = 0; i <= 100; i++) { - list.add(createURLCrawlDatum("http://aaa/" + pad(i), - 1, i)); + list.add(createURLCrawlDatum("http://aaa/" + pad(i), 1, i)); } createCrawlDB(list); @@ -94,7 +93,7 @@ public class TestGenerator { CrawlDatum.GENERATE_DIR_NAME), "part-00000"); ArrayList<URLCrawlDatum> l = readContents(fetchlist); - + // sort urls by score desc Collections.sort(l, new ScoreComparator()); @@ -132,18 +131,16 @@ public class TestGenerator { /** * Test that generator obeys the property "generate.max.per.host". - * @throws Exception + * + * @throws Exception */ @Test - public void testGenerateHostLimit() throws Exception{ + public void testGenerateHostLimit() throws Exception { ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>(); - list.add(createURLCrawlDatum("http://www.example.com/index1.html", - 1, 1)); - list.add(createURLCrawlDatum("http://www.example.com/index2.html", - 1, 1)); - list.add(createURLCrawlDatum("http://www.example.com/index3.html", - 1, 1)); + list.add(createURLCrawlDatum("http://www.example.com/index1.html", 1, 1)); + list.add(createURLCrawlDatum("http://www.example.com/index2.html", 1, 1)); + list.add(createURLCrawlDatum("http://www.example.com/index3.html", 1, 1)); createCrawlDB(list); @@ -190,10 +187,11 @@ public class TestGenerator { /** * Test that generator obeys the property "generator.max.count" and * "generator.count.per.domain". - * @throws Exception + * + * @throws Exception */ @Test - public void testGenerateDomainLimit() throws Exception{ + public void testGenerateDomainLimit() throws Exception { ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>(); list.add(createURLCrawlDatum("http://a.example.com/index.html", 1, 1)); @@ -204,7 +202,8 @@ public class TestGenerator { Configuration myConfiguration = new Configuration(conf); myConfiguration.setInt(Generator.GENERATOR_MAX_COUNT, 2); - myConfiguration.set(Generator.GENERATOR_COUNT_MODE, Generator.GENERATOR_COUNT_VALUE_DOMAIN); + myConfiguration.set(Generator.GENERATOR_COUNT_MODE, + Generator.GENERATOR_COUNT_VALUE_DOMAIN); Path generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration, false); @@ -219,7 +218,8 @@ public class TestGenerator { myConfiguration = new Configuration(myConfiguration); myConfiguration.setInt(Generator.GENERATOR_MAX_COUNT, 3); - generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration, false); + generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration, + false); fetchlistPath = new Path(new Path(generatedSegment, CrawlDatum.GENERATE_DIR_NAME), "part-00000"); @@ -245,11 +245,12 @@ public class TestGenerator { /** * Test generator obeys the filter setting. - * @throws Exception - * @throws IOException + * + * @throws Exception + * @throws IOException */ @Test - public void testFilter() throws IOException, Exception{ + public void testFilter() throws IOException, Exception { ArrayList<URLCrawlDatum> list = new ArrayList<URLCrawlDatum>(); @@ -267,7 +268,8 @@ public class TestGenerator { Assert.assertNull("should be null (0 entries)", generatedSegment); - generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration, false); + generatedSegment = generateFetchlist(Integer.MAX_VALUE, myConfiguration, + false); Path fetchlistPath = new Path(new Path(generatedSegment, CrawlDatum.GENERATE_DIR_NAME), "part-00000"); @@ -279,14 +281,16 @@ public class TestGenerator { } - /** * Read contents of fetchlist. - * @param fetchlist path to Generated fetchlist + * + * @param fetchlist + * path to Generated fetchlist * @return Generated {@link URLCrawlDatum} objects * @throws IOException */ - private ArrayList<URLCrawlDatum> readContents(Path fetchlist) throws IOException { + private ArrayList<URLCrawlDatum> readContents(Path fetchlist) + throws IOException { // verify results SequenceFile.Reader reader = new SequenceFile.Reader(fs, fetchlist, conf); @@ -307,8 +311,11 @@ public class TestGenerator { /** * Generate Fetchlist. - * @param numResults number of results to generate - * @param config Configuration to use + * + * @param numResults + * number of results to generate + * @param config + * Configuration to use * @return path to generated segment * @throws IOException */ @@ -318,14 +325,16 @@ public class TestGenerator { Generator g = new Generator(config); Path[] generatedSegment = g.generate(dbDir, segmentsDir, -1, numResults, Long.MAX_VALUE, filter, false); - if (generatedSegment==null) return null; + if (generatedSegment == null) + return null; return generatedSegment[0]; } /** * Creates CrawlDB. - * - * @param list database contents + * + * @param list + * database contents * @throws IOException * @throws Exception */ @@ -342,9 +351,13 @@ public class TestGenerator { /** * Constructs new {@link URLCrawlDatum} from submitted parameters. - * @param url url to use - * @param fetchInterval {@link CrawlDatum#setFetchInterval(float)} - * @param score {@link CrawlDatum#setScore(float)} + * + * @param url + * url to use + * @param fetchInterval + * {@link CrawlDatum#setFetchInterval(float)} + * @param score + * {@link CrawlDatum#setScore(float)} * @return Constructed object */ private URLCrawlDatum createURLCrawlDatum(final String url, Modified: nutch/trunk/src/test/org/apache/nutch/crawl/TestInjector.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/TestInjector.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/crawl/TestInjector.java (original) +++ nutch/trunk/src/test/org/apache/nutch/crawl/TestInjector.java Thu Jan 29 05:38:59 2015 @@ -34,138 +34,143 @@ import org.junit.Before; import org.junit.Test; /** - * Basic injector test: - * 1. Creates a text file with urls - * 2. Injects them into crawldb - * 3. Reads crawldb entries and verifies contents - * 4. Injects more urls into webdb - * 5. Reads crawldb entries and verifies contents + * Basic injector test: 1. Creates a text file with urls 2. Injects them into + * crawldb 3. Reads crawldb entries and verifies contents 4. Injects more urls + * into webdb 5. Reads crawldb entries and verifies contents * */ public class TestInjector { private Configuration conf; private FileSystem fs; - final static Path testdir=new Path("build/test/inject-test"); + final static Path testdir = new Path("build/test/inject-test"); Path crawldbPath; Path urlPath; - + @Before public void setUp() throws Exception { conf = CrawlDBTestUtil.createConfiguration(); - urlPath=new Path(testdir,"urls"); - crawldbPath=new Path(testdir,"crawldb"); - fs=FileSystem.get(conf); - if (fs.exists(urlPath)) fs.delete(urlPath, false); - if (fs.exists(crawldbPath)) fs.delete(crawldbPath, true); + urlPath = new Path(testdir, "urls"); + crawldbPath = new Path(testdir, "crawldb"); + fs = FileSystem.get(conf); + if (fs.exists(urlPath)) + fs.delete(urlPath, false); + if (fs.exists(crawldbPath)) + fs.delete(crawldbPath, true); } - + @After - public void tearDown() throws IOException{ + public void tearDown() throws IOException { fs.delete(testdir, true); } @Test public void testInject() throws IOException { - ArrayList<String> urls=new ArrayList<String>(); - // We'll use a separate list for MD so we can still compare url with containsAll - ArrayList<String> metadata=new ArrayList<String>(); - for(int i=0;i<100;i++) { + ArrayList<String> urls = new ArrayList<String>(); + // We'll use a separate list for MD so we can still compare url with + // containsAll + ArrayList<String> metadata = new ArrayList<String>(); + for (int i = 0; i < 100; i++) { urls.add("http://zzz.com/" + i + ".html"); - metadata.add("\tnutch.score=2." + i + "\tnutch.fetchInterval=171717\tkey=value"); + metadata.add("\tnutch.score=2." + i + + "\tnutch.fetchInterval=171717\tkey=value"); } CrawlDBTestUtil.generateSeedList(fs, urlPath, urls, metadata); - - Injector injector=new Injector(conf); + + Injector injector = new Injector(conf); injector.inject(crawldbPath, urlPath); - + // verify results - List<String>read=readCrawldb(); - + List<String> read = readCrawldb(); + Collections.sort(read); Collections.sort(urls); Assert.assertEquals(urls.size(), read.size()); - + Assert.assertTrue(read.containsAll(urls)); Assert.assertTrue(urls.containsAll(read)); - - //inject more urls - ArrayList<String> urls2=new ArrayList<String>(); - for(int i=0;i<100;i++) { + + // inject more urls + ArrayList<String> urls2 = new ArrayList<String>(); + for (int i = 0; i < 100; i++) { urls2.add("http://xxx.com/" + i + ".html"); - // We'll overwrite previously injected records but preserve their original MD + // We'll overwrite previously injected records but preserve their original + // MD urls2.add("http://zzz.com/" + i + ".html"); } CrawlDBTestUtil.generateSeedList(fs, urlPath, urls2); - injector=new Injector(conf); + injector = new Injector(conf); conf.setBoolean("db.injector.update", true); injector.inject(crawldbPath, urlPath); urls.addAll(urls2); - + // verify results - read=readCrawldb(); + read = readCrawldb(); Collections.sort(read); Collections.sort(urls); // We should have 100 less records because we've overwritten Assert.assertEquals(urls.size() - 100, read.size()); - + Assert.assertTrue(read.containsAll(urls)); Assert.assertTrue(urls.containsAll(read)); - + // Check if we correctly preserved MD Map<String, CrawlDatum> records = readCrawldbRecords(); - + // Iterate over the urls, we're looking for http://zzz.com/ prefixed URLs // so we can check for MD and score and interval Text writableKey = new Text("key"); Text writableValue = new Text("value"); for (String url : urls) { - if (url.indexOf("http://zzz") == 0) { + if (url.indexOf("http://zzz") == 0) { // Check for fetch interval Assert.assertTrue(records.get(url).getFetchInterval() == 171717); // Check for default score Assert.assertTrue(records.get(url).getScore() != 1.0); // Check for MD key=value - Assert.assertEquals(writableValue, records.get(url).getMetaData().get(writableKey)); + Assert.assertEquals(writableValue, + records.get(url).getMetaData().get(writableKey)); } } } - - private List<String> readCrawldb() throws IOException{ - Path dbfile=new Path(crawldbPath,CrawlDb.CURRENT_NAME + "/part-00000/data"); + + private List<String> readCrawldb() throws IOException { + Path dbfile = new Path(crawldbPath, CrawlDb.CURRENT_NAME + + "/part-00000/data"); System.out.println("reading:" + dbfile); @SuppressWarnings("resource") - SequenceFile.Reader reader=new SequenceFile.Reader(fs, dbfile, conf); - ArrayList<String> read=new ArrayList<String>(); - - READ: - do { - Text key=new Text(); - CrawlDatum value=new CrawlDatum(); - if(!reader.next(key, value)) break READ; + SequenceFile.Reader reader = new SequenceFile.Reader(fs, dbfile, conf); + ArrayList<String> read = new ArrayList<String>(); + + READ: do { + Text key = new Text(); + CrawlDatum value = new CrawlDatum(); + if (!reader.next(key, value)) + break READ; read.add(key.toString()); - } while(true); + } while (true); return read; } - - private HashMap<String,CrawlDatum> readCrawldbRecords() throws IOException{ - Path dbfile=new Path(crawldbPath,CrawlDb.CURRENT_NAME + "/part-00000/data"); + + private HashMap<String, CrawlDatum> readCrawldbRecords() throws IOException { + Path dbfile = new Path(crawldbPath, CrawlDb.CURRENT_NAME + + "/part-00000/data"); System.out.println("reading:" + dbfile); @SuppressWarnings("resource") - SequenceFile.Reader reader=new SequenceFile.Reader(fs, dbfile, conf); - HashMap<String,CrawlDatum> read=new HashMap<String,CrawlDatum>(); - - READ: - do { - Text key=new Text(); - CrawlDatum value=new CrawlDatum(); - if(!reader.next(key, value)) break READ; + SequenceFile.Reader reader = new SequenceFile.Reader(fs, dbfile, conf); + HashMap<String, CrawlDatum> read = new HashMap<String, CrawlDatum>(); + + READ: do { + Text key = new Text(); + CrawlDatum value = new CrawlDatum(); + if (!reader.next(key, value)) + break READ; read.put(key.toString(), value); - } while(true); + } while (true); return read; } Modified: nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java (original) +++ nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java Thu Jan 29 05:38:59 2015 @@ -35,41 +35,28 @@ import org.junit.Before; import org.junit.Test; public class TestLinkDbMerger { - private static final Logger LOG = Logger.getLogger(TestLinkDbMerger.class.getName()); - + private static final Logger LOG = Logger.getLogger(TestLinkDbMerger.class + .getName()); + String url10 = "http://example.com/foo"; - String[] urls10 = new String[] { - "http://example.com/100", - "http://example.com/101" - }; + String[] urls10 = new String[] { "http://example.com/100", + "http://example.com/101" }; String url11 = "http://example.com/"; - String[] urls11 = new String[] { - "http://example.com/110", - "http://example.com/111" - }; - + String[] urls11 = new String[] { "http://example.com/110", + "http://example.com/111" }; + String url20 = "http://example.com/"; - String[] urls20 = new String[] { - "http://foo.com/200", - "http://foo.com/201" - }; + String[] urls20 = new String[] { "http://foo.com/200", "http://foo.com/201" }; String url21 = "http://example.com/bar"; - String[] urls21 = new String[] { - "http://foo.com/210", - "http://foo.com/211" - }; - + String[] urls21 = new String[] { "http://foo.com/210", "http://foo.com/211" }; + String[] urls10_expected = urls10; - String[] urls11_expected = new String[] { - urls11[0], - urls11[1], - urls20[0], - urls20[1] - }; + String[] urls11_expected = new String[] { urls11[0], urls11[1], urls20[0], + urls20[1] }; String[] urls20_expected = urls11_expected; String[] urls21_expected = urls21; - + TreeMap<String, String[]> init1 = new TreeMap<String, String[]>(); TreeMap<String, String[]> init2 = new TreeMap<String, String[]>(); HashMap<String, String[]> expected = new HashMap<String, String[]>(); @@ -77,7 +64,7 @@ public class TestLinkDbMerger { Path testDir; FileSystem fs; LinkDbReader reader; - + @Before public void setUp() throws Exception { init1.put(url10, urls10); @@ -90,20 +77,22 @@ public class TestLinkDbMerger { expected.put(url21, urls21_expected); conf = NutchConfiguration.create(); fs = FileSystem.get(conf); - testDir = new Path("build/test/test-linkdb-" + - new java.util.Random().nextInt()); + testDir = new Path("build/test/test-linkdb-" + + new java.util.Random().nextInt()); fs.mkdirs(testDir); } - + @After public void tearDown() { try { if (fs.exists(testDir)) fs.delete(testDir, true); - } catch (Exception e) { } + } catch (Exception e) { + } try { reader.close(); - } catch (Exception e) { } + } catch (Exception e) { + } } @Test @@ -118,7 +107,7 @@ public class TestLinkDbMerger { createLinkDb(conf, fs, linkdb2, init2); LinkDbMerger merger = new LinkDbMerger(conf); LOG.fine("* merging linkdbs to " + output); - merger.merge(output, new Path[]{linkdb1, linkdb2}, false, false); + merger.merge(output, new Path[] { linkdb1, linkdb2 }, false, false); LOG.fine("* reading linkdb: " + output); reader = new LinkDbReader(conf, output); Iterator<String> it = expected.keySet().iterator(); @@ -132,7 +121,7 @@ public class TestLinkDbMerger { ArrayList<String> links = new ArrayList<String>(); Iterator<?> it2 = inlinks.iterator(); while (it2.hasNext()) { - Inlink in = (Inlink)it2.next(); + Inlink in = (Inlink) it2.next(); links.add(in.getFromUrl()); } for (int i = 0; i < vals.length; i++) { @@ -143,11 +132,13 @@ public class TestLinkDbMerger { reader.close(); fs.delete(testDir, true); } - - private void createLinkDb(Configuration config, FileSystem fs, Path linkdb, TreeMap<String, String[]> init) throws Exception { + + private void createLinkDb(Configuration config, FileSystem fs, Path linkdb, + TreeMap<String, String[]> init) throws Exception { LOG.fine("* creating linkdb: " + linkdb); Path dir = new Path(linkdb, LinkDb.CURRENT_NAME); - MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, Inlinks.class); + MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, + "part-00000").toString(), Text.class, Inlinks.class); Iterator<String> it = init.keySet().iterator(); while (it.hasNext()) { String key = it.next(); Modified: nutch/trunk/src/test/org/apache/nutch/crawl/TestSignatureFactory.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/TestSignatureFactory.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/crawl/TestSignatureFactory.java (original) +++ nutch/trunk/src/test/org/apache/nutch/crawl/TestSignatureFactory.java Thu Jan 29 05:38:59 2015 @@ -25,9 +25,9 @@ public class TestSignatureFactory { @Test public void testGetSignature() { - Configuration conf=NutchConfiguration.create(); - Signature signature1=SignatureFactory.getSignature(conf); - Signature signature2=SignatureFactory.getSignature(conf); + Configuration conf = NutchConfiguration.create(); + Signature signature1 = SignatureFactory.getSignature(conf); + Signature signature2 = SignatureFactory.getSignature(conf); Assert.assertNotNull(signature1); Assert.assertNotNull(signature2); Assert.assertEquals(signature1, signature2); Modified: nutch/trunk/src/test/org/apache/nutch/fetcher/TestFetcher.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/fetcher/TestFetcher.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/fetcher/TestFetcher.java (original) +++ nutch/trunk/src/test/org/apache/nutch/fetcher/TestFetcher.java Thu Jan 29 05:38:59 2015 @@ -39,17 +39,13 @@ import org.junit.Test; import org.mortbay.jetty.Server; /** - * Basic fetcher test - * 1. generate seedlist - * 2. inject - * 3. generate - * 3. fetch - * 4. Verify contents - * + * Basic fetcher test 1. generate seedlist 2. inject 3. generate 3. fetch 4. + * Verify contents + * */ public class TestFetcher { - final static Path testdir=new Path("build/test/fetch-test"); + final static Path testdir = new Path("build/test/fetch-test"); Configuration conf; FileSystem fs; Path crawldbPath; @@ -58,112 +54,118 @@ public class TestFetcher { Server server; @Before - public void setUp() throws Exception{ - conf=CrawlDBTestUtil.createConfiguration(); - fs=FileSystem.get(conf); + public void setUp() throws Exception { + conf = CrawlDBTestUtil.createConfiguration(); + fs = FileSystem.get(conf); fs.delete(testdir, true); - urlPath=new Path(testdir,"urls"); - crawldbPath=new Path(testdir,"crawldb"); - segmentsPath=new Path(testdir,"segments"); - server=CrawlDBTestUtil.getServer(conf.getInt("content.server.port",50000), "build/test/data/fetch-test-site"); + urlPath = new Path(testdir, "urls"); + crawldbPath = new Path(testdir, "crawldb"); + segmentsPath = new Path(testdir, "segments"); + server = CrawlDBTestUtil.getServer( + conf.getInt("content.server.port", 50000), + "build/test/data/fetch-test-site"); server.start(); } @After - public void tearDown() throws Exception{ + public void tearDown() throws Exception { server.stop(); fs.delete(testdir, true); } - + @Test public void testFetch() throws IOException { - - //generate seedlist - ArrayList<String> urls=new ArrayList<String>(); - - addUrl(urls,"index.html"); - addUrl(urls,"pagea.html"); - addUrl(urls,"pageb.html"); - addUrl(urls,"dup_of_pagea.html"); - addUrl(urls,"nested_spider_trap.html"); - addUrl(urls,"exception.html"); - + + // generate seedlist + ArrayList<String> urls = new ArrayList<String>(); + + addUrl(urls, "index.html"); + addUrl(urls, "pagea.html"); + addUrl(urls, "pageb.html"); + addUrl(urls, "dup_of_pagea.html"); + addUrl(urls, "nested_spider_trap.html"); + addUrl(urls, "exception.html"); + CrawlDBTestUtil.generateSeedList(fs, urlPath, urls); - - //inject - Injector injector=new Injector(conf); + + // inject + Injector injector = new Injector(conf); injector.inject(crawldbPath, urlPath); - //generate - Generator g=new Generator(conf); + // generate + Generator g = new Generator(conf); Path[] generatedSegment = g.generate(crawldbPath, segmentsPath, 1, Long.MAX_VALUE, Long.MAX_VALUE, false, false); - long time=System.currentTimeMillis(); - //fetch - Fetcher fetcher=new Fetcher(conf); + long time = System.currentTimeMillis(); + // fetch + Fetcher fetcher = new Fetcher(conf); // Set fetcher.parse to true conf.setBoolean("fetcher.parse", true); fetcher.fetch(generatedSegment[0], 1); - time=System.currentTimeMillis()-time; - - //verify politeness, time taken should be more than (num_of_pages +1)*delay - int minimumTime=(int) ((urls.size()+1)*1000*conf.getFloat("fetcher.server.delay",5)); + time = System.currentTimeMillis() - time; + + // verify politeness, time taken should be more than (num_of_pages +1)*delay + int minimumTime = (int) ((urls.size() + 1) * 1000 * conf.getFloat( + "fetcher.server.delay", 5)); Assert.assertTrue(time > minimumTime); - - //verify content - Path content=new Path(new Path(generatedSegment[0], Content.DIR_NAME),"part-00000/data"); + + // verify content + Path content = new Path(new Path(generatedSegment[0], Content.DIR_NAME), + "part-00000/data"); @SuppressWarnings("resource") - SequenceFile.Reader reader=new SequenceFile.Reader(fs, content, conf); - - ArrayList<String> handledurls=new ArrayList<String>(); - - READ_CONTENT: - do { - Text key=new Text(); - Content value=new Content(); - if(!reader.next(key, value)) break READ_CONTENT; - String contentString=new String(value.getContent()); - if(contentString.indexOf("Nutch fetcher test page")!=-1) { + SequenceFile.Reader reader = new SequenceFile.Reader(fs, content, conf); + + ArrayList<String> handledurls = new ArrayList<String>(); + + READ_CONTENT: do { + Text key = new Text(); + Content value = new Content(); + if (!reader.next(key, value)) + break READ_CONTENT; + String contentString = new String(value.getContent()); + if (contentString.indexOf("Nutch fetcher test page") != -1) { handledurls.add(key.toString()); } - } while(true); + } while (true); reader.close(); Collections.sort(urls); Collections.sort(handledurls); - //verify that enough pages were handled + // verify that enough pages were handled Assert.assertEquals(urls.size(), handledurls.size()); - //verify that correct pages were handled + // verify that correct pages were handled Assert.assertTrue(handledurls.containsAll(urls)); Assert.assertTrue(urls.containsAll(handledurls)); - + handledurls.clear(); - //verify parse data - Path parseData = new Path(new Path(generatedSegment[0], ParseData.DIR_NAME),"part-00000/data"); + // verify parse data + Path parseData = new Path( + new Path(generatedSegment[0], ParseData.DIR_NAME), "part-00000/data"); reader = new SequenceFile.Reader(fs, parseData, conf); - - READ_PARSE_DATA: - do { + + READ_PARSE_DATA: do { Text key = new Text(); ParseData value = new ParseData(); - if(!reader.next(key, value)) break READ_PARSE_DATA; - // make sure they all contain "nutch.segment.name" and "nutch.content.digest" + if (!reader.next(key, value)) + break READ_PARSE_DATA; + // make sure they all contain "nutch.segment.name" and + // "nutch.content.digest" // keys in parse metadata Metadata contentMeta = value.getContentMeta(); - if (contentMeta.get(Nutch.SEGMENT_NAME_KEY) != null - && contentMeta.get(Nutch.SIGNATURE_KEY) != null) { + if (contentMeta.get(Nutch.SEGMENT_NAME_KEY) != null + && contentMeta.get(Nutch.SIGNATURE_KEY) != null) { handledurls.add(key.toString()); } - } while(true); - + } while (true); + Collections.sort(handledurls); Assert.assertEquals(urls.size(), handledurls.size()); @@ -173,9 +175,10 @@ public class TestFetcher { } private void addUrl(ArrayList<String> urls, String page) { - urls.add("http://127.0.0.1:" + server.getConnectors()[0].getPort() + "/" + page); + urls.add("http://127.0.0.1:" + server.getConnectors()[0].getPort() + "/" + + page); } - + @Test public void testAgentNameCheck() { Modified: nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java (original) +++ nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java Thu Jan 29 05:38:59 2015 @@ -33,13 +33,14 @@ public class TestIndexingFilters { /** * Test behaviour when defined filter does not exist. + * * @throws IndexingException */ @Test public void testNonExistingIndexingFilter() throws IndexingException { Configuration conf = NutchConfiguration.create(); - conf.addResource("nutch-default.xml"); - conf.addResource("crawl-tests.xml"); + conf.addResource("nutch-default.xml"); + conf.addResource("crawl-tests.xml"); String class1 = "NonExistingFilter"; String class2 = "org.apache.nutch.indexer.basic.BasicIndexingFilter"; @@ -47,34 +48,35 @@ public class TestIndexingFilters { IndexingFilters filters = new IndexingFilters(conf); filters.filter(new NutchDocument(), new ParseImpl("text", new ParseData( - new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text( - "http://www.example.com/"), new CrawlDatum(), new Inlinks()); + new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text( + "http://www.example.com/"), new CrawlDatum(), new Inlinks()); } /** * Test behaviour when NutchDOcument is null */ @Test - public void testNutchDocumentNullIndexingFilter() throws IndexingException{ + public void testNutchDocumentNullIndexingFilter() throws IndexingException { Configuration conf = NutchConfiguration.create(); conf.addResource("nutch-default.xml"); conf.addResource("crawl-tests.xml"); IndexingFilters filters = new IndexingFilters(conf); - NutchDocument doc = filters.filter(null, new ParseImpl("text", new ParseData( - new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text( - "http://www.example.com/"), new CrawlDatum(), new Inlinks()); - + NutchDocument doc = filters.filter(null, new ParseImpl("text", + new ParseData(new ParseStatus(), "title", new Outlink[0], + new Metadata())), new Text("http://www.example.com/"), + new CrawlDatum(), new Inlinks()); + Assert.assertNull(doc); } /** * Test behaviour when reset the index filter order will not take effect - * + * * @throws IndexingException */ @Test - public void testFilterCacheIndexingFilter() throws IndexingException{ + public void testFilterCacheIndexingFilter() throws IndexingException { Configuration conf = NutchConfiguration.create(); conf.addResource("nutch-default.xml"); conf.addResource("crawl-tests.xml"); @@ -83,24 +85,26 @@ public class TestIndexingFilters { conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1); IndexingFilters filters1 = new IndexingFilters(conf); - NutchDocument fdoc1 = filters1.filter(new NutchDocument(),new ParseImpl("text",new ParseData( - new ParseStatus(),"title",new Outlink[0],new Metadata())),new Text("http://www.example.com/"), - new CrawlDatum(),new Inlinks()); + NutchDocument fdoc1 = filters1.filter(new NutchDocument(), new ParseImpl( + "text", new ParseData(new ParseStatus(), "title", new Outlink[0], + new Metadata())), new Text("http://www.example.com/"), + new CrawlDatum(), new Inlinks()); // add another index filter String class2 = "org.apache.nutch.indexer.metadata.MetadataIndexer"; // set content metadata Metadata md = new Metadata(); - md.add("example","data"); + md.add("example", "data"); // set content metadata property defined in MetadataIndexer - conf.set("index.content.md","example"); + conf.set("index.content.md", "example"); // add MetadataIndxer filter conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2); IndexingFilters filters2 = new IndexingFilters(conf); - NutchDocument fdoc2 = filters2.filter(new NutchDocument(),new ParseImpl("text",new ParseData( - new ParseStatus(),"title",new Outlink[0],md)),new Text("http://www.example.com/"), - new CrawlDatum(),new Inlinks()); - Assert.assertEquals(fdoc1.getFieldNames().size(),fdoc2.getFieldNames().size()); + NutchDocument fdoc2 = filters2.filter(new NutchDocument(), new ParseImpl( + "text", new ParseData(new ParseStatus(), "title", new Outlink[0], md)), + new Text("http://www.example.com/"), new CrawlDatum(), new Inlinks()); + Assert.assertEquals(fdoc1.getFieldNames().size(), fdoc2.getFieldNames() + .size()); } } Modified: nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java (original) +++ nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java Thu Jan 29 05:38:59 2015 @@ -33,7 +33,6 @@ public class TestMetadata { private static final String CONTENTTYPE = "contenttype"; - /** * Test to ensure that only non-null values get written when the * {@link Metadata} object is written using a Writeable. @@ -280,4 +279,3 @@ public class TestMetadata { } } - Modified: nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java (original) +++ nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java Thu Jan 29 05:38:59 2015 @@ -29,7 +29,7 @@ import org.junit.Test; /** * JUnit based tests of class * {@link org.apache.nutch.metadata.SpellCheckedMetadata}. - * + * * @author Chris Mattmann * @author Jérôme Charron */ @@ -40,20 +40,20 @@ public class TestSpellCheckedMetadata { /** Test for the <code>getNormalizedName(String)</code> method. */ @Test public void testGetNormalizedName() { - Assert.assertEquals("Content-Type", SpellCheckedMetadata - .getNormalizedName("Content-Type")); - Assert.assertEquals("Content-Type", SpellCheckedMetadata - .getNormalizedName("ContentType")); - Assert.assertEquals("Content-Type", SpellCheckedMetadata - .getNormalizedName("Content-type")); - Assert.assertEquals("Content-Type", SpellCheckedMetadata - .getNormalizedName("contenttype")); - Assert.assertEquals("Content-Type", SpellCheckedMetadata - .getNormalizedName("contentype")); - Assert.assertEquals("Content-Type", SpellCheckedMetadata - .getNormalizedName("contntype")); + Assert.assertEquals("Content-Type", + SpellCheckedMetadata.getNormalizedName("Content-Type")); + Assert.assertEquals("Content-Type", + SpellCheckedMetadata.getNormalizedName("ContentType")); + Assert.assertEquals("Content-Type", + SpellCheckedMetadata.getNormalizedName("Content-type")); + Assert.assertEquals("Content-Type", + SpellCheckedMetadata.getNormalizedName("contenttype")); + Assert.assertEquals("Content-Type", + SpellCheckedMetadata.getNormalizedName("contentype")); + Assert.assertEquals("Content-Type", + SpellCheckedMetadata.getNormalizedName("contntype")); } - + /** Test for the <code>add(String, String)</code> method. */ @Test public void testAdd() { @@ -253,8 +253,8 @@ public class TestSpellCheckedMetadata { } /** - * IO Test method, usable only when you plan to do changes in metadata - * to measure relative performance impact. + * IO Test method, usable only when you plan to do changes in metadata to + * measure relative performance impact. */ @Test public final void testHandlingSpeed() { Modified: nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java (original) +++ nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java Thu Jan 29 05:38:59 2015 @@ -24,6 +24,7 @@ public class TestURLFilters { /** * Testcase for NUTCH-325. + * * @throws URLFilterException */ @Test Modified: nutch/trunk/src/test/org/apache/nutch/net/TestURLNormalizers.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/net/TestURLNormalizers.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/net/TestURLNormalizers.java (original) +++ nutch/trunk/src/test/org/apache/nutch/net/TestURLNormalizers.java Thu Jan 29 05:38:59 2015 @@ -31,41 +31,53 @@ public class TestURLNormalizers { String clazz1 = "org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer"; String clazz2 = "org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer"; conf.set("urlnormalizer.order", clazz1 + " " + clazz2); - - URLNormalizers normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_DEFAULT); - + + URLNormalizers normalizers = new URLNormalizers(conf, + URLNormalizers.SCOPE_DEFAULT); + Assert.assertNotNull(normalizers); try { - normalizers.normalize("http://www.example.com/", URLNormalizers.SCOPE_DEFAULT); + normalizers.normalize("http://www.example.com/", + URLNormalizers.SCOPE_DEFAULT); } catch (MalformedURLException mue) { Assert.fail(mue.toString()); } // NUTCH-1011 - Get rid of superfluous slashes try { - String normalizedSlashes = normalizers.normalize("http://www.example.com//path/to//somewhere.html", URLNormalizers.SCOPE_DEFAULT); - Assert.assertEquals(normalizedSlashes, "http://www.example.com/path/to/somewhere.html"); + String normalizedSlashes = normalizers.normalize( + "http://www.example.com//path/to//somewhere.html", + URLNormalizers.SCOPE_DEFAULT); + Assert.assertEquals(normalizedSlashes, + "http://www.example.com/path/to/somewhere.html"); } catch (MalformedURLException mue) { Assert.fail(mue.toString()); } - + // HostNormalizer NUTCH-1319 try { - String normalizedHost = normalizers.normalize("http://www.example.org//path/to//somewhere.html", URLNormalizers.SCOPE_DEFAULT); - Assert.assertEquals(normalizedHost, "http://example.org/path/to/somewhere.html"); + String normalizedHost = normalizers.normalize( + "http://www.example.org//path/to//somewhere.html", + URLNormalizers.SCOPE_DEFAULT); + Assert.assertEquals(normalizedHost, + "http://example.org/path/to/somewhere.html"); } catch (MalformedURLException mue) { Assert.fail(mue.toString()); } - + // check the order int pos1 = -1, pos2 = -1; - URLNormalizer[] impls = normalizers.getURLNormalizers(URLNormalizers.SCOPE_DEFAULT); + URLNormalizer[] impls = normalizers + .getURLNormalizers(URLNormalizers.SCOPE_DEFAULT); for (int i = 0; i < impls.length; i++) { - if (impls[i].getClass().getName().equals(clazz1)) pos1 = i; - if (impls[i].getClass().getName().equals(clazz2)) pos2 = i; + if (impls[i].getClass().getName().equals(clazz1)) + pos1 = i; + if (impls[i].getClass().getName().equals(clazz2)) + pos2 = i; } if (pos1 != -1 && pos2 != -1) { - Assert.assertTrue("RegexURLNormalizer before BasicURLNormalizer", pos1 < pos2); + Assert.assertTrue("RegexURLNormalizer before BasicURLNormalizer", + pos1 < pos2); } } } Modified: nutch/trunk/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java (original) +++ nutch/trunk/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java Thu Jan 29 05:38:59 2015 @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - + package org.apache.nutch.parse; import org.apache.nutch.parse.Outlink; @@ -34,54 +34,66 @@ import org.junit.Test; public class TestOutlinkExtractor { private static Configuration conf = NutchConfiguration.create(); - + @Test public void testGetNoOutlinks() { - Outlink[] outlinks = null; - + Outlink[] outlinks = null; + outlinks = OutlinkExtractor.getOutlinks(null, conf); Assert.assertNotNull(outlinks); Assert.assertEquals(0, outlinks.length); - + outlinks = OutlinkExtractor.getOutlinks("", conf); Assert.assertNotNull(outlinks); Assert.assertEquals(0, outlinks.length); } - + @Test public void testGetOutlinksHttp() { - Outlink[] outlinks = OutlinkExtractor.getOutlinks( - "Test with http://www.nutch.org/index.html is it found? " + - "What about www.google.com at http://www.google.de " + - "A longer URL could be http://www.sybit.com/solutions/portals.html", conf); - + Outlink[] outlinks = OutlinkExtractor + .getOutlinks( + "Test with http://www.nutch.org/index.html is it found? " + + "What about www.google.com at http://www.google.de " + + "A longer URL could be http://www.sybit.com/solutions/portals.html", + conf); + Assert.assertTrue("Url not found!", outlinks.length == 3); - Assert.assertEquals("Wrong URL", "http://www.nutch.org/index.html", outlinks[0].getToUrl()); - Assert.assertEquals("Wrong URL", "http://www.google.de", outlinks[1].getToUrl()); - Assert.assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl()); + Assert.assertEquals("Wrong URL", "http://www.nutch.org/index.html", + outlinks[0].getToUrl()); + Assert.assertEquals("Wrong URL", "http://www.google.de", + outlinks[1].getToUrl()); + Assert.assertEquals("Wrong URL", + "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl()); } - + @Test public void testGetOutlinksHttp2() { - Outlink[] outlinks = OutlinkExtractor.getOutlinks( - "Test with http://www.nutch.org/index.html is it found? " + - "What about www.google.com at http://www.google.de " + - "A longer URL could be http://www.sybit.com/solutions/portals.html", "http://www.sybit.de", conf); - + Outlink[] outlinks = OutlinkExtractor + .getOutlinks( + "Test with http://www.nutch.org/index.html is it found? " + + "What about www.google.com at http://www.google.de " + + "A longer URL could be http://www.sybit.com/solutions/portals.html", + "http://www.sybit.de", conf); + Assert.assertTrue("Url not found!", outlinks.length == 3); - Assert.assertEquals("Wrong URL", "http://www.nutch.org/index.html", outlinks[0].getToUrl()); - Assert.assertEquals("Wrong URL", "http://www.google.de", outlinks[1].getToUrl()); - Assert.assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl()); + Assert.assertEquals("Wrong URL", "http://www.nutch.org/index.html", + outlinks[0].getToUrl()); + Assert.assertEquals("Wrong URL", "http://www.google.de", + outlinks[1].getToUrl()); + Assert.assertEquals("Wrong URL", + "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl()); } - + @Test public void testGetOutlinksFtp() { Outlink[] outlinks = OutlinkExtractor.getOutlinks( - "Test with ftp://www.nutch.org is it found? " + - "What about www.google.com at ftp://www.google.de", conf); - - Assert.assertTrue("Url not found!", outlinks.length >1); - Assert.assertEquals("Wrong URL", "ftp://www.nutch.org", outlinks[0].getToUrl()); - Assert.assertEquals("Wrong URL", "ftp://www.google.de", outlinks[1].getToUrl()); + "Test with ftp://www.nutch.org is it found? " + + "What about www.google.com at ftp://www.google.de", conf); + + Assert.assertTrue("Url not found!", outlinks.length > 1); + Assert.assertEquals("Wrong URL", "ftp://www.nutch.org", + outlinks[0].getToUrl()); + Assert.assertEquals("Wrong URL", "ftp://www.google.de", + outlinks[1].getToUrl()); } } Modified: nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java (original) +++ nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java Thu Jan 29 05:38:59 2015 @@ -31,30 +31,27 @@ public class TestParseData { String title = "The Foo Page"; - Outlink[] outlinks = new Outlink[] { - new Outlink("http://foo.com/", "Foo"), - new Outlink("http://bar.com/", "Bar") - }; + Outlink[] outlinks = new Outlink[] { new Outlink("http://foo.com/", "Foo"), + new Outlink("http://bar.com/", "Bar") }; Metadata metaData = new Metadata(); metaData.add("Language", "en/us"); metaData.add("Charset", "UTF-8"); - ParseData r = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, metaData); - + ParseData r = new ParseData(ParseStatus.STATUS_SUCCESS, title, outlinks, + metaData); + WritableTestUtils.testWritable(r, null); } @Test public void testMaxOutlinks() throws Exception { Outlink[] outlinks = new Outlink[128]; - for (int i=0; i<outlinks.length; i++) { + for (int i = 0; i < outlinks.length; i++) { outlinks[i] = new Outlink("http://outlink.com/" + i, "Outlink" + i); } ParseData original = new ParseData(ParseStatus.STATUS_SUCCESS, - "Max Outlinks Title", - outlinks, - new Metadata()); + "Max Outlinks Title", outlinks, new Metadata()); ParseData data = (ParseData) WritableTestUtils.writeRead(original, null); Assert.assertEquals(outlinks.length, data.getOutlinks().length); } Modified: nutch/trunk/src/test/org/apache/nutch/parse/TestParseText.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/parse/TestParseText.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/parse/TestParseText.java (original) +++ nutch/trunk/src/test/org/apache/nutch/parse/TestParseText.java Thu Jan 29 05:38:59 2015 @@ -22,7 +22,7 @@ import org.junit.Test; /** Unit tests for ParseText. */ -public class TestParseText { +public class TestParseText { @Test public void testParseText() throws Exception { Modified: nutch/trunk/src/test/org/apache/nutch/parse/TestParserFactory.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/parse/TestParserFactory.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/parse/TestParserFactory.java (original) +++ nutch/trunk/src/test/org/apache/nutch/parse/TestParserFactory.java Thu Jan 29 05:38:59 2015 @@ -27,25 +27,25 @@ import org.junit.Test; /** * Unit test for new parse plugin selection. - * + * * @author Sebastien Le Callonnec * @version 1.0 */ public class TestParserFactory { - + private Configuration conf; private ParserFactory parserFactory; /** Inits the Test Case with the test parse-plugin file */ @Before public void setUp() throws Exception { - conf = NutchConfiguration.create(); - conf.set("plugin.includes", ".*"); - conf.set("parse.plugin.file", - "org/apache/nutch/parse/parse-plugin-test.xml"); - parserFactory = new ParserFactory(conf); + conf = NutchConfiguration.create(); + conf.set("plugin.includes", ".*"); + conf.set("parse.plugin.file", + "org/apache/nutch/parse/parse-plugin-test.xml"); + parserFactory = new ParserFactory(conf); } - + /** Unit test for <code>getExtensions(String)</code> method. */ @Test public void testGetExtensions() throws Exception { @@ -56,48 +56,50 @@ public class TestParserFactory { ext = parserFactory.getExtensions("foo/bar").get(0); Assert.assertEquals("parse-tika", ext.getDescriptor().getPluginId()); } - + /** Unit test to check <code>getParsers</code> method */ @Test public void testGetParsers() throws Exception { - Parser [] parsers = parserFactory.getParsers("text/html", "http://foo.com"); + Parser[] parsers = parserFactory.getParsers("text/html", "http://foo.com"); Assert.assertNotNull(parsers); Assert.assertEquals(1, parsers.length); - Assert.assertEquals("org.apache.nutch.parse.tika.TikaParser", - parsers[0].getClass().getName()); + Assert.assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0] + .getClass().getName()); parsers = parserFactory.getParsers("text/html; charset=ISO-8859-1", - "http://foo.com"); + "http://foo.com"); Assert.assertNotNull(parsers); Assert.assertEquals(1, parsers.length); - Assert.assertEquals("org.apache.nutch.parse.tika.TikaParser", - parsers[0].getClass().getName()); - + Assert.assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0] + .getClass().getName()); + parsers = parserFactory.getParsers("application/x-javascript", - "http://foo.com"); + "http://foo.com"); Assert.assertNotNull(parsers); Assert.assertEquals(1, parsers.length); - Assert.assertEquals("org.apache.nutch.parse.js.JSParseFilter", - parsers[0].getClass().getName()); - + Assert.assertEquals("org.apache.nutch.parse.js.JSParseFilter", parsers[0] + .getClass().getName()); + parsers = parserFactory.getParsers("text/plain", "http://foo.com"); Assert.assertNotNull(parsers); Assert.assertEquals(1, parsers.length); - Assert.assertEquals("org.apache.nutch.parse.tika.TikaParser", - parsers[0].getClass().getName()); - + Assert.assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0] + .getClass().getName()); + Parser parser1 = parserFactory.getParsers("text/plain", "http://foo.com")[0]; Parser parser2 = parserFactory.getParsers("*", "http://foo.com")[0]; - - Assert.assertEquals("Different instances!", parser1.hashCode(), parser2.hashCode()); - - //test and make sure that the rss parser is loaded even though its plugin.xml - //doesn't claim to support text/rss, only application/rss+xml - parsers = parserFactory.getParsers("text/rss","http://foo.com"); + + Assert.assertEquals("Different instances!", parser1.hashCode(), + parser2.hashCode()); + + // test and make sure that the rss parser is loaded even though its + // plugin.xml + // doesn't claim to support text/rss, only application/rss+xml + parsers = parserFactory.getParsers("text/rss", "http://foo.com"); Assert.assertNotNull(parsers); - Assert.assertEquals(1,parsers.length); - Assert.assertEquals("org.apache.nutch.parse.tika.TikaParser", - parsers[0].getClass().getName()); + Assert.assertEquals(1, parsers.length); + Assert.assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0] + .getClass().getName()); } - + } Modified: nutch/trunk/src/test/org/apache/nutch/plugin/HelloWorldExtension.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/plugin/HelloWorldExtension.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/plugin/HelloWorldExtension.java (original) +++ nutch/trunk/src/test/org/apache/nutch/plugin/HelloWorldExtension.java Thu Jan 29 05:38:59 2015 @@ -24,8 +24,11 @@ package org.apache.nutch.plugin; */ public class HelloWorldExtension implements ITestExtension { - /* (non-Javadoc) - * @see org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String) + /* + * (non-Javadoc) + * + * @see + * org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String) */ public String testGetExtension(String hello) { return hello + " World"; Modified: nutch/trunk/src/test/org/apache/nutch/plugin/ITestExtension.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/plugin/ITestExtension.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/plugin/ITestExtension.java (original) +++ nutch/trunk/src/test/org/apache/nutch/plugin/ITestExtension.java Thu Jan 29 05:38:59 2015 @@ -15,11 +15,12 @@ * limitations under the License. */ package org.apache.nutch.plugin; + /** * A Simple Test Extension Interface. * * @author joa23 - * + * */ public interface ITestExtension { public String testGetExtension(String hello); Modified: nutch/trunk/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java (original) +++ nutch/trunk/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java Thu Jan 29 05:38:59 2015 @@ -28,8 +28,8 @@ import org.apache.hadoop.conf.Configurat public class SimpleTestPlugin extends Plugin { /** - * @param pDescriptor - * @param conf + * @param pDescriptor + * @param conf */ public SimpleTestPlugin(PluginDescriptor pDescriptor, Configuration conf) { @@ -55,4 +55,3 @@ public class SimpleTestPlugin extends Pl } } - Modified: nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java (original) +++ nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java Thu Jan 29 05:38:59 2015 @@ -42,15 +42,15 @@ public class TestPluginSystem { private int fPluginCount; private LinkedList<File> fFolders = new LinkedList<File>(); - private Configuration conf ; + private Configuration conf; private PluginRepository repository; @Before public void setUp() throws Exception { this.conf = NutchConfiguration.create(); conf.set("plugin.includes", ".*"); - // String string = this.conf.get("plugin.includes", ""); - // conf.set("plugin.includes", string + "|Dummy*"); + // String string = this.conf.get("plugin.includes", ""); + // conf.set("plugin.includes", string + "|Dummy*"); fPluginCount = 5; createDummyPlugins(fPluginCount); this.repository = PluginRepository.get(conf); @@ -86,8 +86,7 @@ public class TestPluginSystem { */ @Test public void testLoadPlugins() { - PluginDescriptor[] descriptors = repository - .getPluginDescriptors(); + PluginDescriptor[] descriptors = repository.getPluginDescriptors(); int k = descriptors.length; Assert.assertTrue(fPluginCount <= k); for (int i = 0; i < descriptors.length; i++) { @@ -123,11 +122,10 @@ public class TestPluginSystem { @Test public void testGetExtensionAndAttributes() { String xpId = " sdsdsd"; - ExtensionPoint extensionPoint =repository - .getExtensionPoint(xpId); + ExtensionPoint extensionPoint = repository.getExtensionPoint(xpId); Assert.assertEquals(extensionPoint, null); - Extension[] extension1 = repository - .getExtensionPoint(getGetExtensionId()).getExtensions(); + Extension[] extension1 = repository.getExtensionPoint(getGetExtensionId()) + .getExtensions(); Assert.assertEquals(extension1.length, fPluginCount); for (int i = 0; i < extension1.length; i++) { Extension extension2 = extension1[i]; @@ -141,8 +139,8 @@ public class TestPluginSystem { */ @Test public void testGetExtensionInstances() throws PluginRuntimeException { - Extension[] extensions = repository - .getExtensionPoint(getGetExtensionId()).getExtensions(); + Extension[] extensions = repository.getExtensionPoint(getGetExtensionId()) + .getExtensions(); Assert.assertEquals(extensions.length, fPluginCount); for (int i = 0; i < extensions.length; i++) { Extension extension = extensions[i]; @@ -161,8 +159,7 @@ public class TestPluginSystem { */ @Test public void testGetClassLoader() { - PluginDescriptor[] descriptors = repository - .getPluginDescriptors(); + PluginDescriptor[] descriptors = repository.getPluginDescriptors(); for (int i = 0; i < descriptors.length; i++) { PluginDescriptor descriptor = descriptors[i]; Assert.assertNotNull(descriptor.getClassLoader()); @@ -174,8 +171,7 @@ public class TestPluginSystem { */ @Test public void testGetResources() throws IOException { - PluginDescriptor[] descriptors = repository - .getPluginDescriptors(); + PluginDescriptor[] descriptors = repository.getPluginDescriptors(); for (int i = 0; i < descriptors.length; i++) { PluginDescriptor descriptor = descriptors[i]; if (!descriptor.getPluginId().startsWith("getPluginFolder()")) { @@ -183,8 +179,7 @@ public class TestPluginSystem { } String value = descriptor.getResourceString("key", Locale.UK); Assert.assertEquals("value", value); - value = descriptor.getResourceString("key", - Locale.TRADITIONAL_CHINESE); + value = descriptor.getResourceString("key", Locale.TRADITIONAL_CHINESE); Assert.assertEquals("value", value); } @@ -199,7 +194,8 @@ public class TestPluginSystem { Assert.fail("no plugin directory setuped.."); String name = strings[0]; - return new PluginManifestParser(conf, this.repository).getPluginFolder(name).toString(); + return new PluginManifestParser(conf, this.repository) + .getPluginFolder(name).toString(); } /** @@ -213,8 +209,7 @@ public class TestPluginSystem { File folder = new File(string); folder.mkdir(); for (int i = 0; i < pCount; i++) { - String pluginFolder = string + File.separator + "DummyPlugin" - + i; + String pluginFolder = string + File.separator + "DummyPlugin" + i; File file = new File(pluginFolder); file.mkdir(); fFolders.add(file); @@ -265,9 +260,8 @@ public class TestPluginSystem { */ private void createPluginManifest(int i, String pFolderPath) throws IOException { - FileWriter out = new FileWriter(pFolderPath + File.separator - + "plugin.xml"); - String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + FileWriter out = new FileWriter(pFolderPath + File.separator + "plugin.xml"); + String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + "<!--this is just a simple plugin for testing issues.-->" + "<plugin id=\"org.apache.nutch.plugin." + i Modified: nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java (original) +++ nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java Thu Jan 29 05:38:59 2015 @@ -58,52 +58,36 @@ public class TestContent { Content c = null; Metadata p = new Metadata(); - c = new Content("http://www.foo.com/", - "http://www.foo.com/", - "".getBytes("UTF8"), - "text/html; charset=UTF-8", p, conf); + c = new Content("http://www.foo.com/", "http://www.foo.com/", + "".getBytes("UTF8"), "text/html; charset=UTF-8", p, conf); Assert.assertEquals("text/html", c.getContentType()); - c = new Content("http://www.foo.com/foo.html", - "http://www.foo.com/", - "".getBytes("UTF8"), - "", p, conf); + c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/", + "".getBytes("UTF8"), "", p, conf); Assert.assertEquals("text/html", c.getContentType()); - c = new Content("http://www.foo.com/foo.html", - "http://www.foo.com/", - "".getBytes("UTF8"), - null, p, conf); + c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/", + "".getBytes("UTF8"), null, p, conf); Assert.assertEquals("text/html", c.getContentType()); - c = new Content("http://www.foo.com/", - "http://www.foo.com/", - "<html></html>".getBytes("UTF8"), - "", p, conf); + c = new Content("http://www.foo.com/", "http://www.foo.com/", + "<html></html>".getBytes("UTF8"), "", p, conf); Assert.assertEquals("text/html", c.getContentType()); - c = new Content("http://www.foo.com/foo.html", - "http://www.foo.com/", - "<html></html>".getBytes("UTF8"), - "text/plain", p, conf); + c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/", + "<html></html>".getBytes("UTF8"), "text/plain", p, conf); Assert.assertEquals("text/html", c.getContentType()); - c = new Content("http://www.foo.com/foo.png", - "http://www.foo.com/", - "<html></html>".getBytes("UTF8"), - "text/plain", p, conf); + c = new Content("http://www.foo.com/foo.png", "http://www.foo.com/", + "<html></html>".getBytes("UTF8"), "text/plain", p, conf); Assert.assertEquals("text/html", c.getContentType()); - c = new Content("http://www.foo.com/", - "http://www.foo.com/", - "".getBytes("UTF8"), - "", p, conf); + c = new Content("http://www.foo.com/", "http://www.foo.com/", + "".getBytes("UTF8"), "", p, conf); Assert.assertEquals(MimeTypes.OCTET_STREAM, c.getContentType()); - c = new Content("http://www.foo.com/", - "http://www.foo.com/", - "".getBytes("UTF8"), - null, p, conf); + c = new Content("http://www.foo.com/", "http://www.foo.com/", + "".getBytes("UTF8"), null, p, conf); Assert.assertNotNull(c.getContentType()); } Modified: nutch/trunk/src/test/org/apache/nutch/protocol/TestProtocolFactory.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/protocol/TestProtocolFactory.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/protocol/TestProtocolFactory.java (original) +++ nutch/trunk/src/test/org/apache/nutch/protocol/TestProtocolFactory.java Thu Jan 29 05:38:59 2015 @@ -27,58 +27,59 @@ public class TestProtocolFactory { Configuration conf; ProtocolFactory factory; - + @Before public void setUp() throws Exception { conf = NutchConfiguration.create(); conf.set("plugin.includes", ".*"); conf.set("http.agent.name", "test-bot"); - factory=new ProtocolFactory(conf); + factory = new ProtocolFactory(conf); } @Test - public void testGetProtocol(){ + public void testGetProtocol() { - //non existing protocol + // non existing protocol try { factory.getProtocol("xyzxyz://somehost"); Assert.fail("Must throw ProtocolNotFound"); } catch (ProtocolNotFound e) { - //all is ok - } catch (Exception ex){ + // all is ok + } catch (Exception ex) { Assert.fail("Must not throw any other exception"); } - - Protocol httpProtocol=null; - - //existing protocol + + Protocol httpProtocol = null; + + // existing protocol try { - httpProtocol=factory.getProtocol("http://somehost"); + httpProtocol = factory.getProtocol("http://somehost"); Assert.assertNotNull(httpProtocol); - } catch (Exception ex){ + } catch (Exception ex) { Assert.fail("Must not throw any other exception"); } - //cache key - Object protocol = ObjectCache.get(conf).getObject(Protocol.X_POINT_ID + "http"); + // cache key + Object protocol = ObjectCache.get(conf).getObject( + Protocol.X_POINT_ID + "http"); Assert.assertNotNull(protocol); Assert.assertEquals(httpProtocol, protocol); - - //test same object instance + + // test same object instance try { - Assert.assertTrue(httpProtocol==factory.getProtocol("http://somehost")); + Assert.assertTrue(httpProtocol == factory.getProtocol("http://somehost")); } catch (ProtocolNotFound e) { Assert.fail("Must not throw any exception"); } } - + @Test - public void testContains(){ + public void testContains() { Assert.assertTrue(factory.contains("http", "http")); Assert.assertTrue(factory.contains("http", "http,ftp")); Assert.assertTrue(factory.contains("http", " http , ftp")); Assert.assertTrue(factory.contains("smb", "ftp,smb,http")); Assert.assertFalse(factory.contains("smb", "smbb")); } - + } Modified: nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java?rev=1655526&r1=1655525&r2=1655526&view=diff ============================================================================== --- nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java (original) +++ nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java Thu Jan 29 05:38:59 2015 @@ -40,12 +40,13 @@ public class TestSegmentMerger { Path seg2; Path out; int countSeg1, countSeg2; - + @Before public void setUp() throws Exception { conf = NutchConfiguration.create(); fs = FileSystem.get(conf); - testDir = new Path(conf.get("hadoop.tmp.dir"), "merge-" + System.currentTimeMillis()); + testDir = new Path(conf.get("hadoop.tmp.dir"), "merge-" + + System.currentTimeMillis()); seg1 = new Path(testDir, "seg1"); seg2 = new Path(testDir, "seg2"); out = new Path(testDir, "out"); @@ -55,12 +56,13 @@ public class TestSegmentMerger { DecimalFormat df = new DecimalFormat("0000000"); Text k = new Text(); Path ptPath = new Path(new Path(seg1, ParseText.DIR_NAME), "part-00000"); - MapFile.Writer w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class, ParseText.class); + MapFile.Writer w = new MapFile.Writer(conf, fs, ptPath.toString(), + Text.class, ParseText.class); long curSize = 0; countSeg1 = 0; FileStatus fileStatus = fs.getFileStatus(ptPath); long blkSize = fileStatus.getBlockSize(); - + while (curSize < blkSize * 2) { k.set("seg1-" + df.format(countSeg1)); w.append(k, new ParseText("seg1 text " + countSeg1)); @@ -71,7 +73,8 @@ public class TestSegmentMerger { System.err.println(" - done: " + countSeg1 + " records."); System.err.println("Creating large segment 2..."); ptPath = new Path(new Path(seg2, ParseText.DIR_NAME), "part-00000"); - w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class, ParseText.class); + w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class, + ParseText.class); curSize = 0; countSeg2 = 0; while (curSize < blkSize * 2) { @@ -83,16 +86,16 @@ public class TestSegmentMerger { w.close(); System.err.println(" - done: " + countSeg2 + " records."); } - + @After public void tearDown() throws Exception { fs.delete(testDir, true); } - + @Test public void testLargeMerge() throws Exception { SegmentMerger merger = new SegmentMerger(conf); - merger.merge(out, new Path[]{seg1, seg2}, false, false, -1); + merger.merge(out, new Path[] { seg1, seg2 }, false, false, -1); // verify output FileStatus[] stats = fs.listStatus(out); // there should be just one path @@ -100,7 +103,8 @@ public class TestSegmentMerger { Path outSeg = stats[0].getPath(); Text k = new Text(); ParseText v = new ParseText(); - MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(outSeg, ParseText.DIR_NAME), conf); + MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path( + outSeg, ParseText.DIR_NAME), conf); int cnt1 = 0, cnt2 = 0; for (MapFile.Reader r : readers) { while (r.next(k, v)) {
