Author: lewismc
Date: Fri Jan 18 20:38:55 2013
New Revision: 1435334
URL: http://svn.apache.org/viewvc?rev=1435334&view=rev
Log:
NUTCH-1453 Substantiate tests for IndexingFilters
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java
Modified: nutch/branches/2.x/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1435334&r1=1435333&r2=1435334&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri Jan 18 20:38:55 2013
@@ -2,9 +2,11 @@ Nutch Change Log
Release 2.2 - Current Development
+* NUTCH-1453 Substantiate tests for IndexingFilters (lufeng via lewismc)
+
* NUTCH-1274 Fix [cast] javac warnings (Tejas Patil via lewismc)
-* NUTCH-1516 Nutch 2.x pom.xml out of sync with ivy.xml
+* NUTCH-1516 Nutch 2.x pom.xml out of sync with ivy.xml (lewismc)
* NUTCH-1510 Upgrade to Hadoop 1.1.1 (markus)
Modified:
nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java
URL:
http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java?rev=1435334&r1=1435333&r2=1435334&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java Fri Jan 18 20:38:55 2013
@@ -20,6 +20,7 @@ import junit.framework.TestCase;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
+import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.NutchConfiguration;
@@ -31,18 +32,70 @@ public class TestIndexingFilters extends
*/
public void testNonExistingIndexingFilter() throws IndexingException {
Configuration conf = NutchConfiguration.create();
+ conf.addResource("nutch-default.xml");
+ conf.addResource("crawl-tests.xml");
+
String class1 = "NonExistingFilter";
String class2 = "org.apache.nutch.indexer.basic.BasicIndexingFilter";
conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);
IndexingFilters filters = new IndexingFilters(conf);
-// filters.filter(new NutchDocument(), new ParseImpl("text", new ParseData(
-// new ParseStatus(), "title", new Outlink[0], new Metadata())), new Text(
-// "http://www.example.com/"), new CrawlDatum(), new Inlinks());
WebPage page = new WebPage();
page.setText(new Utf8("text"));
page.setTitle(new Utf8("title"));
filters.filter(new NutchDocument(),"http://www.example.com/",page);
}
+ /**
+ * Test behaviour when NutchDocument is null
+ * @throws IndexingException
+ */
+ public void testNutchDocumentNullIndexingFilter() throws IndexingException{
+ Configuration conf = NutchConfiguration.create();
+ conf.addResource("nutch-default.xml");
+ conf.addResource("crawl-tests.xml");
+
+ IndexingFilters filters = new IndexingFilters(conf);
+ WebPage page = new WebPage();
+ page.setText(new Utf8("text"));
+ page.setTitle(new Utf8("title"));
+ NutchDocument doc = filters.filter(null,"http://www.example.com/",page);
+
+ assertNull(doc);
+ }
+
+ /**
+ * Test that resetting the index filter order will not take effect
+ *
+ * @throws IndexingException
+ */
+ public void testFilterCacheIndexingFilter() throws IndexingException{
+ Configuration conf = NutchConfiguration.create();
+ conf.addResource("nutch-default.xml");
+ conf.addResource("crawl-tests.xml");
+
+ String class1 = "org.apache.nutch.indexer.basic.BasicIndexingFilter";
+ conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1);
+
+ IndexingFilters filters1 = new IndexingFilters(conf);
+ WebPage page = new WebPage();
+ page.setText(new Utf8("text"));
+ page.setTitle(new Utf8("title"));
+ NutchDocument fdoc1 = filters1.filter(new NutchDocument(),"http://www.example.com/",page);
+
+ // add another index filter
+ String class2 = "org.apache.nutch.indexer.metadata.MetadataIndexer";
+ // set content metadata
+ Metadata md = new Metadata();
+ md.add("example","data");
+ // set content metadata property defined in MetadataIndexer
+ conf.set("index.content.md","example");
+ // add MetadataIndexer filter
+ conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);
+ IndexingFilters filters2 = new IndexingFilters(conf);
+ NutchDocument fdoc2 = filters2.filter(new NutchDocument(),"http://www.example.com/",page);
+ assertEquals(fdoc1.getFieldNames().size(),fdoc2.getFieldNames().size());
+ }
+
+
}