Modified: lucene/nutch/branches/mapred/src/plugin/index-basic/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/index-basic/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/index-basic/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/index-basic/plugin.xml Tue Oct 18 13:59:40 2005 @@ -12,6 +12,10 @@ </library> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <extension id="org.apache.nutch.indexer.basic" name="Nutch Basic Indexing Filter" point="org.apache.nutch.indexer.IndexingFilter">
Modified: lucene/nutch/branches/mapred/src/plugin/index-more/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/index-more/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/index-more/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/index-more/plugin.xml Tue Oct 18 13:59:40 2005 @@ -12,6 +12,10 @@ </library> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <extension id="org.apache.nutch.indexer.more" name="Nutch More Indexing Filter" point="org.apache.nutch.indexer.IndexingFilter"> Modified: lucene/nutch/branches/mapred/src/plugin/languageidentifier/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/languageidentifier/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/languageidentifier/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/languageidentifier/plugin.xml Tue Oct 18 13:59:40 2005 @@ -5,13 +5,15 @@ version="1.0.0" provider-name="nutch.org"> - - <runtime> <library name="language-identifier.jar"> <export name="*"/> </library> </runtime> + + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> <extension id="org.apache.nutch.analysis.lang.LanguageParser" name="Nutch language Parser" Modified: lucene/nutch/branches/mapred/src/plugin/ontology/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/ontology/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/ontology/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/ontology/plugin.xml Tue Oct 18 13:59:40 2005 @@ -18,6 +18,10 @@ </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <!-- attribute "point" is the plugin interface class --> <!-- seems kinda redundant to have to define the point here too --> <extension id="org.apache.nutch.ontology.OntologyImpl" Modified: lucene/nutch/branches/mapred/src/plugin/parse-ext/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-ext/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-ext/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-ext/plugin.xml Tue Oct 18 13:59:40 2005 @@ -5,13 +5,15 @@ version="1.0.0" provider-name="nutch.org"> - - <runtime> <library name="parse-ext.jar"> <export name="*"/> </library> </runtime> + + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> <extension id="org.apache.nutch.parse.ext" name="ExtParse" Modified: lucene/nutch/branches/mapred/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java Tue Oct 18 13:59:40 2005 @@ -21,9 +21,8 @@ import org.apache.nutch.protocol.Content; import org.apache.nutch.protocol.ProtocolException; -import org.apache.nutch.parse.ParserFactory; -import org.apache.nutch.parse.Parser; import org.apache.nutch.parse.Parse; +import org.apache.nutch.parse.ParseUtil; import org.apache.nutch.parse.ParseException; import junit.framework.TestCase; @@ -46,8 +45,7 @@ public class TestExtParser extends TestCase { private File tempFile = null; private String urlString = null; - private Content content = null;; - private Parser parser = null;; + private Content content = null; private Parse parse = null; private String expectedText = "nutch rocks nutch rocks nutch rocks"; @@ -107,15 +105,13 @@ // check external parser that does 'cat' contentType = "application/vnd.nutch.example.cat"; content.setContentType(contentType); - parser = ParserFactory.getParser(contentType, urlString); - parse = parser.getParse(content); + parse = ParseUtil.parseByParserId("parse-ext", content); assertEquals(expectedText,parse.getText()); // check external parser that does 'md5sum' contentType = "application/vnd.nutch.example.md5sum"; content.setContentType(contentType); - parser = ParserFactory.getParser(contentType, urlString); - parse = parser.getParse(content); + parse = ParseUtil.parseByParserId("parse-ext", content); assertTrue(parse.getText().startsWith(expectedMD5sum)); } } Modified: lucene/nutch/branches/mapred/src/plugin/parse-html/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-html/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-html/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-html/plugin.xml Tue Oct 18 13:59:40 2005 @@ -5,8 +5,6 @@ version="1.0.0" provider-name="nutch.org"> - - <runtime> <library name="parse-html.jar"> <export name="*"/> @@ -14,6 +12,10 @@ <library name="nekohtml-0.9.4.jar"/> <library name="tagsoup-1.0rc3.jar"/> </runtime> + + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> <extension id="org.apache.nutch.parse.html" name="HtmlParse" Modified: lucene/nutch/branches/mapred/src/plugin/parse-js/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-js/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-js/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-js/plugin.xml Tue Oct 18 13:59:40 2005 @@ -11,6 +11,10 @@ </library> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <extension id="org.apache.nutch.parse.js" name="JS Parser" point="org.apache.nutch.parse.Parser"> Modified: lucene/nutch/branches/mapred/src/plugin/parse-mp3/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-mp3/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-mp3/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-mp3/plugin.xml Tue Oct 18 13:59:40 2005 @@ -12,6 +12,10 @@ <library name="jid3lib-0.5.1.jar"/> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <extension point="org.apache.nutch.parse.Parser" id="org.apache.nutch.parse.mp3" name="MP3Parse"> Modified: lucene/nutch/branches/mapred/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-mp3/src/test/org/apache/nutch/parse/mp3/TestMP3Parser.java Tue Oct 18 13:59:40 2005 @@ -19,7 +19,7 @@ import junit.framework.TestCase; import org.apache.nutch.parse.Parse; import org.apache.nutch.parse.ParseException; -import org.apache.nutch.parse.Parser; +import org.apache.nutch.parse.ParseUtil; import org.apache.nutch.parse.ParserFactory; import org.apache.nutch.protocol.Content; import org.apache.nutch.protocol.Protocol; @@ -60,15 +60,13 @@ String urlString; Protocol protocol; Content content; - Parser parser; Parse parse; urlString = "file:" + sampleDir + fileSeparator + id3v2; protocol = ProtocolFactory.getProtocol(urlString); content = protocol.getContent(urlString); - parser = ParserFactory.getParser(content.getContentType(), urlString); - parse = parser.getParse(content); + parse = ParseUtil.parseByParserId("parse-mp3",content); Properties metadata = parse.getData().getMetadata(); assertEquals("postgresql comment id3v2", metadata.getProperty("COMM-Text")); assertEquals("postgresql composer id3v2", metadata.getProperty("TCOM-Text")); Modified: lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/plugin.xml Tue Oct 18 13:59:40 2005 @@ -13,6 +13,7 @@ <requires> <import plugin="lib-jakarta-poi"/> + <import plugin="nutch-extensionpoints"/> </requires> <extension id="net.nutch.parse.mspowerpoint" Modified: lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java Tue Oct 18 13:59:40 2005 @@ -29,8 +29,7 @@ import org.apache.nutch.parse.Parse; import org.apache.nutch.parse.ParseData; -import org.apache.nutch.parse.Parser; -import org.apache.nutch.parse.ParserFactory; +import org.apache.nutch.parse.ParseUtil; import org.apache.nutch.protocol.Content; import org.apache.nutch.protocol.Protocol; import org.apache.nutch.protocol.ProtocolFactory; @@ -123,9 +122,7 @@ */ public void testContent() throws Exception { - Parser parser = ParserFactory.getParser(this.content.getContentType(), - this.urlString); - Parse parse = parser.getParse(this.content); + Parse parse = ParseUtil.parseByParserId("parse-mspowerpoint",this.content); ParseData data = parse.getData(); String text = parse.getText(); @@ -162,10 +159,8 @@ */ public void testMeta() throws Exception { - Parser parser = ParserFactory.getParser(this.content.getContentType(), - this.urlString); - Parse parse = parser.getParse(this.content); - + Parse parse = ParseUtil.parseByParserId("parse-mspowerpoint",content); + ParseData data = parse.getData(); final FileExtensionFilter titleFilter = new FileExtensionFilter( Modified: lucene/nutch/branches/mapred/src/plugin/parse-msword/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-msword/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-msword/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-msword/plugin.xml Tue Oct 18 13:59:40 2005 @@ -13,6 +13,10 @@ <library name="poi-scratchpad-2.1-20040508.jar"/> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <extension id="org.apache.nutch.parse.msword" name="MSWordParse" point="org.apache.nutch.parse.Parser"> Modified: lucene/nutch/branches/mapred/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java Tue Oct 18 13:59:40 2005 @@ -21,9 +21,8 @@ import org.apache.nutch.protocol.Content; import org.apache.nutch.protocol.ProtocolException; -import org.apache.nutch.parse.ParserFactory; -import org.apache.nutch.parse.Parser; import org.apache.nutch.parse.Parse; +import org.apache.nutch.parse.ParseUtil; import org.apache.nutch.parse.ParseException; import junit.framework.TestCase; @@ -57,7 +56,6 @@ String urlString; Protocol protocol; Content content; - Parser parser; Parse parse; for (int i=0; i<sampleFiles.length; i++) { @@ -65,9 +63,7 @@ protocol = ProtocolFactory.getProtocol(urlString); content = protocol.getProtocolOutput(urlString).getContent(); - - parser = ParserFactory.getParser(content.getContentType(), urlString); - parse = parser.getParse(content); + parse = ParseUtil.parseByParserId("parse-msword",content); assertTrue(parse.getText().startsWith(expectedText)); } Modified: lucene/nutch/branches/mapred/src/plugin/parse-pdf/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-pdf/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-pdf/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-pdf/plugin.xml Tue Oct 18 13:59:40 2005 @@ -10,9 +10,13 @@ <library name="parse-pdf.jar"> <export name="*"/> </library> - <library name="PDFBox-0.7.0.jar"/> + <library name="PDFBox-0.7.2-log4j.jar"/> <library name="log4j-1.2.9.jar"/> </runtime> + + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> <extension id="org.apache.nutch.parse.pdf" name="PdfParse" Modified: lucene/nutch/branches/mapred/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java Tue Oct 18 13:59:40 2005 @@ -21,9 +21,8 @@ import org.apache.nutch.protocol.Content; import org.apache.nutch.protocol.ProtocolException; -import org.apache.nutch.parse.ParserFactory; -import org.apache.nutch.parse.Parser; import org.apache.nutch.parse.Parse; +import org.apache.nutch.parse.ParseUtil; import org.apache.nutch.parse.ParseException; import junit.framework.TestCase; @@ -57,7 +56,6 @@ String urlString; Protocol protocol; Content content; - Parser parser; Parse parse; for (int i=0; i<sampleFiles.length; i++) { @@ -65,9 +63,7 @@ protocol = ProtocolFactory.getProtocol(urlString); content = protocol.getProtocolOutput(urlString).getContent(); - - parser = ParserFactory.getParser(content.getContentType(), urlString); - parse = parser.getParse(content); + parse = ParseUtil.parseByParserId("parse-pdf",content); int index = parse.getText().indexOf(expectedText); assertTrue(index > 0); Modified: lucene/nutch/branches/mapred/src/plugin/parse-rss/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-rss/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-rss/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-rss/plugin.xml Tue Oct 18 13:59:40 2005 @@ -20,8 +20,11 @@ <library name="xercesImpl.jar"/> <library name="xml-apis.jar"/> <library name="xml-rpc-1.2.jar"/> - </runtime> + + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> <extension id="org.apache.nutch.parse.rss" name="RssParse" Modified: lucene/nutch/branches/mapred/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-rss/src/java/org/apache/nutch/parse/rss/RSSParser.java Tue Oct 18 13:59:40 2005 @@ -157,11 +157,13 @@ if (r.getLink() != null) { try { // get the outlink - theOutlinks.add(new Outlink(r.getLink(), r - .getDescription())); + if (r.getDescription()!= null ) { + theOutlinks.add(new Outlink(r.getLink(), r.getDescription())); + } else { + theOutlinks.add(new Outlink(r.getLink(), "")); + } } catch (MalformedURLException e) { - LOG - .info("nutch:parse-rss:RSSParser Exception: MalformedURL: " + LOG.info("nutch:parse-rss:RSSParser Exception: MalformedURL: " + r.getLink() + ": Attempting to continue processing outlinks"); e.printStackTrace(); @@ -185,12 +187,13 @@ if (whichLink != null) { try { - theOutlinks.add(new Outlink(whichLink, theRSSItem - .getDescription())); - + if (theRSSItem.getDescription()!=null) { + theOutlinks.add(new Outlink(whichLink, theRSSItem.getDescription())); + } else { + theOutlinks.add(new Outlink(whichLink, "")); + } } catch (MalformedURLException e) { - LOG - .info("nutch:parse-rss:RSSParser Exception: MalformedURL: " + LOG.info("nutch:parse-rss:RSSParser Exception: MalformedURL: " + whichLink + ": Attempting to continue processing outlinks"); e.printStackTrace(); @@ -206,23 +209,18 @@ LOG.fine("nutch:parse-rss:getParse:contentTitle=" + contentTitle); } else { - LOG - .fine("nutch:parse-rss:Error:getParse: No RSS Channels recorded!"); + LOG.fine("nutch:parse-rss:Error:getParse: No RSS Channels recorded!"); } // format the outlinks + Outlink[] outlinks = (Outlink[]) theOutlinks.toArray(new Outlink[theOutlinks.size()]); - Outlink[] outlinks = (Outlink[]) theOutlinks - .toArray(new Outlink[theOutlinks.size()]); - - LOG.fine("nutch:parse-rss:getParse:found " + outlinks.length - + " outlinks"); + LOG.fine("nutch:parse-rss:getParse:found " + outlinks.length + " outlinks"); // LOG.info("Outlinks: "+outlinks); ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, contentTitle.toString(), outlinks, content.getMetadata()); return new ParseImpl(indexText.toString(), parseData); - } } Modified: lucene/nutch/branches/mapred/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java Tue Oct 18 13:59:40 2005 @@ -21,9 +21,8 @@ import org.apache.nutch.protocol.Content; import org.apache.nutch.protocol.ProtocolException; -import org.apache.nutch.parse.ParserFactory; -import org.apache.nutch.parse.Parser; import org.apache.nutch.parse.Parse; +import org.apache.nutch.parse.ParseUtil; import org.apache.nutch.parse.ParseException; import org.apache.nutch.parse.ParseData; import org.apache.nutch.parse.Outlink; @@ -75,7 +74,6 @@ String urlString; Protocol protocol; Content content; - Parser parser; Parse parse; for (int i = 0; i < sampleFiles.length; i++) { @@ -83,10 +81,7 @@ protocol = ProtocolFactory.getProtocol(urlString); content = protocol.getProtocolOutput(urlString).getContent(); - - parser = ParserFactory.getParser(content.getContentType(), - urlString); - parse = parser.getParse(content); + parse = ParseUtil.parseByParserId("parse-rss",content); //check that there are 3 outlinks: //http://test.channel.com Modified: lucene/nutch/branches/mapred/src/plugin/parse-rtf/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-rtf/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-rtf/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-rtf/plugin.xml Tue Oct 18 13:59:40 2005 @@ -12,6 +12,10 @@ <library name="rtf-parser.jar"/> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <extension point="org.apache.nutch.parse.Parser" id="org.apache.nutch.parse.rtf" name="RTFParse"> Modified: lucene/nutch/branches/mapred/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-rtf/src/test/org/apache/nutch/parse/rtf/TestRTFParser.java Tue Oct 18 13:59:40 2005 @@ -18,8 +18,8 @@ import junit.framework.TestCase; import org.apache.nutch.parse.Parse; +import org.apache.nutch.parse.ParseUtil; import org.apache.nutch.parse.ParseException; -import org.apache.nutch.parse.Parser; import org.apache.nutch.parse.ParserFactory; import org.apache.nutch.protocol.Content; import org.apache.nutch.protocol.Protocol; @@ -58,15 +58,13 @@ String urlString; Protocol protocol; Content content; - Parser parser; Parse parse; urlString = "file:" + sampleDir + fileSeparator + rtfFile; protocol = ProtocolFactory.getProtocol(urlString); content = protocol.getContent(urlString); - parser = ParserFactory.getParser(content.getContentType(), urlString); - parse = parser.getParse(content); + parse = ParseUtil.parseByParserId("parse-rtf",content); String text = parse.getText(); assertEquals("The quick brown fox jumps over the lazy dog", text.trim()); Modified: lucene/nutch/branches/mapred/src/plugin/parse-text/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-text/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-text/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-text/plugin.xml Tue Oct 18 13:59:40 2005 @@ -12,6 +12,10 @@ </library> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <extension id="org.apache.nutch.parse.text" name="TextParse" point="org.apache.nutch.parse.Parser"> Modified: lucene/nutch/branches/mapred/src/plugin/parse-zip/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-zip/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-zip/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-zip/plugin.xml Tue Oct 18 13:59:40 2005 @@ -11,6 +11,10 @@ </library> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <extension id="org.apache.nutch.parse.zip" name="ZipParser" point="org.apache.nutch.parse.Parser"> Modified: lucene/nutch/branches/mapred/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipTextExtractor.java Tue Oct 18 13:59:40 2005 @@ -28,9 +28,8 @@ // Nutch imports import org.apache.nutch.parse.Parse; -import org.apache.nutch.parse.Parser; +import org.apache.nutch.parse.ParseUtil; import org.apache.nutch.parse.ParseData; -import org.apache.nutch.parse.ParserFactory; import org.apache.nutch.parse.ParseException; import org.apache.nutch.parse.Outlink; import org.apache.nutch.protocol.Content; @@ -89,8 +88,7 @@ metadata.setProperty("Content-Length", Long.toString(entry.getSize())); metadata.setProperty("Content-Type", contentType); Content content = new Content(newurl, base, b, contentType, metadata); - Parser parser = ParserFactory.getParser(contentType, newurl); - Parse parse = parser.getParse(content); + Parse parse = ParseUtil.parse(content); ParseData theParseData = parse.getData(); Outlink[] theOutlinks = theParseData.getOutlinks(); Modified: lucene/nutch/branches/mapred/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java (original) +++ lucene/nutch/branches/mapred/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java Tue Oct 18 13:59:40 2005 @@ -21,9 +21,8 @@ import org.apache.nutch.protocol.Content; import org.apache.nutch.protocol.ProtocolException; -import org.apache.nutch.parse.ParserFactory; -import org.apache.nutch.parse.Parser; import org.apache.nutch.parse.Parse; +import org.apache.nutch.parse.ParseUtil; import org.apache.nutch.parse.ParseException; import junit.framework.TestCase; @@ -57,7 +56,6 @@ String urlString; Protocol protocol; Content content; - Parser parser; Parse parse; for (int i = 0; i < sampleFiles.length; i++) { @@ -65,9 +63,7 @@ protocol = ProtocolFactory.getProtocol(urlString); content = protocol.getProtocolOutput(urlString).getContent(); - - parser = ParserFactory.getParser(content.getContentType(), urlString); - parse = parser.getParse(content); + parse = ParseUtil.parseByParserId("parse-zip",content); assertTrue(parse.getText().equals(expectedText)); } } Modified: lucene/nutch/branches/mapred/src/plugin/protocol-file/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-file/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/protocol-file/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/protocol-file/plugin.xml Tue Oct 18 13:59:40 2005 @@ -12,6 +12,10 @@ </library> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <extension id="org.apache.nutch.protocol.file" name="FileProtocol" point="org.apache.nutch.protocol.Protocol"> Modified: lucene/nutch/branches/mapred/src/plugin/protocol-ftp/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-ftp/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/protocol-ftp/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/protocol-ftp/plugin.xml Tue Oct 18 13:59:40 2005 @@ -5,14 +5,16 @@ version="1.0.0" provider-name="nutch.org"> - - <runtime> <library name="protocol-ftp.jar"> <export name="*"/> </library> <library name="commons-net-1.2.0-dev.jar"/> </runtime> + + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> <extension id="org.apache.nutch.protocol.ftp" name="FtpProtocol" Modified: lucene/nutch/branches/mapred/src/plugin/protocol-http/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-http/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/protocol-http/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/protocol-http/plugin.xml Tue Oct 18 13:59:40 2005 @@ -11,6 +11,10 @@ </library> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <extension id="org.apache.nutch.protocol.http" name="HttpProtocol" point="org.apache.nutch.protocol.Protocol"> Modified: lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/protocol-httpclient/plugin.xml Tue Oct 18 13:59:40 2005 @@ -13,6 +13,10 @@ <library name="commons-httpclient-3.0-rc2.jar" /> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <extension id="org.apache.nutch.protocol.httpclient" name="HttpProtocol" point="org.apache.nutch.protocol.Protocol"> Modified: lucene/nutch/branches/mapred/src/plugin/query-basic/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/query-basic/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/query-basic/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/query-basic/plugin.xml Tue Oct 18 13:59:40 2005 @@ -5,13 +5,15 @@ version="1.0.0" provider-name="nutch.org"> - - <runtime> <library name="query-basic.jar"> <export name="*"/> </library> </runtime> + + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> <extension id="org.apache.nutch.searcher.basic" name="Nutch Basic Query Filter" Modified: lucene/nutch/branches/mapred/src/plugin/query-more/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/query-more/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/query-more/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/query-more/plugin.xml Tue Oct 18 13:59:40 2005 @@ -5,13 +5,15 @@ version="1.0.0" provider-name="nutch.org"> - - <runtime> <library name="query-more.jar"> <export name="*"/> </library> </runtime> + + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> <extension id="org.apache.nutch.searcher.more" name="Nutch More Query Filter" Modified: lucene/nutch/branches/mapred/src/plugin/query-site/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/query-site/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/query-site/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/query-site/plugin.xml Tue Oct 18 13:59:40 2005 @@ -11,6 +11,10 @@ </library> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + <extension id="org.apache.nutch.searcher.site.SiteQueryFilter" name="Nutch Site Query Filter" point="org.apache.nutch.searcher.QueryFilter"> Modified: lucene/nutch/branches/mapred/src/plugin/query-url/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/query-url/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/query-url/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/query-url/plugin.xml Tue Oct 18 13:59:40 2005 @@ -5,14 +5,15 @@ version="1.0.0" provider-name="nutch.org"> - - <runtime> <library name="query-url.jar"> <export name="*"/> </library> </runtime> + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> <extension id="org.apache.nutch.searcher.url.URLQueryFilter" name="Nutch URL Query Filter" Modified: lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/urlfilter-prefix/plugin.xml Tue Oct 18 13:59:40 2005 @@ -5,15 +5,17 @@ version="1.0.0" provider-name="nutch.org"> - - <runtime> <library name="urlfilter-prefix.jar"> <export name="*"/> </library> </runtime> - <extension id="org.apache.nutch.net.urlfiler" + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + + <extension id="org.apache.nutch.net.urlfilter" name="Nutch Prefix URL Filter" point="org.apache.nutch.net.URLFilter"> <implementation id="PrefixURLFilter" Modified: lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/plugin.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/plugin.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/plugin.xml (original) +++ lucene/nutch/branches/mapred/src/plugin/urlfilter-regex/plugin.xml Tue Oct 18 13:59:40 2005 @@ -11,7 +11,11 @@ </library> </runtime> - <extension id="org.apache.nutch.net.urlfiler" + <requires> + <import plugin="nutch-extensionpoints"/> + </requires> + + <extension id="org.apache.nutch.net.urlfilter" name="Nutch Regex URL Filter" point="org.apache.nutch.net.URLFilter"> <implementation id="RegexURLFilter" Modified: lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml URL: http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml?rev=326238&r1=326237&r2=326238&view=diff ============================================================================== --- lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml (original) +++ lucene/nutch/branches/mapred/src/site/src/documentation/content/xdocs/site.xml Tue Oct 18 13:59:40 2005 @@ -24,7 +24,7 @@ </project> <docs label="Documentation"> - <faq label="FAQ" href="faq.html" /> + <faq label="FAQ" href="ext:faq" /> <wiki label="Wiki" href="ext:wiki" /> <tutorial label="Tutorial" href="tutorial.html" /> <webmasters label="Robot " href="bot.html" /> @@ -46,6 +46,7 @@ <external-refs> <lucene href="http://lucene.apache.org/java/" /> <wiki href="http://wiki.apache.org/nutch/" /> + <faq href="http://wiki.apache.org/nutch/FAQ" /> <store href="http://www.cafepress.com/nutch/" /> </external-refs>
