Author: jerome
Date: Fri Apr 14 16:57:24 2006
New Revision: 394228
URL: http://svn.apache.org/viewcvs?rev=394228&view=rev
Log:
NUTCH-245 : Added a DTD for Nutch Plugin Manifest
- Add a commented DTD in src
- Add the DTD in javadoc
- Change the implementation element structure : uses name-value parameters
instead of proprietary attributes
- Fix unit tests regarding changes in DTD
- Fix the plugin.xml file in nutch plugins regarding changes in DTD
Added:
lucene/nutch/trunk/src/plugin/plugin.dtd (with props)
Modified:
lucene/nutch/trunk/build.xml
lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java
lucene/nutch/trunk/src/java/org/apache/nutch/plugin/package.html
lucene/nutch/trunk/src/plugin/analysis-de/plugin.xml
lucene/nutch/trunk/src/plugin/analysis-fr/plugin.xml
lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml
lucene/nutch/trunk/src/plugin/creativecommons/plugin.xml
lucene/nutch/trunk/src/plugin/index-basic/plugin.xml
lucene/nutch/trunk/src/plugin/index-more/plugin.xml
lucene/nutch/trunk/src/plugin/languageidentifier/plugin.xml
lucene/nutch/trunk/src/plugin/lib-commons-httpclient/plugin.xml
lucene/nutch/trunk/src/plugin/lib-http/plugin.xml
lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml
lucene/nutch/trunk/src/plugin/lib-log4j/plugin.xml
lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml
lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml
lucene/nutch/trunk/src/plugin/lib-parsems/plugin.xml
lucene/nutch/trunk/src/plugin/lib-regex-filter/plugin.xml
lucene/nutch/trunk/src/plugin/lib-xml/plugin.xml
lucene/nutch/trunk/src/plugin/microformats-reltag/plugin.xml
lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
lucene/nutch/trunk/src/plugin/ontology/plugin.xml
lucene/nutch/trunk/src/plugin/parse-ext/plugin.xml
lucene/nutch/trunk/src/plugin/parse-html/plugin.xml
lucene/nutch/trunk/src/plugin/parse-js/plugin.xml
lucene/nutch/trunk/src/plugin/parse-mp3/plugin.xml
lucene/nutch/trunk/src/plugin/parse-msexcel/plugin.xml
lucene/nutch/trunk/src/plugin/parse-mspowerpoint/plugin.xml
lucene/nutch/trunk/src/plugin/parse-msword/plugin.xml
lucene/nutch/trunk/src/plugin/parse-pdf/plugin.xml
lucene/nutch/trunk/src/plugin/parse-rss/plugin.xml
lucene/nutch/trunk/src/plugin/parse-rtf/plugin.xml
lucene/nutch/trunk/src/plugin/parse-swf/plugin.xml
lucene/nutch/trunk/src/plugin/parse-text/plugin.xml
lucene/nutch/trunk/src/plugin/parse-zip/plugin.xml
lucene/nutch/trunk/src/plugin/protocol-file/plugin.xml
lucene/nutch/trunk/src/plugin/protocol-ftp/plugin.xml
lucene/nutch/trunk/src/plugin/protocol-http/plugin.xml
lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml
lucene/nutch/trunk/src/plugin/query-basic/plugin.xml
lucene/nutch/trunk/src/plugin/query-more/plugin.xml
lucene/nutch/trunk/src/plugin/query-site/plugin.xml
lucene/nutch/trunk/src/plugin/query-url/plugin.xml
lucene/nutch/trunk/src/plugin/urlfilter-automaton/plugin.xml
lucene/nutch/trunk/src/plugin/urlfilter-prefix/plugin.xml
lucene/nutch/trunk/src/plugin/urlfilter-regex/plugin.xml
lucene/nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java
Modified: lucene/nutch/trunk/build.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/build.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/build.xml (original)
+++ lucene/nutch/trunk/build.xml Fri Apr 14 16:57:24 2006
@@ -279,6 +279,9 @@
<!-- Documentation -->
<!-- ================================================================== -->
<target name="javadoc" depends="compile">
+ <!-- Copy the plugin.dtd file to the plugin doc-files dir -->
+ <copy file="${plugins.dir}/plugin.dtd"
+ todir="${src.dir}/org/apache/nutch/plugin/doc-files"/>
<mkdir dir="${build.javadoc}"/>
<javadoc
overview="${src.dir}/overview.html"
@@ -353,6 +356,7 @@
<group title="Ontology Plugins" packages="${plugins.ontology}"/>
<group title="Misc. Plugins" packages="${plugins.misc}"/>
</javadoc>
+
</target>
<target name="default-doc">
Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
---
lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java
(original)
+++
lucene/nutch/trunk/src/java/org/apache/nutch/plugin/PluginManifestParser.java
Fri Apr 14 16:57:24 2006
@@ -36,6 +36,8 @@
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
+import org.xml.sax.EntityResolver;
+import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
@@ -294,6 +296,15 @@
+ extensionClass);
Extension extension = new Extension(pPluginDescriptor,
pointId, id, extensionClass, this.conf,
this.pluginRepository);
+ NodeList parameters =
oneImplementation.getElementsByTagName("parameter");
+ if (parameters != null) {
+ for (int k=0; k<parameters.getLength(); k++) {
+ Element param = (Element) parameters.item(k);
+ extension.addAttribute(param.getAttribute("name"),
+
param.getAttribute("value"));
+ }
+ }
+ /*
NamedNodeMap list = oneImplementation.getAttributes();
for (int k = 0; k < list.getLength(); k++) {
Node attribute = list.item(k);
@@ -302,11 +313,12 @@
continue;
String value = attribute.getNodeValue();
extension.addAttribute(name, value);
- }
+ }*/
pPluginDescriptor.addExtension(extension);
}
}
}
}
}
+
}
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/plugin/package.html
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/plugin/package.html?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/plugin/package.html (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/plugin/package.html Fri Apr 14
16:57:24 2006
@@ -16,6 +16,8 @@
listed in the {@link org.apache.nutch.plugin.Pluggable} interface.
</p>
+@see <a href="./doc-files/plugin.dtd">Nutch plugin manifest DTD</a>
+
@see <a href="http://wiki.apache.org/nutch/PluginCentral">
Plugin Central
</a>
Modified: lucene/nutch/trunk/src/plugin/analysis-de/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/analysis-de/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/analysis-de/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/analysis-de/plugin.xml Fri Apr 14 16:57:24
2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="analysis-de"
name="German Analysis Plug-in"
@@ -21,8 +22,9 @@
point="org.apache.nutch.analysis.NutchAnalyzer">
<implementation id="org.apache.nutch.analysis.de.GermanAnalyzer"
- class="org.apache.nutch.analysis.de.GermanAnalyzer"
- lang="de"/>
+ class="org.apache.nutch.analysis.de.GermanAnalyzer">
+ <parameter name="lang" value="de"/>
+ </implementation>
</extension>
Modified: lucene/nutch/trunk/src/plugin/analysis-fr/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/analysis-fr/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/analysis-fr/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/analysis-fr/plugin.xml Fri Apr 14 16:57:24
2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="analysis-fr"
name="French Analysis Plug-in"
@@ -21,8 +22,9 @@
point="org.apache.nutch.analysis.NutchAnalyzer">
<implementation id="org.apache.nutch.analysis.fr.FrenchAnalyzer"
- class="org.apache.nutch.analysis.fr.FrenchAnalyzer"
- lang="fr"/>
+ class="org.apache.nutch.analysis.fr.FrenchAnalyzer">
+ <parameter name="lang" value="fr"/>
+ </implementation>
</extension>
Modified: lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/clustering-carrot2/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="clustering-carrot2"
name="Online Search Results Clustering using Carrot2's Lingo component"
Modified: lucene/nutch/trunk/src/plugin/creativecommons/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/creativecommons/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/creativecommons/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/creativecommons/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="creativecommons"
name="Creative Commons Plugins"
@@ -33,8 +34,9 @@
name="Creative Commmons Query Filter"
point="org.apache.nutch.searcher.QueryFilter">
<implementation id="CCQueryFilter"
- class="org.creativecommons.nutch.CCQueryFilter"
- fields="cc"/>
+ class="org.creativecommons.nutch.CCQueryFilter">
+ <parameter name="fields" value="cc"/>
+ </implementation>
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/index-basic/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/index-basic/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/index-basic/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/index-basic/plugin.xml Fri Apr 14 16:57:24
2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="index-basic"
name="Basic Indexing Filter"
Modified: lucene/nutch/trunk/src/plugin/index-more/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/index-more/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/index-more/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/index-more/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="index-more"
name="More Indexing Filter"
Modified: lucene/nutch/trunk/src/plugin/languageidentifier/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/languageidentifier/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/languageidentifier/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/languageidentifier/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="language-identifier"
name="Language Identification Parser/Filter"
@@ -34,8 +35,9 @@
name="Nutch Language Query Filter"
point="org.apache.nutch.searcher.QueryFilter">
<implementation id="LanguageQueryFilter"
-
class="org.apache.nutch.analysis.lang.LanguageQueryFilter"
- raw-fields="lang"/>
+
class="org.apache.nutch.analysis.lang.LanguageQueryFilter">
+ <parameter name="raw-fields" value="lang"/>
+ </implementation>
</extension>
Modified: lucene/nutch/trunk/src/plugin/lib-commons-httpclient/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-commons-httpclient/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-commons-httpclient/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-commons-httpclient/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<!--
! Jakarta Commons HTTP Client
! (http://jakarta.apache.org/commons/httpclient/)
Modified: lucene/nutch/trunk/src/plugin/lib-http/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-http/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-http/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-http/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<!--
! A common framework for http protocol implementations
!-->
Modified: lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-jakarta-poi/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<!--
! Jakarta POI - Java API To Access Microsoft Format Files
! (http://jakarta.apache.org/poi/)
Modified: lucene/nutch/trunk/src/plugin/lib-log4j/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-log4j/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-log4j/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-log4j/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<!--
! Log4j library
! (http://logging.apache.org/log4j/)
Modified: lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-lucene-analyzers/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<!--
! Lucene Analyzers
! (http://lucene.apache.org/java/docs/lucene-sandbox/)
Modified: lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-nekohtml/plugin.xml Fri Apr 14 16:57:24
2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<!--
! NekoHTML is a simple HTML scanner and tag balancer.
! (http://people.apache.org/~andyc/neko/doc/html/index.html)
Modified: lucene/nutch/trunk/src/plugin/lib-parsems/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-parsems/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-parsems/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-parsems/plugin.xml Fri Apr 14 16:57:24
2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<!--
! A common framework for microsoft documents parsers implementations
!-->
Modified: lucene/nutch/trunk/src/plugin/lib-regex-filter/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-regex-filter/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-regex-filter/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-regex-filter/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<!--
! A common framework for RegExp based URL filters
!-->
Modified: lucene/nutch/trunk/src/plugin/lib-xml/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/lib-xml/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/lib-xml/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/lib-xml/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<!--
! XML library - Gathers many XML related libraries:
!
Modified: lucene/nutch/trunk/src/plugin/microformats-reltag/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/microformats-reltag/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/microformats-reltag/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/microformats-reltag/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="microformats-reltag"
name="Rel-Tag microformat Parser/Indexer/Querier"
@@ -34,8 +35,10 @@
name="Rel-Tag query filter"
point="org.apache.nutch.searcher.QueryFilter">
<implementation id="RelTagQueryFilter"
-
class="org.apache.nutch.microformats.reltag.RelTagQueryFilter"
- raw-fields="tag"/>
+
class="org.apache.nutch.microformats.reltag.RelTagQueryFilter">
+ <parameter name="raw-fields" value="tag"/>
+ </implementation>
+
</extension>
Modified: lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="nutch-extensionpoints"
name="the nutch core extension points"
Modified: lucene/nutch/trunk/src/plugin/ontology/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/ontology/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/ontology/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/ontology/plugin.xml Fri Apr 14 16:57:24 2006
@@ -30,8 +30,9 @@
<!-- define all the classes that implement the point defined above -->
<implementation id="org.apache.nutch.ontology.jena.OntologyImpl"
- class="org.apache.nutch.ontology.jena.OntologyImpl"
- pathSuffix=""/>
+ class="org.apache.nutch.ontology.jena.OntologyImpl">
+ <parameter name="pathSuffix" value=""/>
+ </implementation>
</extension>
Modified: lucene/nutch/trunk/src/plugin/parse-ext/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-ext/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-ext/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-ext/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="parse-ext"
name="External Parser Plug-in"
@@ -20,18 +21,20 @@
point="org.apache.nutch.parse.Parser">
<implementation id="ExtParser"
- class="org.apache.nutch.parse.ext.ExtParser"
- contentType="application/vnd.nutch.example.cat"
- pathSuffix=""
- command="./build/plugins/parse-ext/command"
- timeout="10"/>
+ class="org.apache.nutch.parse.ext.ExtParser">
+ <parameter name="contentType"
value="application/vnd.nutch.example.cat"/>
+ <parameter name="pathSuffix" value=""/>
+ <parameter name="command"
value="./build/plugins/parse-ext/command"/>
+ <parameter name="timeout" value="10"/>
+ </implementation>
<implementation id="ExtParser"
- class="org.apache.nutch.parse.ext.ExtParser"
- contentType="application/vnd.nutch.example.md5sum"
- pathSuffix=""
- command="./build/plugins/parse-ext/command"
- timeout="20"/>
+ class="org.apache.nutch.parse.ext.ExtParser">
+ <parameter name="contentType"
value="application/vnd.nutch.example.md5sum"/>
+ <parameter name="pathSuffix" value=""/>
+ <parameter name="command"
value="./build/plugins/parse-ext/command"/>
+ <parameter name="timeout" value="20"/>
+ </implementation>
</extension>
Modified: lucene/nutch/trunk/src/plugin/parse-html/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-html/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-html/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-html/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="parse-html"
name="Html Parse Plug-in"
@@ -22,9 +23,10 @@
point="org.apache.nutch.parse.Parser">
<implementation id="org.apache.nutch.parse.html.HtmlParser"
- class="org.apache.nutch.parse.html.HtmlParser"
- contentType="text/html"
- pathSuffix=""/>
+ class="org.apache.nutch.parse.html.HtmlParser">
+ <parameter name="contentType" value="text/html"/>
+ <parameter name="pathSuffix" value=""/>
+ </implementation>
</extension>
Modified: lucene/nutch/trunk/src/plugin/parse-js/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-js/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-js/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-js/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="parse-js"
name="JavaScript Parser"
@@ -19,17 +20,19 @@
name="JS Parser"
point="org.apache.nutch.parse.Parser">
<implementation id="JSParser"
- class="org.apache.nutch.parse.js.JSParseFilter"
- contentType="application/x-javascript"
- pathSuffix="js"/>
+ class="org.apache.nutch.parse.js.JSParseFilter">
+ <parameter name="contentType" value="application/x-javascript"/>
+ <parameter name="pathSuffix" value="js"/>
+ </implementation>
</extension>
<extension id="org.apache.nutch.parse.js.JSParseFilter"
name="Parse JS Filter"
point="org.apache.nutch.parse.HtmlParseFilter">
<implementation id="JSParseFilter"
- class="org.apache.nutch.parse.js.JSParseFilter"
- contentType="application/x-javascript"
- pathSuffix=""/>
+ class="org.apache.nutch.parse.js.JSParseFilter">
+ <parameter name="contentType" value="application/x-javascript"/>
+ <parameter name="pathSuffix" value=""/>
+ </implementation>
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/parse-mp3/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-mp3/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mp3/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-mp3/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
-<?xml version = '1.0' encoding = 'UTF-8'?>
+<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
version="1.0.0"
provider-name="nutch.org"
@@ -21,9 +22,10 @@
name="MP3Parse">
<implementation class="org.apache.nutch.parse.mp3.MP3Parser"
- pathSuffix="mp3"
- id="org.apache.nutch.parse.mp3.MP3Parser"
- contentType="audio/mpeg"/>
+ id="org.apache.nutch.parse.mp3.MP3Parser">
+ <parameter name="pathSuffix" value="mp3"/>
+ <parameter name="contentType" value="audio/mpeg"/>
+ </implementation>
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/parse-msexcel/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msexcel/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-msexcel/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-msexcel/plugin.xml Fri Apr 14 16:57:24
2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="parse-msexcel"
name="MSExcel Parse Plug-in"
@@ -22,9 +23,10 @@
point="org.apache.nutch.parse.Parser">
<implementation id="org.apache.nutch.parse.msexcel.MSExcelParser"
- class="org.apache.nutch.parse.msexcel.MSExcelParser"
- contentType="application/vnd.ms-excel"
- pathSuffix="xls"/>
+ class="org.apache.nutch.parse.msexcel.MSExcelParser">
+ <parameter name="contentType" value="application/vnd.ms-excel"/>
+ <parameter name="pathSuffix" value="xls"/>
+ </implementation>
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/parse-mspowerpoint/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mspowerpoint/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-mspowerpoint/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="parse-mspowerpoint"
name="MSPowerPoint Parse Plug-in"
@@ -21,9 +22,10 @@
name="MSPowerPointParse"
point="org.apache.nutch.parse.Parser">
<implementation
id="org.apache.nutch.parse.mspowerpoint.MSPowerPointParser"
-
class="org.apache.nutch.parse.mspowerpoint.MSPowerPointParser"
- contentType="application/vnd.ms-powerpoint"
- pathSuffix=""/>
+
class="org.apache.nutch.parse.mspowerpoint.MSPowerPointParser">
+ <parameter name="contentType" value="application/vnd.ms-powerpoint"/>
+ <parameter name="pathSuffix" value=""/>
+ </implementation>
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/parse-msword/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msword/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-msword/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-msword/plugin.xml Fri Apr 14 16:57:24
2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="parse-msword"
name="MSWord Parse Plug-in"
@@ -22,10 +23,10 @@
point="org.apache.nutch.parse.Parser">
<implementation id="org.apache.nutch.parse.msword.MSWordParser"
- class="org.apache.nutch.parse.msword.MSWordParser"
- contentType="application/msword"
- pathSuffix=""/>
-
+ class="org.apache.nutch.parse.msword.MSWordParser">
+ <parameter name="contentType" value="application/msword"/>
+ <parameter name="pathSuffix" value=""/>
+ </implementation>
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/parse-pdf/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-pdf/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-pdf/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-pdf/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="parse-pdf"
name="Pdf Parse Plug-in"
@@ -24,10 +25,10 @@
point="org.apache.nutch.parse.Parser">
<implementation id="org.apache.nutch.parse.pdf.PdfParser"
- class="org.apache.nutch.parse.pdf.PdfParser"
- contentType="application/pdf"
- pathSuffix=""/>
-
+ class="org.apache.nutch.parse.pdf.PdfParser">
+ <parameter name="contentType" value="application/pdf"/>
+ <parameter name="pathSuffix" value=""/>
+ </implementation>
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/parse-rss/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rss/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-rss/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-rss/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="parse-rss"
name="RSS Parse Plug-in"
@@ -26,10 +27,10 @@
point="org.apache.nutch.parse.Parser">
<implementation id="org.apache.nutch.parse.rss.RSSParser"
- class="org.apache.nutch.parse.rss.RSSParser"
- contentType="application/rss+xml"
- pathSuffix="rss"/>
-
+ class="org.apache.nutch.parse.rss.RSSParser">
+ <parameter name="contentType" value="application/rss+xml"/>
+ <parameter name="pathSuffix" value="rss"/>
+ </implementation>
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/parse-rtf/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rtf/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-rtf/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-rtf/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
-<?xml version = '1.0' encoding = 'UTF-8'?>
+<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
version="1.0.0"
provider-name="nutch.org"
@@ -20,8 +21,10 @@
id="org.apache.nutch.parse.rtf"
name="RTFParse">
<implementation class="org.apache.nutch.parse.rtf.RTFParseFactory"
- pathSuffix="rtf"
id="org.apache.nutch.parse.rtf.RTFParseFactory"
- contentType="application/rtf"/>
+ id="org.apache.nutch.parse.rtf.RTFParseFactory">
+ <parameter name="pathSuffix" value="rtf"/>
+ <parameter name="contentType" value="application/rtf"/>
+ </implementation>
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/parse-swf/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-swf/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-swf/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-swf/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="parse-swf"
name="SWF Parse Plug-in"
@@ -18,9 +19,11 @@
point="org.apache.nutch.parse.Parser">
<implementation id="org.apache.nutch.parse.swf.SWFParser"
- class="org.apache.nutch.parse.swf.SWFParser"
- contentType="application/x-shockwave-flash"
- pathSuffix="swf"/>
+ class="org.apache.nutch.parse.swf.SWFParser">
+ <parameter name="contentType" value="application/x-shockwave-flash"/>
+ <parameter name="pathSuffix" value="swf"/>
+ </implementation>
+
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/parse-text/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-text/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-text/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-text/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="parse-text"
name="Text Parse Plug-in"
@@ -21,9 +22,10 @@
point="org.apache.nutch.parse.Parser">
<implementation id="org.apache.nutch.parse.text.TextParser"
- class="org.apache.nutch.parse.text.TextParser"
- contentType="text/plain"
- pathSuffix="txt"/>
+ class="org.apache.nutch.parse.text.TextParser">
+ <parameter name="contentType" value="text/plain"/>
+ <parameter name="pathSuffix" value="txt"/>
+ </implementation>
</extension>
Modified: lucene/nutch/trunk/src/plugin/parse-zip/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-zip/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-zip/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/parse-zip/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="parse-zip"
name="Zip Parse Plug-in"
@@ -20,9 +21,11 @@
point="org.apache.nutch.parse.Parser">
<implementation id="org.apache.nutch.parse.zip.ZipParser"
- class="org.apache.nutch.parse.zip.ZipParser"
- contentType="application/zip"
- pathSuffix="zip"/>
+ class="org.apache.nutch.parse.zip.ZipParser">
+ <parameter name="contentType" value="application/zip"/>
+ <parameter name="pathSuffix" value="zip"/>
+ </implementation>
+
</extension>
</plugin>
Added: lucene/nutch/trunk/src/plugin/plugin.dtd
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/plugin.dtd?rev=394228&view=auto
==============================================================================
--- lucene/nutch/trunk/src/plugin/plugin.dtd (added)
+++ lucene/nutch/trunk/src/plugin/plugin.dtd Fri Apr 14 16:57:24 2006
@@ -0,0 +1,190 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ ! Document : plugin.dtd
+ ! Created on : 14 avril 2006, 22:14
+ ! Author : Jerome Charron
+ ! Description: Nutch plug-in manifest
+ !
+ ! PUBLIC ID : -//Apache Software Fundation//DTD Nutch Plugin Manifest 1.0//EN
+ ! SYSTEM ID : http://lucene.apache.org/nutch/plugin.dtd
+-->
+
+
+
+<!--
+ ! The <plugin> element defines the body of the manifest.
+ ! It optionally contains definitions for the plug-in runtime,
+ ! definitions of other plug-ins required by this one,
+ ! declarations of any new extension points being introduced by the plug-in,
+ ! as well as configuration of functional extensions
+ ! (configured into extension points defined by other plug-ins,
+ ! or introduced by this plug-in).
+ !-->
+<!ELEMENT plugin (runtime?, requires?, extension-point*, extension*)>
+
+<!-- A user displayable name for the plug-in -->
+<!ATTLIST plugin name CDATA #REQUIRED>
+
+<!--
+ ! A unique identifier for the plug-in.
+ ! To minimize potential for naming collisions,
+ ! the identifier should be derived from the internet domain id
+ ! of the supplying provider (reversing the domain name tokens and
+ ! appending additional name tokens separated by dot [.]).
+ ! For example, provider nutch.org could define plug-in identifier
+ ! org.nutch.myplugin
+ !-->
+<!ATTLIST plugin id CDATA #REQUIRED>
+
+<!--
+ ! The plug-in version number.
+ ! NOTE : Version number compatibility is not yet implemented.
+ !-->
+<!ATTLIST plugin version CDATA #REQUIRED>
+
+<!-- The user-displayable name of the provider supplying the plug-in. -->
+<!ATTLIST plugin provider-name CDATA #IMPLIED>
+
+<!--
+ ! The name of the plug-in class for this plug-in.
+ ! The class must be a subclass of org.apache.nutch.plugin.Plugin
+ !-->
+<!ATTLIST plugin class CDATA #IMPLIED>
+
+
+<!--
+ ! The <requires> section of the manifest declares
+ ! any dependencies on other plug-ins.
+ !-->
+<!ELEMENT requires (import+)>
+
+
+<!-- Each dependency is specified using an <import> element. -->
+<!ELEMENT import EMPTY>
+
+<!-- The identifier of the required plug-in. -->
+<!ATTLIST import plugin CDATA #REQUIRED>
+
+
+<!--
+ ! The <runtime> section of the manifest contains a definition of one or more
+ ! libraries that make up the plug-in runtime.
+ ! The referenced libraries are used by the plugin execution mechanisms
+ ! (the plug-in class loader) to load and execute the correct code required by
+ ! the plug-in.
+ !-->
+<!ELEMENT runtime (library+)>
+
+
+<!--
+ ! The <library> elements collectively define the plug-in runtime.
+ ! At least one <library> must be specified.
+ !-->
+<!ELEMENT library (export*)>
+
+<!--
+ ! A string reference to a library file or directory containing classes
+ ! (relative to the plug-in install directory).
+ ! Directory references must contain trailing file separator.
+ !-->
+<!ATTLIST library name CDATA #REQUIRED>
+
+
+<!--
+ ! Each <library> element can specify which portion
+ ! of the library should be exported.
+ ! The export rules are specified as a set of export masks.
+ ! By default (no export rules specified),
+ ! the library is considered to be private.
+ ! Each export mask is specified using the name attribute.
+ !-->
+<!ELEMENT export EMPTY>
+
+<!--
+ ! The export mask can have the following values:
+ ! * - indicates all contents of library are exported (public)
+ ! package.name.* - indicates all classes in the specified package
+ ! are exported. The matching rules are the same as in the
+ ! Java import statement.
+ ! package.name.ClassName - fully qualified java class name
+ !
+ ! NOTE : export mask is not yet implemented in Nutch.
+ !-->
+<!ATTLIST export name CDATA #REQUIRED>
+
+
+<!--
+ ! Nutch's architecture is based on the notion of configurable extension points.
+ ! Nutch itself predefines a set of extension points that cover the task of
+ ! extending it (for example, adding parser, indexing filter, ...).
+ ! In addition to the predefined extension points, each supplied plug-in can
+ ! declare additional extension points. By declaring an extension point the
+ ! plug-in is essentially advertising the ability to configure the plug-in
+ ! function with externally supplied extensions.
+ !-->
+<!ELEMENT extension-point EMPTY>
+
+<!-- A user-displayable name for the extension point. -->
+<!ATTLIST extension-point name CDATA #REQUIRED>
+
+<!-- A simple id, unique within this plug-in -->
+<!ATTLIST extension-point id CDATA #REQUIRED>
+
+
+<!--
+ ! Actual extensions are configured into extension points
+ ! (predefined, or newly declared in this plug-in) in the <extension> section.
+ !
+ ! The configuration information is specified by at least one implementation
+ ! with some parameters.
+ !-->
+<!ELEMENT extension (implementation+)>
+
+<!--
+ ! A reference to an extension point being configured.
+ ! The extension point can be one defined in this plug-in or another plug-in.
+ !-->
+<!ATTLIST extension point CDATA #REQUIRED>
+
+<!--
+ ! Optional identifier for this extension point configuration instance.
+ ! This is used by extension points that need to uniquely identify
+ ! (rather than just enumerate) the specific configured extensions.
+ ! The identifier is specified as a simple token unique within the definition
+ ! of the declaring plug-in. When used globally, the extension identifier
+ ! is qualified by the plug-in identifier.
+ ! FIXME : This attribute never seems to be read in the code.
+ !-->
+<!ATTLIST extension id CDATA #IMPLIED>
+
+<!--
+ ! A user-displayable name for the extension.
+ ! FIXME : This attribute never seems to be read in the code.
+ !-->
+<!ATTLIST extension name CDATA #IMPLIED>
+
+
+<!--
+ ! Defines a specific implementation for the extension.
+ ! This implementation can define some special name/value parameters
+ ! used at runtime.
+ !-->
+<!ELEMENT implementation (parameter*)>
+
+<!-- A unique identifier for this implementation -->
+<!ATTLIST implementation id CDATA #REQUIRED>
+
+<!-- The fully-qualified Java Class that implements this extension-point -->
+<!ATTLIST implementation class CDATA #REQUIRED>
+
+
+<!-- Defines a name/value parameter -->
+<!ELEMENT parameter EMPTY>
+
+<!-- The parameter's name (should be unique for an extension) -->
+<!ATTLIST parameter name CDATA #REQUIRED>
+
+<!-- The parameter's value -->
+<!ATTLIST parameter value CDATA #REQUIRED>
+
Propchange: lucene/nutch/trunk/src/plugin/plugin.dtd
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/nutch/trunk/src/plugin/protocol-file/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-file/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-file/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/protocol-file/plugin.xml Fri Apr 14 16:57:24
2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="protocol-file"
name="File Protocol Plug-in"
@@ -21,8 +22,9 @@
point="org.apache.nutch.protocol.Protocol">
<implementation id="org.apache.nutch.protocol.file.File"
- class="org.apache.nutch.protocol.file.File"
- protocolName="file"/>
+ class="org.apache.nutch.protocol.file.File">
+ <parameter name="protocolName" value="file"/>
+ </implementation>
</extension>
Modified: lucene/nutch/trunk/src/plugin/protocol-ftp/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-ftp/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-ftp/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/protocol-ftp/plugin.xml Fri Apr 14 16:57:24
2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="protocol-ftp"
name="Ftp Protocol Plug-in"
@@ -21,9 +22,10 @@
point="org.apache.nutch.protocol.Protocol">
<implementation id="org.apache.nutch.protocol.ftp.Ftp"
- class="org.apache.nutch.protocol.ftp.Ftp"
- protocolName="ftp"/>
-
+ class="org.apache.nutch.protocol.ftp.Ftp">
+ <parameter name="protocolName" value="ftp"/>
+ </implementation>
+
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/protocol-http/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-http/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-http/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/protocol-http/plugin.xml Fri Apr 14 16:57:24
2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="protocol-http"
name="Http Protocol Plug-in"
@@ -21,8 +22,9 @@
point="org.apache.nutch.protocol.Protocol">
<implementation id="org.apache.nutch.protocol.http.Http"
- class="org.apache.nutch.protocol.http.Http"
- protocolName="http"/>
+ class="org.apache.nutch.protocol.http.Http">
+ <parameter name="protocolName" value="http"/>
+ </implementation>
</extension>
Modified: lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/protocol-httpclient/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="protocol-httpclient"
name="Http / Https Protocol Plug-in"
@@ -23,8 +24,9 @@
point="org.apache.nutch.protocol.Protocol">
<implementation id="org.apache.nutch.protocol.httpclient.Http"
- class="org.apache.nutch.protocol.httpclient.Http"
- protocolName="http"/>
+ class="org.apache.nutch.protocol.httpclient.Http">
+ <parameter name="protocolName" value="http"/>
+ </implementation>
</extension>
@@ -33,8 +35,9 @@
point="org.apache.nutch.protocol.Protocol">
<implementation id="org.apache.nutch.protocol.httpclient.Http"
- class="org.apache.nutch.protocol.httpclient.Http"
- protocolName="https"/>
+ class="org.apache.nutch.protocol.httpclient.Http">
+ <parameter name="protocolName" value="https"/>
+ </implementation>
</extension>
Modified: lucene/nutch/trunk/src/plugin/query-basic/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/query-basic/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/query-basic/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/query-basic/plugin.xml Fri Apr 14 16:57:24
2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="query-basic"
name="Basic Query Filter"
@@ -19,8 +20,10 @@
name="Nutch Basic Query Filter"
point="org.apache.nutch.searcher.QueryFilter">
<implementation id="BasicQueryFilter"
- class="org.apache.nutch.searcher.basic.BasicQueryFilter"
- fields="DEFAULT"/>
+ class="org.apache.nutch.searcher.basic.BasicQueryFilter">
+ <parameter name="fields" value="DEFAULT"/>
+ </implementation>
+
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/query-more/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/query-more/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/query-more/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/query-more/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="query-more"
name="More Query Filter"
@@ -19,16 +20,20 @@
name="Nutch More Query Filter"
point="org.apache.nutch.searcher.QueryFilter">
<implementation id="TypeQueryFilter"
- class="org.apache.nutch.searcher.more.TypeQueryFilter"
- raw-fields="type"/>
+ class="org.apache.nutch.searcher.more.TypeQueryFilter">
+ <parameter name="raw-fields" value="type"/>
+ </implementation>
+
</extension>
<extension id="org.apache.nutch.searcher.more"
name="Nutch More Query Filter"
point="org.apache.nutch.searcher.QueryFilter">
<implementation id="DateQueryFilter"
- class="org.apache.nutch.searcher.more.DateQueryFilter"
- raw-fields="date"/>
+ class="org.apache.nutch.searcher.more.DateQueryFilter">
+ <parameter name="raw-fields" value="date"/>
+ </implementation>
+
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/query-site/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/query-site/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/query-site/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/query-site/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="query-site"
name="Site Query Filter"
@@ -19,8 +20,10 @@
name="Nutch Site Query Filter"
point="org.apache.nutch.searcher.QueryFilter">
<implementation id="SiteQueryFilter"
- class="org.apache.nutch.searcher.site.SiteQueryFilter"
- raw-fields="site"/>
+ class="org.apache.nutch.searcher.site.SiteQueryFilter">
+ <parameter name="raw-fields" value="site"/>
+ </implementation>
+
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/query-url/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/query-url/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/query-url/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/query-url/plugin.xml Fri Apr 14 16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="query-url"
name="URL Query Filter"
@@ -19,8 +20,10 @@
name="Nutch URL Query Filter"
point="org.apache.nutch.searcher.QueryFilter">
<implementation id="URLQueryFilter"
- class="org.apache.nutch.searcher.url.URLQueryFilter"
- fields="url"/>
+ class="org.apache.nutch.searcher.url.URLQueryFilter">
+ <parameter name="fields" value="url"/>
+ </implementation>
+
</extension>
</plugin>
Modified: lucene/nutch/trunk/src/plugin/urlfilter-automaton/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/urlfilter-automaton/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/urlfilter-automaton/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/urlfilter-automaton/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="urlfilter-automaton"
name="Automaton URL Filter"
Modified: lucene/nutch/trunk/src/plugin/urlfilter-prefix/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/urlfilter-prefix/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/urlfilter-prefix/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/urlfilter-prefix/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="urlfilter-prefix"
name="Prefix URL Filter"
@@ -22,8 +23,9 @@
class="org.apache.nutch.urlfilter.prefix.PrefixURLFilter"/>
<!-- by default, attribute "file" is undefined, to keep classic behavior.
<implementation id="PrefixURLFilter"
- class="org.apache.nutch.net.PrefixURLFilter"
- file="urlfilter-prefix.txt"/>
+ class="org.apache.nutch.net.PrefixURLFilter">
+ <parameter name="file" value="urlfilter-prefix.txt"/>
+ </implementation>
-->
</extension>
Modified: lucene/nutch/trunk/src/plugin/urlfilter-regex/plugin.xml
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/urlfilter-regex/plugin.xml?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/urlfilter-regex/plugin.xml (original)
+++ lucene/nutch/trunk/src/plugin/urlfilter-regex/plugin.xml Fri Apr 14
16:57:24 2006
@@ -1,4 +1,5 @@
<?xml version="1.0" encoding="UTF-8"?>
+
<plugin
id="urlfilter-regex"
name="Regex URL Filter"
@@ -23,8 +24,9 @@
class="org.apache.nutch.urlfilter.regex.RegexURLFilter"/>
<!-- by default, attribute "file" is undefined, to keep classic behavior.
<implementation id="RegexURLFilter"
- class="org.apache.nutch.net.RegexURLFilter"
- file="urlfilter-regex.txt"/>
+ class="org.apache.nutch.net.RegexURLFilter">
+ <parameter name="file" value="urlfilter-regex.txt"/>
+ </implementation>
-->
</extension>
Modified:
lucene/nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java?rev=394228&r1=394227&r2=394228&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java
(original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java
Fri Apr 14 16:57:24 2006
@@ -108,7 +108,7 @@
for (int i = 0; i < extension1.length; i++) {
Extension extension2 = extension1[i];
String string = extension2.getAttribute(getGetConfigElementName());
- assertEquals(string, getAttributeValue());
+ assertEquals(string, getParameterValue());
}
}
@@ -240,18 +240,31 @@
throws IOException {
FileWriter out = new FileWriter(pFolderPath + File.separator
+ "plugin.xml");
- String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!--this is
just a simple plugin for testing issues.--><nutch-plugin
id=\"org.apache.nutch.plugin."
+ String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+ + "<!--this is just a simple plugin for testing issues.-->"
+ + "<plugin id=\"org.apache.nutch.plugin."
+ i
+ "\" name=\""
+ i
- + "\" version=\"1.0\" provider-name=\"joa23\"
class=\"org.apache.nutch.plugin.SimpleTestPlugin\"><extension-point
id=\"aExtensioID\" name=\"simple Parser Extension\"
schema=\"schema/testExtensionPoint.exsd\"/><runtime><library
name=\"libs/exported.jar\"><extport/></library><library
name=\"libs/not_exported.jar\"/></runtime><extension
point=\"aExtensioID\"><implementation name=\"simple Parser Extension\"
id=\"aExtensionId.\"
class=\"org.apache.nutch.plugin.HelloWorldExtension\"/></extension></nutch-plugin>";
+ + "\" version=\"1.0\" provider-name=\"joa23\" "
+ + "class=\"org.apache.nutch.plugin.SimpleTestPlugin\">"
+ + "<extension-point id=\"aExtensioID\" "
+ + "name=\"simple Parser Extension\" "
+ + "schema=\"schema/testExtensionPoint.exsd\"/>"
+ + "<runtime><library
name=\"libs/exported.jar\"><extport/></library>"
+ + "<library name=\"libs/not_exported.jar\"/></runtime>"
+ + "<extension point=\"aExtensioID\">"
+ + "<implementation name=\"simple Parser Extension\" "
+ + "id=\"aExtensionId.\"
class=\"org.apache.nutch.plugin.HelloWorldExtension\">"
+ + "<parameter name=\"dummy-name\" value=\"a simple param
value\"/>"
+ + "</implementation></extension></plugin>";
out.write(xml);
out.flush();
out.close();
}
- private String getAttributeValue() {
- return "simple Parser Extension";
+ private String getParameterValue() {
+ return "a simple param value";
}
private static String getGetExtensionId() {
@@ -259,7 +272,7 @@
}
private static String getGetConfigElementName() {
- return "name";
+ return "dummy-name";
}
public static void main(String[] args) throws IOException {