Hi All
I am using Nutch 2.3.1 + Gora + MongoDB. I have created a plugin (for testing only) named "parse-boiler" which sets the value of the "text" field to "HELLO PLUGIN". While updating conf/parse-plugins.xml, I observed that when the line *<plugin id="parse-boiler" />* is placed before the Tika parser, i.e. <plugin id="parse-tika" />, the plugin works, although the value of the "text" field is later overwritten by the default Tika parser.

When the line *<plugin id="parse-boiler" />* is placed after the Tika parser, i.e.
 <plugin id="parse-tika" />, the plugin does not work.

Below are snapshots of parse-plugins.xml for both cases.

Case 1: When the plugin works


<parse-plugins>

    <mimeType name="*">
      <plugin id="parse-tika" />
    </mimeType>

    <mimeType name="text/html">
        <plugin id="parse-html" />
    </mimeType>

        <mimeType name="application/xhtml+xml">
        <plugin id="parse-html" />
    </mimeType>

    <mimeType name="application/rss+xml">
*    <plugin id="parse-boiler" /> **
**   <plugin id="parse-tika" />*
    </mimeType>

    <mimeType name="application/x-bzip2">
        <!--  try and parse it with the zip parser -->
        <plugin id="parse-zip" />
    </mimeType>

    <mimeType name="application/x-gzip">
        <!--  try and parse it with the zip parser -->
        <plugin id="parse-zip" />
    </mimeType>

    <mimeType name="application/x-javascript">
        <plugin id="parse-js" />
    </mimeType>

    <mimeType name="application/x-shockwave-flash">
        <plugin id="parse-swf" />
    </mimeType>

    <mimeType name="application/zip">
        <plugin id="parse-zip" />
    </mimeType>

    <mimeType name="text/xml">
  <plugin id="parse-boiler" />
        <plugin id="parse-tika" />
          </mimeType>


<!-- Types for parse-ext plugin: required for unit tests to pass. -->

    <mimeType name="application/vnd.nutch.example.cat">
        <plugin id="parse-ext" />
    </mimeType>

    <mimeType name="application/vnd.nutch.example.md5sum">
        <plugin id="parse-ext" />
    </mimeType>

<!-- alias mappings for parse-xxx names to the actual extension implementation
    ids described in each plugin's plugin.xml file -->
    <aliases>
        <alias name="parse-html"
            extension-id="org.apache.nutch.parse.html.HtmlParser" />
        <alias name="parse-tika"
            extension-id="org.apache.nutch.parse.tika.TikaParser" />
        <alias name="parse-ext" extension-id="ExtParser" />
        <alias name="parse-js" extension-id="JSParser" />
        <alias name="feed"
            extension-id="org.apache.nutch.parse.feed.FeedParser" />
        <alias name="parse-swf"
            extension-id="org.apache.nutch.parse.swf.SWFParser" />
        <alias name="parse-zip"
            extension-id="org.apache.nutch.parse.zip.ZipParser" />
        <alias name="parse-boiler"
extension-id="org.apache.nutch.parse.boiler.BoilerPlateParser" />
    </aliases>

</parse-plugins>





Case 2: When the plugin does not work
<parse-plugins>

    <mimeType name="*">
      <plugin id="parse-tika" />
    </mimeType>

    <mimeType name="text/html">
        <plugin id="parse-html" />
    </mimeType>

        <mimeType name="application/xhtml+xml">
        <plugin id="parse-html" />
    </mimeType>

    <mimeType name="application/rss+xml">
*    <plugin id="parse-boiler" /> **
**   <plugin id="parse-tika" />*
    </mimeType>

    <mimeType name="application/x-bzip2">
        <!--  try and parse it with the zip parser -->
        <plugin id="parse-zip" />
    </mimeType>

    <mimeType name="application/x-gzip">
        <!--  try and parse it with the zip parser -->
        <plugin id="parse-zip" />
    </mimeType>

    <mimeType name="application/x-javascript">
        <plugin id="parse-js" />
    </mimeType>

    <mimeType name="application/x-shockwave-flash">
        <plugin id="parse-swf" />
    </mimeType>

    <mimeType name="application/zip">
        <plugin id="parse-zip" />
    </mimeType>

    <mimeType name="text/xml">
*    <plugin id="parse-tika" />**
**  <plugin id="parse-boiler" />**
***
          </mimeType>


<!-- Types for parse-ext plugin: required for unit tests to pass. -->

    <mimeType name="application/vnd.nutch.example.cat">
        <plugin id="parse-ext" />
    </mimeType>

    <mimeType name="application/vnd.nutch.example.md5sum">
        <plugin id="parse-ext" />
    </mimeType>

<!-- alias mappings for parse-xxx names to the actual extension implementation
    ids described in each plugin's plugin.xml file -->
    <aliases>
        <alias name="parse-html"
            extension-id="org.apache.nutch.parse.html.HtmlParser" />
        <alias name="parse-tika"
            extension-id="org.apache.nutch.parse.tika.TikaParser" />
        <alias name="parse-ext" extension-id="ExtParser" />
        <alias name="parse-js" extension-id="JSParser" />
        <alias name="feed"
            extension-id="org.apache.nutch.parse.feed.FeedParser" />
        <alias name="parse-swf"
            extension-id="org.apache.nutch.parse.swf.SWFParser" />
        <alias name="parse-zip"
            extension-id="org.apache.nutch.parse.zip.ZipParser" />
        <alias name="parse-boiler"
extension-id="org.apache.nutch.parse.boiler.BoilerPlateParser" />
    </aliases>

</parse-plugins>






Why does this happen when the order of the plugins is changed? Is something missing?

Thanks
Harsh


Reply via email to