When I use Nutch 1.2, the following error always occurs:
dtree.js: failed(2,0): Can't retrieve Tika parser for mime-type
text/javascript
main.js: failed(2,0): Can't retrieve Tika parser for mime-type
text/javascript
Progress.js: failed(2,0): Can't retrieve Tika parser for mime-type
text/javascript

My parse-plugins.xml is:
<!-- MIME type to parser-plugin mappings. Each plugin id below matches an alias
     name declared in the aliases section of this file. Plugins are kept in the
     order originally listed for each type (presumably the preference order;
     confirm against the Nutch documentation). -->
<mimeType name="text/html">
    <plugin id="parse-html" />
</mimeType>

<mimeType name="application/xhtml+xml">
    <plugin id="parse-html" />
</mimeType>

<mimeType name="application/rss+xml">
    <plugin id="parse-rss" />
    <plugin id="feed" />
</mimeType>

<!-- Compressed and archive types are all handled by parse-zip. -->
<mimeType name="application/x-bzip2">
    <plugin id="parse-zip" />
</mimeType>

<mimeType name="application/x-gzip">
    <plugin id="parse-zip" />
</mimeType>

<mimeType name="application/x-javascript">
    <plugin id="parse-js" />
</mimeType>

<mimeType name="application/x-shockwave-flash">
    <plugin id="parse-swf" />
</mimeType>

<mimeType name="application/zip">
    <plugin id="parse-zip" />
</mimeType>

<mimeType name="text/xml">
    <plugin id="parse-html" />
    <plugin id="parse-rss" />
    <plugin id="feed" />
</mimeType>

<mimeType name="application/vnd.nutch.example.cat">
    <plugin id="parse-ext" />
</mimeType>

<mimeType name="application/vnd.nutch.example.md5sum">
    <plugin id="parse-ext" />
</mimeType>

<!-- NOTE(review): the reported "Can't retrieve Tika parser" failures are for
     text/javascript, which is routed to parse-tika here, whereas
     application/x-javascript is routed to parse-js. Routing the two types
     below to parse-js may be what is intended; confirm before changing. -->
<mimeType name="application/javascript">
    <plugin id="parse-tika" />
</mimeType>

<mimeType name="text/javascript">
    <plugin id="parse-tika" />
</mimeType>
        

        
        <!-- Alias table: pairs each short plugin name with its extension-id.
             Entries are kept in their original order. -->
        <aliases>
            <alias name="parse-tika"
                   extension-id="org.apache.nutch.parse.tika.Parser" />
            <alias name="parse-ext"
                   extension-id="ExtParser" />
            <alias name="parse-html"
                   extension-id="org.apache.nutch.parse.html.HtmlParser" />
            <alias name="parse-js"
                   extension-id="JSParser" />
            <alias name="parse-msexcel"
                   extension-id="org.apache.nutch.parse.msexcel.MSExcelParser" />
            <alias name="parse-mspowerpoint"
                   extension-id="org.apache.nutch.parse.mspowerpoint.MSPowerPointParser" />
            <alias name="parse-msword"
                   extension-id="org.apache.nutch.parse.msword.MSWordParser" />
            <alias name="parse-oo"
                   extension-id="org.apache.nutch.parse.oo.OpenDocument.Text" />
            <alias name="parse-pdf"
                   extension-id="org.apache.nutch.parse.pdf.PdfParser" />
            <alias name="parse-rss"
                   extension-id="org.apache.nutch.parse.rss.RSSParser" />
            <alias name="feed"
                   extension-id="org.apache.nutch.parse.feed.FeedParser" />
            <alias name="parse-swf"
                   extension-id="org.apache.nutch.parse.swf.SWFParser" />
            <alias name="parse-text"
                   extension-id="org.apache.nutch.parse.text.TextParser" />
            <alias name="parse-zip"
                   extension-id="org.apache.nutch.parse.zip.ZipParser" />
        </aliases>


And my nutch-site.xml is:
<!-- Sets plugin.includes. The value looks like a regex-style alternation of
     plugin ids; it is kept on a single line exactly as written so that no
     whitespace becomes part of the value. -->
<property>
  <name>plugin.includes</name>
  <value>protocol-httpclient|urlfilter-regex|parse-(text|html|js|tika)|index-(basic|anchor)|query-(basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>
</property>



Can anyone help me?

--
View this message in context: 
http://lucene.472066.n3.nabble.com/Can-t-retrieve-Tika-parser-for-mime-type-text-javascript-tp3983599.html
Sent from the Nutch - User mailing list archive at Nabble.com.

Reply via email to