When I use Nutch 1.2, the following error always occurs:
dtree.js: failed(2,0): Can't retrieve Tika parser for mime-type
text/javascript
main.js: failed(2,0): Can't retrieve Tika parser for mime-type
text/javascript
Progress.js: failed(2,0): Can't retrieve Tika parser for mime-type
text/javascript
My parse-plugins.xml is:
<mimeType name="text/html">
<plugin id="parse-html" />
</mimeType>
<mimeType name="application/xhtml+xml">
<plugin id="parse-html" />
</mimeType>
<mimeType name="application/rss+xml">
<plugin id="parse-rss" />
<plugin id="feed" />
</mimeType>
<mimeType name="application/x-bzip2">
<plugin id="parse-zip" />
</mimeType>
<mimeType name="application/x-gzip">
<plugin id="parse-zip" />
</mimeType>
<mimeType name="application/x-javascript">
<plugin id="parse-js" />
</mimeType>
<mimeType name="application/x-shockwave-flash">
<plugin id="parse-swf" />
</mimeType>
<mimeType name="application/zip">
<plugin id="parse-zip" />
</mimeType>
<mimeType name="text/xml">
<plugin id="parse-html" />
<plugin id="parse-rss" />
<plugin id="feed" />
</mimeType>
<mimeType name="application/vnd.nutch.example.cat">
<plugin id="parse-ext" />
</mimeType>
<mimeType name="application/vnd.nutch.example.md5sum">
<plugin id="parse-ext" />
</mimeType>
<mimeType name="application/javascript">
<plugin id="parse-tika" />
</mimeType>
<mimeType name="text/javascript">
<plugin id="parse-tika" />
</mimeType>
<aliases>
<alias name="parse-tika"
extension-id="org.apache.nutch.parse.tika.Parser" />
<alias name="parse-ext" extension-id="ExtParser" />
<alias name="parse-html"
extension-id="org.apache.nutch.parse.html.HtmlParser" />
<alias name="parse-js" extension-id="JSParser" />
<alias name="parse-msexcel"
extension-id="org.apache.nutch.parse.msexcel.MSExcelParser" />
<alias name="parse-mspowerpoint"
extension-id="org.apache.nutch.parse.mspowerpoint.MSPowerPointParser" />
<alias name="parse-msword"
extension-id="org.apache.nutch.parse.msword.MSWordParser" />
<alias name="parse-oo"
extension-id="org.apache.nutch.parse.oo.OpenDocument.Text" />
<alias name="parse-pdf"
extension-id="org.apache.nutch.parse.pdf.PdfParser" />
<alias name="parse-rss"
extension-id="org.apache.nutch.parse.rss.RSSParser" />
<alias name="feed"
extension-id="org.apache.nutch.parse.feed.FeedParser" />
<alias name="parse-swf"
extension-id="org.apache.nutch.parse.swf.SWFParser" />
<alias name="parse-text"
extension-id="org.apache.nutch.parse.text.TextParser" />
<alias name="parse-zip"
extension-id="org.apache.nutch.parse.zip.ZipParser" />
</aliases>
And my nutch-site.xml is:
<property>
<name>plugin.includes</name>
<value>protocol-httpclient|urlfilter-regex|parse-(text|html|js|tika)|index-(basic|anchor)|query-(basic|site|url)|response-(json|xml)|summary-basic|scoring-opic|urlnormalizer-(pass|regex|basic)</value>
</property>
Can anyone help me?
--
View this message in context:
http://lucene.472066.n3.nabble.com/Can-t-retrieve-Tika-parser-for-mime-type-text-javascript-tp3983599.html
Sent from the Nutch - User mailing list archive at Nabble.com.