You're the third in a row with this error. Did we introduce something strange 
in 1.3? Anyhow, here's a patch to work around the problem:

https://issues.apache.org/jira/browse/NUTCH-1016

> Hello,
> 
> I finally finished crawling yesterday at midnight, and wanted to
> index into Solr.
> 
> So I used the command:
> 
> bin/nutch solrindex http://localhost:8983/solr /home/crawl/crawldb
> /home/crawl/linkdb -dir /home/crawl/segments
> 
> and unfortunately, by the next morning the indexing had failed. I
> guess this is Solr-specific. I also think that one of my crawled
> pages contains illegal characters or something.
> 
> Here is the log excerpt from hadoop.log:
> 
> Any ideas / help / recommendations greatly appreciated.
> Best Regards.
> 
> org.apache.solr.common.SolrException: [was class
> java.io.CharConversionException] Invalid UTF-8 character 0xfffe at
> char #1387083, byte #1464613)  java.lang.RuntimeException: [was class
> java.io.CharConversionException] Invalid UTF-8 character 0xfffe at
> char #1387083, byte #1464613)   at
> com.ctc.wstx.util.ExceptionUtil.throwRuntimeException(ExceptionUtil.java:18
> ) at com.ctc.wstx.sr.StreamScanner.throwLazyError(StreamScanner.java:731)
> at
> com.ctc.wstx.sr.BasicStreamReader.safeFinishToken(BasicStreamReader.java:3
> 657) at
> com.ctc.wstx.sr.BasicStreamReader.getText(BasicStreamReader.java:809) at
> org.apache.solr.handler.XMLLoader.readDoc(XMLLoader.java:287) at
> org.apache.solr.handler.XMLLoader.processUpdate(XMLLoader.java:146) at
> org.apache.solr.handler.XMLLoader.load(XMLLoader.java:77)
> at
> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(Content
> StreamHandlerBase.java:67) at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBas
> e.java:129) at org.apache.solr.core.SolrCore.execute(SolrCore.java:1368)   
> at
> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java
> :356) at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.jav
> a:252) at
> org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandl
> er.java:1212) at
> org.mortbay.jetty.servlet.ServletHandler.handle(ServletHandler.java:399)
> at
> org.mortbay.jetty.security.SecurityHandler.handle(SecurityHandler.java:216
> ) at
> org.mortbay.jetty.servlet.SessionHandler.handle(SessionHandler.java:182)
> at
> org.mortbay.jetty.handler.ContextHandler.handle(ContextHandler.java:766)
> at org.mortbay.jetty.webapp.WebAppContext.handle(WebAppContext.java:450)
> at
> org.mortbay.jetty.handler.ContextHandlerCollection.handle(ContextHandlerCo
> llection.java:230) at
> org.mortbay.jetty.handler.HandlerCollection.handle(HandlerCollection.java:
> 114) at
> org.mortbay.jetty.handler.HandlerWrapper.handle(HandlerWrapper.java:152)
> at org.mortbay.jetty.Server.handle(Server.java:326)     at
> org.mortbay.jetty.HttpConnection.handleRequest(HttpConnection.java:542)
>      at
> 
> [was class java.io.CharConversionException] Invalid UTF-8 character
> 0xfffe at char #1387083, byte #1464613)  java.lang.RuntimeException:
> [was class java.io.CharConversionException] Invalid UTF-8 character
> 0xfffe at char #1387083, byte #1464613)         at
> com.ctc.wstx.util.ExceptionUtil.throwRuntimeException(ExceptionUtil.java:18
> ) at com.ctc.wstx.sr.StreamScanner.throwLazyError(StreamScanner.java:731)
> at
> com.ctc.wstx.sr.BasicStreamReader.safeFinishToken(BasicStreamReader.java:3
> 657) at
> com.ctc.wstx.sr.BasicStreamReader.getText(BasicStreamReader.java:809) at
> org.apache.solr.handler.XMLLoader.readDoc(XMLLoader.java:287) at
> org.apache.solr.handler.XMLLoader.processUpdate(XMLLoader.java:146) at
> org.apache.solr.handler.XMLLoader.load(XMLLoader.java:77)
>         at
> org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(Content
> StreamHandlerBase.java:67) at
> org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBas
> e.java:129) at org.apache.solr.core.SolrCore.execute(SolrCore.java:1368)   
> at
> org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java
> :356) at
> org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.jav
> a:252) at
> org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandl
> er.java:1212) at
> org.mortbay.jetty.servlet.ServletHandler.handle(ServletHandler.java:399)
> at
> org.mortbay.jetty.security.SecurityHandler.handle(SecurityHandler.java:216
> ) at
> org.mortbay.jetty.servlet.SessionHandler.handle(SessionHandler.java:182)
> at
> org.mortbay.jetty.handler.ContextHandler.handle(ContextHandler.java:766)
> at org.mortbay.jetty.webapp.WebAppContext.handle(WebAppContext.java:450)
> at
> org.mortbay.jetty.handler.ContextHandlerCollection.handle(ContextHandlerCo
> llection.java:230) at
> org.mortbay.jetty.handler.HandlerCollection.handle(HandlerCollection.java:
> 114) at
> org.mortbay.jetty.handler.HandlerWrapper.handle(HandlerWrapper.java:152)
> at org.mortbay.jetty.Server.handle(Server.java:326)     at
> org.mortbay.jetty.HttpConnection.handleRequest(HttpConnection.java:542)
>      at
> 
> request: http://localhost:8983/solr/update?wt=javabin&version=2
>         at
> org.apache.solr.client.solrj.impl.CommonsHttpSolrServer.request(CommonsHtt
> pSolrServer.java:436) at
> org.apache.solr.client.solrj.impl.CommonsHttpSolrServer.request(CommonsHtt
> pSolrServer.java:245) at
> org.apache.solr.client.solrj.request.AbstractUpdateRequest.process(Abstrac
> tUpdateRequest.java:105) at
> org.apache.solr.client.solrj.SolrServer.add(SolrServer.java:49) at
> org.apache.nutch.indexer.solr.SolrWriter.write(SolrWriter.java:71) at
> org.apache.nutch.indexer.IndexerOutputFormat$1.write(IndexerOutputFormat.j
> ava:54) at
> org.apache.nutch.indexer.IndexerOutputFormat$1.write(IndexerOutputFormat.j
> ava:44) at
> org.apache.hadoop.mapred.ReduceTask$3.collect(ReduceTask.java:440) at
> org.apache.nutch.indexer.IndexerMapReduce.reduce(IndexerMapReduce.java:159
> ) at
> org.apache.nutch.indexer.IndexerMapReduce.reduce(IndexerMapReduce.java:50)
> at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:463)
> at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:411) at
> org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:216)
> 2011-07-08 00:50:53,508 ERROR solr.SolrIndexer - java.io.IOException: Job
> failed!

Reply via email to