I'm getting an error on some of the nodes in my solr cloud cluster under
heavy indexing load. Once the error happens, that node, just repeatedly
gets this error over and over and will no longer index documents until a
restart. I believe the root cause of the error is:
File /solr7.1.0/UNCLASS/core_node39/data/index/_4i92_Lucene50_0.pos
could only be replicated to 0 nodes instead of minReplication (=1).
There are 41 datanode(s) running and no node(s) are excluded in this
operation.
Which seems to be related to HDFS. Any ideas?
Thank you!
The error is:
org.apache.solr.common.SolrException: Exception writing document id
COLLECT266786891620 to the index; possible analysis error.
at
org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:233)
at
org.apache.solr.update.processor.RunUpdateProcessor.processAdd(RunUpdateProcessorFactory.java:67)
at
org.apache.solr.update.processor.UpdateRequestProcessor.processAdd(UpdateRequestProcessor.java:55)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.doLocalAdd(DistributedUpdateProcessor.java:950)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.versionAdd(DistributedUpdateProcessor.java:1163)
at
org.apache.solr.update.processor.DistributedUpdateProcessor.processAdd(DistributedUpdateProcessor.java:633)
at
org.apache.solr.update.processor.LogUpdateProcessorFactory$LogUpdateProcessor.processAdd(LogUpdateProcessorFactory.java:103)
at
org.apache.solr.handler.loader.JavabinLoader$1.update(JavabinLoader.java:98)
at
org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$1.readOuterMostDocIterator(JavaBinUpdateRequestCodec.java:188)
at
org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$1.readIterator(JavaBinUpdateRequestCodec.java:144)
at
org.apache.solr.common.util.JavaBinCodec.readObject(JavaBinCodec.java:311)
at
org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:256)
at
org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec$1.readNamedList(JavaBinUpdateRequestCodec.java:130)
at
org.apache.solr.common.util.JavaBinCodec.readObject(JavaBinCodec.java:276)
at
org.apache.solr.common.util.JavaBinCodec.readVal(JavaBinCodec.java:256)
at
org.apache.solr.common.util.JavaBinCodec.unmarshal(JavaBinCodec.java:178)
at
org.apache.solr.client.solrj.request.JavaBinUpdateRequestCodec.unmarshal(JavaBinUpdateRequestCodec.java:195)
at
org.apache.solr.handler.loader.JavabinLoader.parseAndLoadDocs(JavabinLoader.java:109)
at
org.apache.solr.handler.loader.JavabinLoader.load(JavabinLoader.java:55)
at
org.apache.solr.handler.UpdateRequestHandler$1.load(UpdateRequestHandler.java:97)
at
org.apache.solr.handler.ContentStreamHandlerBase.handleRequestBody(ContentStreamHandlerBase.java:68)
at
org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:195)
at org.apache.solr.core.SolrCore.execute(SolrCore.java:2503)
at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:711)
at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:517)
at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:384)
at
org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:330)
at
org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1629)
at
org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:533)
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143)
at
org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:190)
at
org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1595)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:188)
at
org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1253)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:168)
at
org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:473)
at
org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1564)
at
org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:166)
at
org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1155)
at
org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141)
at
org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:219)
at
org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:126)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at
org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335)
at
org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:132)
at org.eclipse.jetty.server.Server.handle(Server.java:530)
at org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:347)
at
org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:256)
at
org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:279)
at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:102)
at org.eclipse.jetty.io.ChannelEndPoint$2.run(ChannelEndPoint.java:124)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:247)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.produce(EatWhatYouKill.java:140)
at
org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131)
at
org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:382)
at
org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:708)
at
org.eclipse.jetty.util.thread.QueuedThreadPool$2.run(QueuedThreadPool.java:626)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.lucene.store.AlreadyClosedException: this
IndexWriter is closed
at org.apache.lucene.index.IndexWriter.ensureOpen(IndexWriter.java:897)
at org.apache.lucene.index.IndexWriter.ensureOpen(IndexWriter.java:911)
at
org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1725)
at
org.apache.solr.update.DirectUpdateHandler2.updateDocument(DirectUpdateHandler2.java:965)
at
org.apache.solr.update.DirectUpdateHandler2.updateDocOrDocValues(DirectUpdateHandler2.java:954)
at
org.apache.solr.update.DirectUpdateHandler2.doNormalUpdate(DirectUpdateHandler2.java:334)
at
org.apache.solr.update.DirectUpdateHandler2.addDoc0(DirectUpdateHandler2.java:271)
at
org.apache.solr.update.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:221)
... 59 more
Caused by: org.apache.hadoop.ipc.RemoteException(java.io.IOException):
File /solr7.1.0/UNCLASS/core_node39/data/index/_4i92_Lucene50_0.pos
could only be replicated to 0 nodes instead of minReplication (=1).
There are 41 datanode(s) running and no node(s) are excluded in this
operation.
at
org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.chooseTarget4NewBlock(BlockManager.java:1724)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:3449)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:692)
at
org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.addBlock(AuthorizationProviderProxyClientProtocol.java:217)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.addBlock(ClientNamenodeProtocolServerSideTranslatorPB.java:506)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2281)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2277)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1920)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2275)
at org.apache.hadoop.ipc.Client.call(Client.java:1504)
at org.apache.hadoop.ipc.Client.call(Client.java:1441)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:230)
at com.sun.proxy.$Proxy11.addBlock(Unknown Source)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.addBlock(ClientNamenodeProtocolTranslatorPB.java:423)
at sun.reflect.GeneratedMethodAccessor25.invoke(Unknown Source)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at
org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:258)
at
org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:104)
at com.sun.proxy.$Proxy12.addBlock(Unknown Source)
at
org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.locateFollowingBlock(DFSOutputStream.java:1860)
at
org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.nextBlockOutputStream(DFSOutputStream.java:1656)
at
org.apache.hadoop.hdfs.DFSOutputStream$DataStreamer.run(DFSOutputStream.java:790)
Suppressed: java.lang.IllegalArgumentException: Self-suppression
not permitted
at java.lang.Throwable.addSuppressed(Throwable.java:1043)
at java.io.FilterOutputStream.close(FilterOutputStream.java:159)
at
org.apache.lucene.store.OutputStreamIndexOutput.close(OutputStreamIndexOutput.java:70)
at
org.apache.lucene.store.RateLimitedIndexOutput.close(RateLimitedIndexOutput.java:49)
at
org.apache.lucene.util.IOUtils.closeWhileHandlingException(IOUtils.java:123)
at
org.apache.lucene.util.IOUtils.closeWhileHandlingException(IOUtils.java:112)
at
org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter.close(Lucene50PostingsWriter.java:482)
at org.apache.lucene.util.IOUtils.close(IOUtils.java:89)
at org.apache.lucene.util.IOUtils.close(IOUtils.java:76)
at
org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter.close(BlockTreeTermsWriter.java:1026)
at
org.apache.lucene.util.IOUtils.closeWhileHandlingException(IOUtils.java:123)
at
org.apache.lucene.codecs.perfield.PerFieldPostingsFormat$FieldsWriter.merge(PerFieldPostingsFormat.java:170)
at
org.apache.lucene.index.SegmentMerger.mergeTerms(SegmentMerger.java:230)
at
org.apache.lucene.index.SegmentMerger.merge(SegmentMerger.java:115)
at
org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:4443)
at org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:4083)
at
org.apache.solr.update.SolrIndexWriter.merge(SolrIndexWriter.java:190)
at
org.apache.lucene.index.ConcurrentMergeScheduler.doMerge(ConcurrentMergeScheduler.java:624)
at
org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:661)
[CIRCULAR
REFERENCE:org.apache.hadoop.ipc.RemoteException(java.io.IOException):
File /solr7.1.0/UNCLASS/core_node39/data/index/_4i92_Lucene50_0.pos
could only be replicated to 0 nodes instead of minReplication (=1).
There are 41 datanode(s) running and no node(s) are excluded in this
operation.
at
org.apache.hadoop.hdfs.server.blockmanagement.BlockManager.chooseTarget4NewBlock(BlockManager.java:1724)
at
org.apache.hadoop.hdfs.server.namenode.FSNamesystem.getAdditionalBlock(FSNamesystem.java:3449)
at
org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.addBlock(NameNodeRpcServer.java:692)
at
org.apache.hadoop.hdfs.server.namenode.AuthorizationProviderProxyClientProtocol.addBlock(AuthorizationProviderProxyClientProtocol.java:217)
at
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.addBlock(ClientNamenodeProtocolServerSideTranslatorPB.java:506)
at
org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at
org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:617)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:1073)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2281)
at org.apache.hadoop.ipc.Server$Handler$1.run(Server.java:2277)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1920)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2275)
-Joe