Hi, We have a SolrCloud 4.7.1 setup with several leaders and replicas. When a leader goes down, the remaining replicas try to elect a new leader among themselves. During this election, some of the replicas go into recovery mode, and the error "We are not the leader" is thrown. The server spent 40 minutes in a recovery loop and still did not recover completely.
,,"INFO - 2014-06-18 01:26:45.820; org.apache.solr.cloud.RecoveryStrategy; Wait 2.0 seconds before trying to recover again (1)","2014-06-18T01:26:45.820+0000",,,,,,,,,,,,,1,18,26,june,45,wednesday,2014,local,,,,"nix-all-logs",,,,,,,,"renew-sdb-1.int.ssi-cloud.com",database,1,,,,,,,,,,,,,,,,,,,,,"__-_--_::.;_....;__._______()",,,,,,"/var/log/tomcat7/solr.log",sdb,"splunkindexer-1.int.ssi-cloud.com",,,,,,31,,8,,,,,,,,,,, ,,"ERROR - 2014-06-18 01:26:45.820; org.apache.solr.cloud.RecoveryStrategy; Recovery failed - trying again... (0) core=app.quotes_shard1_replica1","2014-06-18T01:26:45.820+0000",,,,,,,,,,,"app.quotes_shard1_replica1",,1,18,26,june,45,wednesday,2014,local,,,,"nix-all-logs nix_errors",,,,,,,,"renew-sdb-1.int.ssi-cloud.com",database,1,,,,,,,,,,,,,,,,,,,,,"_-_--_::.;_....;___-__..._()_=.",,,,,,"/var/log/tomcat7/solr.log",sdb,"splunkindexer-1.int.ssi-cloud.com",,,,error,error,31,,8,,,,,,,,,,, ,,"ERROR - 2014-06-18 01:26:45.820; org.apache.solr.common.SolrException; Error while trying to recover. 
core=app.quotes_shard1_replica1:org.apache.solr.client.solrj.impl.HttpSolrServer$RemoteSolrException: We are not the leader at org.apache.solr.client.solrj.impl.HttpSolrServer.request(HttpSolrServer.java:495) at org.apache.solr.client.solrj.impl.HttpSolrServer.request(HttpSolrServer.java:199) at org.apache.solr.cloud.RecoveryStrategy.sendPrepRecoveryCmd(RecoveryStrategy.java:224) at org.apache.solr.cloud.RecoveryStrategy.doRecovery(RecoveryStrategy.java:371) at org.apache.solr.cloud.RecoveryStrategy.run(RecoveryStrategy.java:247)","2014-06-18T01:26:45.820+0000",,,,,,,,,,,"app.quotes_shard1_replica1:org.apache.solr.client.solrj.impl.HttpSolrServer$RemoteSolrException:",,1,18,26,june,45,wednesday,2014,local,,,,"nix-all-logs nix_errors",,,,,,,,"renew-sdb-1.int.ssi-cloud.com",database,6,,,,,,,,,,,,,,,,,,,,,"_-_--_::.;_....;_____._=.:......$:_____t_.......(.",,,,,,"/var/log/tomcat7/solr.log",sdb,"splunkindexer-1.int.ssi-cloud.com",,,,error,error,31,,8,,,,,,,,,,, ,0,"INFO - 2014-06-18 01:26:45.818; org.apache.solr.servlet.SolrDispatchFilter; [admin] webapp=null path=/admin/cores params={coreNodeName=core_node1&onlyIfLeaderActive=true&state=recovering&nodeName=10.4.30.89:8080_solr&action=PREPRECOVERY&checkLive=true&core=app.quotes_shard1_replica2&wt=javabin&onlyIfLeader=true&version=2} status=400 QTime=0 ","2014-06-18T01:26:45.818+0000",PREPRECOVERY,,,,true,,,,,,"app.quotes_shard1_replica2","core_node1",1,18,26,june,45,wednesday,2014,local,,,,"nix-all-logs",,,,,,,,"renew-sdb-3.int.ssi-cloud.com",database,1,,,,,,,"10.4.30.89:8080_solr",,,true,true,,,,,,,"{coreNodeName=core_node1&onlyIfLeaderActive=true&state=recovering&nodeName=10.4.30.89:8080_solr&action=PREPRECOVERY&checkLive=true&core=app.quotes_shard1_replica2&wt=javabin&onlyIfLeader=true&version=2}","/admin/cores",,"__-_--_::.;_....;_[]_=_=//_={=&=&=&=...:&=&=&=.&=&",,,,,,"/var/log/tomcat7/solr.log",sdb,"splunkindexer-3.int.ssi-cloud.com",recovering,400,,,,31,,8,,,,,,2,,,null,javabin, ,,"ERROR - 2014-06-18 
01:26:45.818; org.apache.solr.common.SolrException; org.apache.solr.common.SolrException: We are not the leader at org.apache.solr.handler.admin.CoreAdminHandler.handleWaitForStateAction(CoreAdminHandler.java:905) at org.apache.solr.handler.admin.CoreAdminHandler.handleRequestBody(CoreAdminHandler.java:198) at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:135) at org.apache.solr.servlet.SolrDispatchFilter.handleAdminRequest(SolrDispatchFilter.java:732) at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:268) at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:217) at org.apache.catalina.core.ApplicationFilterChain.internalDoFilter(ApplicationFilterChain.java:243) at org.apache.catalina.core.ApplicationFilterChain.doFilter(ApplicationFilterChain.java:210) at org.apache.catalina.core.StandardWrapperValve.invoke(StandardWrapperValve.java:222) at org.apache.catalina.core.StandardContextValve.invoke(StandardContextValve.java:123) at org.apache.catalina.core.StandardHostValve.invoke(StandardHostValve.java:171) at org.apache.catalina.valves.ErrorReportValve.invoke(ErrorReportValve.java:99) at org.apache.catalina.core.StandardEngineValve.invoke(StandardEngineValve.java:118) at org.apache.catalina.connector.CoyoteAdapter.service(CoyoteAdapter.java:407) at org.apache.coyote.http11.AbstractHttp11Processor.process(AbstractHttp11Processor.java:1004) at org.apache.coyote.AbstractProtocol$AbstractConnectionHandler.process(AbstractProtocol.java:589) at org.apache.tomcat.util.net.JIoEndpoint$SocketProcessor.run(JIoEndpoint.java:310) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:744)","2014-06-18T01:26:45.818+0000",,,,,,,,,,,,,1,18,26,june,45,wednesday,2014,local,,,,"nix-all-logs 
nix_errors",,,,,,,,"renew-sdb-3.int.ssi-cloud.com",database,21,,,,,,,,,,,,,,,,,,,,,"_-_--_::.;_....;_....:_____t_......(.:)t_......(.:",,,,,,"/var/log/tomcat7/solr.log",sdb,"splunkindexer-3.int.ssi-cloud.com",,,,error,error,31,,8,,,,,,,,,,, ,,"INFO - 2014-06-18 01:26:45.818; org.apache.solr.handler.admin.CoreAdminHandler; Going to wait for coreNodeName: core_node1, state: recovering, checkLive: true, onlyIfLeader: true","2014-06-18T01:26:45.818+0000",,,,,,,,,,,,,1,18,26,june,45,wednesday,2014,local,,,,"nix-all-logs",,,,,,,,"renew-sdb-3.int.ssi-cloud.com",database,1,,,,,,,,,,,,,,,,,,,,,"__-_--_::.;_.....;_____:_,_:_,_:_,_:_",,,,,,"/var/log/tomcat7/solr.log",sdb,"splunkindexer-3.int.ssi-cloud.com",,,,,,31,,8,,,,,,,,,,, ,,"INFO - 2014-06-18 01:26:45.814; org.apache.solr.cloud.Overseer$ClusterStateUpdater; Update state numShards=2 message={ I would appreciate any pointers on this. Thanks, --Gurfan -- View this message in context: http://lucene.472066.n3.nabble.com/Leader-Selection-Error-tp4142529.html Sent from the Solr - User mailing list archive at Nabble.com.