This is an automated email from the ASF dual-hosted git repository.

noble pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new bc7fceeb7d7 SOLR-17271: PerReplicaState: Shard leader elections still impact state.json (#2443)
bc7fceeb7d7 is described below

commit bc7fceeb7d7dc6e75b9e79ddf8cddb57eb8f452f
Author: Noble Paul <[email protected]>
AuthorDate: Sat Jun 1 09:52:03 2024 +1000

    SOLR-17271: PerReplicaState: Shard leader elections still impact state.json (#2443)
---
 .../apache/solr/cloud/overseer/SliceMutator.java   |  4 ++
 .../cloud/PerReplicaStatesIntegrationTest.java     | 48 ++++++++++++++++++++--
 2 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java b/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java
index c016fa5489b..d5170c66474 100644
--- a/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java
+++ b/solr/core/src/java/org/apache/solr/cloud/overseer/SliceMutator.java
@@ -159,6 +159,10 @@ public class SliceMutator {
       log.error("Could not mark shard leader for non existing collection: {}", 
collectionName);
       return ZkStateWriter.NO_OP;
     }
+    if (coll.isPerReplicaState()) {
+      log.debug("Do not mark shard leader for PRS collection: {}", 
collectionName);
+      return ZkStateWriter.NO_OP;
+    }
 
     Map<String, Slice> slices = coll.getSlicesMap();
     Slice slice = slices.get(sliceName);
diff --git a/solr/solrj/src/test/org/apache/solr/common/cloud/PerReplicaStatesIntegrationTest.java b/solr/solrj/src/test/org/apache/solr/common/cloud/PerReplicaStatesIntegrationTest.java
index e75100f1a50..3512e74c6f3 100644
--- a/solr/solrj/src/test/org/apache/solr/common/cloud/PerReplicaStatesIntegrationTest.java
+++ b/solr/solrj/src/test/org/apache/solr/common/cloud/PerReplicaStatesIntegrationTest.java
@@ -40,6 +40,7 @@ import org.slf4j.LoggerFactory;
 /** This test would be faster if we simulated the zk state instead. */
 @LogLevel(
     "org.apache.solr.common.cloud.ZkStateReader=DEBUG;"
+        + "org.apache.solr.cloud.overseer.ZkStateWriter=DEBUG;"
         + "org.apache.solr.handler.admin.CollectionsHandler=DEBUG;"
         + "org.apache.solr.common.cloud.PerReplicaStatesOps=DEBUG;"
         + "org.apache.solr.cloud.Overseer=INFO;"
@@ -315,7 +316,8 @@ public class PerReplicaStatesIntegrationTest extends SolrCloudTestCase {
       CollectionAdminRequest.createCollection(PRS_COLL, "conf", 10, 1)
           .setPerReplicaState(Boolean.TRUE)
           .process(cluster.getSolrClient());
-      stat = 
cluster.getZkClient().exists(DocCollection.getCollectionPath(PRS_COLL), null, 
true);
+      String PRS_PATH = DocCollection.getCollectionPath(PRS_COLL);
+      stat = cluster.getZkClient().exists(PRS_PATH, null, true);
       // +1 after all replica are added with on state.json write to 
CreateCollectionCmd.setData()
       assertEquals(1, stat.getVersion());
       // For each replica:
@@ -330,7 +332,7 @@ public class PerReplicaStatesIntegrationTest extends SolrCloudTestCase {
           CollectionAdminRequest.addReplicaToShard(PRS_COLL, "shard1")
               .process(cluster.getSolrClient());
       cluster.waitForActiveCollection(PRS_COLL, 10, 11);
-      stat = 
cluster.getZkClient().exists(DocCollection.getCollectionPath(PRS_COLL), null, 
true);
+      stat = cluster.getZkClient().exists(PRS_PATH, null, true);
       // For the new replica:
       // +2 for state.json overseer writes, even though there's no longer PRS 
updates from
       // overseer, current code would still do a "TOUCH" on the PRS entry
@@ -350,7 +352,7 @@ public class PerReplicaStatesIntegrationTest extends SolrCloudTestCase {
       CollectionAdminRequest.deleteReplica(PRS_COLL, "shard1", 
addedReplica.getName())
           .process(cluster.getSolrClient());
       cluster.waitForActiveCollection(PRS_COLL, 10, 10);
-      stat = 
cluster.getZkClient().exists(DocCollection.getCollectionPath(PRS_COLL), null, 
true);
+      stat = cluster.getZkClient().exists(PRS_PATH, null, true);
       // For replica deletion
       // +1 for ZkController#unregister, which delete the PRS entry from data 
node
       // overseer, current code would still do a "TOUCH" on the PRS entry
@@ -359,11 +361,49 @@ public class PerReplicaStatesIntegrationTest extends SolrCloudTestCase {
       for (JettySolrRunner j : cluster.getJettySolrRunners()) {
         j.stop();
         j.start(true);
-        stat = 
cluster.getZkClient().exists(DocCollection.getCollectionPath(PRS_COLL), null, 
true);
+        stat = cluster.getZkClient().exists(PRS_PATH, null, true);
         // ensure restart does not update the state.json, after 
addReplica/deleteReplica, 2 more
         // updates hence at version 3 on state.json version
         assertEquals(3, stat.getVersion());
       }
+
+      // test for leader election
+      Replica leader =
+          
cluster.getZkStateReader().clusterState.getCollection(PRS_COLL).getLeader("shard2");
+
+      JettySolrRunner j2 = cluster.startJettySolrRunner();
+      response =
+          CollectionAdminRequest.addReplicaToShard(PRS_COLL, "shard2")
+              .setNode(j2.getNodeName())
+              .process(cluster.getSolrClient());
+
+      // wait for the new replica to be active
+      cluster.waitForActiveCollection(PRS_COLL, 10, 11);
+      stat = cluster.getZkClient().exists(PRS_PATH, null, true);
+      // +1 for a new replica
+      assertEquals(4, stat.getVersion());
+      DocCollection c = cluster.getZkStateReader().getCollection(PRS_COLL);
+      Replica newreplica = c.getReplica((s, replica) -> 
replica.node.equals(j2.getNodeName()));
+
+      // let's stop the old leader
+      JettySolrRunner oldJetty = cluster.getReplicaJetty(leader);
+      oldJetty.stop();
+
+      cluster
+          .getZkStateReader()
+          .waitForState(
+              PRS_COLL,
+              10,
+              TimeUnit.SECONDS,
+              (liveNodes, collectionState) ->
+                  PerReplicaStatesOps.fetch(PRS_PATH, cluster.getZkClient(), 
null)
+                      .states
+                      .get(newreplica.name)
+                      .isLeader);
+      PerReplicaStates prs = PerReplicaStatesOps.fetch(PRS_PATH, 
cluster.getZkClient(), null);
+      stat = cluster.getZkClient().exists(PRS_PATH, null, true);
+      // the version should not have updated
+      assertEquals(4, stat.getVersion());
     } finally {
       cluster.shutdown();
     }

Reply via email to