This is an automated email from the ASF dual-hosted git repository.
hossman pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 9b3baab5b46 SOLR-17656: New 'skipLeaderRecovery' replica property allows PULL replicas with existing indexes to immediately become ACTIVE
9b3baab5b46 is described below
commit 9b3baab5b460c9a14270642998a610c3bec286c2
Author: Chris Hostetter <[email protected]>
AuthorDate: Fri Feb 14 11:45:02 2025 -0700
SOLR-17656: New 'skipLeaderRecovery' replica property allows PULL replicas with existing indexes to immediately become ACTIVE
(cherry picked from commit e775fd26dbf0d5967c06f226d650bd8a6b896c4e)
---
solr/CHANGES.txt | 2 +
.../java/org/apache/solr/cloud/ZkController.java | 63 ++++++++++
.../org/apache/solr/cloud/TestPullReplica.java | 133 ++++++++++++++++++++-
.../pages/solrcloud-shards-indexing.adoc | 26 ++--
4 files changed, 216 insertions(+), 8 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 7dede6a34ae..eb179816e72 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -13,6 +13,8 @@ New Features
* SOLR-17626: Add RawTFSimilarityFactory class. (Christine Poerschke)
+* SOLR-17656: New 'skipLeaderRecovery' replica property allows PULL replicas with existing indexes to immediately become ACTIVE (hossman)
+
Improvements
---------------------
* SOLR-15751: The v2 API now has parity with the v1 "COLSTATUS" and "segments"
APIs, which can be used to fetch detailed information about
diff --git a/solr/core/src/java/org/apache/solr/cloud/ZkController.java b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
index 3f85011c7c9..4c6ea26a4df 100644
--- a/solr/core/src/java/org/apache/solr/cloud/ZkController.java
+++ b/solr/core/src/java/org/apache/solr/cloud/ZkController.java
@@ -98,6 +98,7 @@ import org.apache.solr.common.cloud.ZkMaintenanceUtils;
import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.cloud.ZooKeeperException;
+import org.apache.solr.common.params.CollectionAdminParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.Compressor;
@@ -1376,6 +1377,12 @@ public class ZkController implements Closeable {
}
}
+ // If we don't already have a reason to skipRecovery, check if we should skip
+ // due to replica property
+ if (!skipRecovery) {
+ skipRecovery = checkSkipRecoveryReplicaProp(core, replica);
+ }
+
boolean didRecovery =
checkRecovery(
recoverReloadedCores,
@@ -1418,6 +1425,62 @@ public class ZkController implements Closeable {
}
}
+ static final String SKIP_LEADER_RECOVERY_PROP = "skipLeaderRecovery";
+
+ /**
+ * Note: internally, property names are always lowercase
+ *
+ * @see #SKIP_LEADER_RECOVERY_PROP
+ */
+ static final String SKIP_LEADER_RECOVERY_PROP_KEY =
+ CollectionAdminParams.PROPERTY_PREFIX + SKIP_LEADER_RECOVERY_PROP.toLowerCase(Locale.ROOT);
+
+ /**
+ * Returns true if and only if this replica has a replica property indicating that leader recovery
+ * should be skipped <em>AND</em> the replica meets the necessary criteria to respect that
+ * property.
+ *
+ * @see #SKIP_LEADER_RECOVERY_PROP_KEY
+ */
+ private boolean checkSkipRecoveryReplicaProp(final SolrCore core, final Replica replica) {
+
+ if (!replica.getBool(SKIP_LEADER_RECOVERY_PROP_KEY, false)) {
+ // Property is not set (or set to false) so we are definitely not skipping recovery
+ return false;
+ }
+
+ // else: Sanity check if we should respect the property ...
+
+ if (replica.getType() != Type.PULL) {
+ if (log.isWarnEnabled()) {
+ log.warn(
+ "Ignoring {} replica property for replica {} because replica type
{} requires transaction logs",
+ SKIP_LEADER_RECOVERY_PROP,
+ replica.getName(),
+ replica.getType());
+ }
+ return false;
+ }
+
+ if (null == ReplicateFromLeader.getCommitVersion(core)) {
+ if (log.isWarnEnabled()) {
+ log.warn(
+ "Ignoring {} replica property for replica {} because there is no
local index commit",
+ SKIP_LEADER_RECOVERY_PROP,
+ replica.getName());
+ }
+ return false;
+ }
+
+ if (log.isInfoEnabled()) {
+ log.info(
+ "Skipping recovery from leader for replica {} due to {} replica
property",
+ replica.getName(),
+ SKIP_LEADER_RECOVERY_PROP);
+ }
+ return true;
+ }
+
private Replica getReplicaOrNull(DocCollection docCollection, String shard, String coreNodeName) {
if (docCollection == null) return null;
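
As an aside, a minimal standalone sketch of the key construction above may help: replica properties are stored lowercased under the "property." prefix (CollectionAdminParams.PROPERTY_PREFIX), which is why SKIP_LEADER_RECOVERY_PROP must be lowercased before the Replica lookup. The demo class name below is hypothetical; Replica.getBool(String, boolean) and CollectionAdminParams.PROPERTY_PREFIX are the same APIs the patch itself uses.

    import java.util.Locale;
    import org.apache.solr.common.cloud.Replica;
    import org.apache.solr.common.params.CollectionAdminParams;

    // Sketch only (not part of this commit): shows why the lookup key is lowercased.
    class SkipRecoveryKeyDemo {
      // "skipLeaderRecovery" is looked up as "property.skipleaderrecovery"
      static final String KEY =
          CollectionAdminParams.PROPERTY_PREFIX
              + "skipLeaderRecovery".toLowerCase(Locale.ROOT);

      static boolean skipsLeaderRecovery(Replica replica) {
        return replica.getBool(KEY, false); // false when the property is unset
      }
    }
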
diff --git a/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java b/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
index cb551cc0d70..540de2f2480 100644
--- a/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
+++ b/solr/core/src/test/org/apache/solr/cloud/TestPullReplica.java
@@ -695,7 +695,138 @@ public class TestPullReplica extends SolrCloudTestCase {
waitForNumDocsInAllActiveReplicas(2);
}
- public void testSearchWhileReplicationHappens() {}
+ public void testSkipLeaderRecoveryProperty() throws Exception {
+ final int numDocsAdded = 13;
+
+ assertTrue(
+ "Test has been broken, not enough jetties",
cluster.getJettySolrRunners().size() >= 2);
+
+ // Track the two jetty instances we're going to (re)use w/specific replica types
+ final JettySolrRunner tlogLeaderyJetty = cluster.getJettySolrRunners().get(0);
+ final JettySolrRunner pullFollowerJetty = cluster.getJettySolrRunners().get(1);
+ assertNotEquals(tlogLeaderyJetty, pullFollowerJetty);
+
+ // Start with a single tlog replica on the leader jetty
+ CollectionAdminRequest.createCollection(collectionName, "conf", 1, 0, 1, 0)
+ .setCreateNodeSet(tlogLeaderyJetty.getNodeName())
+ // NOTE: we restart the leader, so we need a non-ephemeral index
+ .setProperties(Map.of("solr.directoryFactory",
"solr.StandardDirectoryFactory"))
+ .process(cluster.getSolrClient());
+
+ // Add 2 PULL replicas on the follower jetty
+ CollectionAdminRequest.addReplicaToShard(collectionName, "shard1", Replica.Type.PULL)
+ .setCreateNodeSet(pullFollowerJetty.getNodeName())
+ .setPullReplicas(2)
+ .process(cluster.getSolrClient());
+
+ waitForState("Collection init never finished?", collectionName,
activeReplicaCount(0, 1, 2));
+
+ assertEquals(
+ 2, getCollectionState(collectionName).getReplicas(EnumSet.of(Replica.Type.PULL)).size());
+
+ // set our 'skip' property on one of the PULL replicas, and keep track of this replica
+ final String pullThatSkipsRecovery =
+ getCollectionState(collectionName)
+ .getReplicas(EnumSet.of(Replica.Type.PULL))
+ .get(0)
+ .getName();
+ CollectionAdminRequest.addReplicaProperty(
+ collectionName,
+ "shard1",
+ pullThatSkipsRecovery,
+ ZkController.SKIP_LEADER_RECOVERY_PROP,
+ "true")
+ .process(cluster.getSolrClient());
+
+ // index a few docs and wait to ensure everything is in sync with our expectations
+ addDocs(numDocsAdded);
+ waitForNumDocsInAllReplicas(numDocsAdded, getCollectionState(collectionName).getReplicas());
+ waitForState(
+ "Replica prop never added?",
+ collectionName,
+ (liveNodes, docState) -> {
+ return docState
+ .getReplica(pullThatSkipsRecovery)
+ .getBool(ZkController.SKIP_LEADER_RECOVERY_PROP_KEY, false);
+ });
+
+ // Now shut down our leader node and confirm all our PULL replicas are still active and serving
+ // requests
+ tlogLeaderyJetty.stop();
+ cluster.waitForJettyToStop(tlogLeaderyJetty);
+ waitForState(
+ "Leader should be down, PULLs should be active",
+ collectionName,
+ activeReplicaCount(0, 0, 2));
+ waitForNumDocsInAllReplicas(
+ numDocsAdded,
+ getCollectionState(collectionName).getReplicas(EnumSet.of(Replica.Type.PULL)));
+
+ // Add yet another PULL replica while the leader is down.
+ // This new replica will immediately stall going into recovery, since the leader is down.
+ CollectionAdminRequest.addReplicaToShard(collectionName, "shard1", Replica.Type.PULL)
+ .setCreateNodeSet(pullFollowerJetty.getNodeName())
+ .process(cluster.getSolrClient());
+ waitForState(
+ "3rd PULL replica should be down",
+ collectionName,
+ (liveNodes, colState) -> {
+ int active = 0;
+ int down = 0;
+ for (Replica r : colState.getReplicas(EnumSet.of(Replica.Type.PULL))) {
+ if (r.getState().equals(Replica.State.ACTIVE)) {
+ active++;
+ } else if (r.getState().equals(Replica.State.DOWN)) {
+ down++;
+ }
+ }
+ return ((2 == active) && (1 == down));
+ });
+
+ // But even when we set our 'skip' property on this new PULL replica, its *next* (re)start
+ // should still block waiting for RECOVERY since it won't have an active index.
+ final String pullThatWantsToSkipRecoveryButMustRecoverOnce =
+ getCollectionState(collectionName).getReplicas(EnumSet.of(Replica.Type.PULL)).stream()
+ .filter(r -> r.getState().equals(Replica.State.DOWN))
+ .map(r -> r.getName())
+ .findFirst()
+ .get();
+ CollectionAdminRequest.addReplicaProperty(
+ collectionName,
+ "shard1",
+ pullThatWantsToSkipRecoveryButMustRecoverOnce,
+ ZkController.SKIP_LEADER_RECOVERY_PROP,
+ "true")
+ .process(cluster.getSolrClient());
+
+ // Restart the node all of our PULL replicas are on, and confirm that our special replica goes
+ // ACTIVE while the others all stay DOWN
+ // (Note: Other PULL replicas can't start RECOVERING until the leader comes back)
+ pullFollowerJetty.stop();
+ cluster.waitForJettyToStop(pullFollowerJetty);
+ pullFollowerJetty.start();
+ waitForState(
+ "Special PULL should be ACTIVE, all others should be DOWN",
+ collectionName,
+ (liveNodes, colState) -> {
+ for (Replica r : colState.getReplicas()) {
+ if (r.getName().equals(pullThatSkipsRecovery)) {
+ if (!r.getState().equals(Replica.State.ACTIVE)) {
+ return false;
+ }
+ } else if (!r.getState().equals(Replica.State.DOWN)) {
+ return false;
+ }
+ }
+ return true;
+ });
+
+ // Restart our leader, eventually all replicas should be ACTIVE and happy
+ tlogLeaderyJetty.start();
+ waitForState(
+ "Leader should be back, all replicas active", collectionName,
activeReplicaCount(0, 1, 3));
+ waitForNumDocsInAllReplicas(numDocsAdded,
getCollectionState(collectionName).getReplicas());
+ }
private void waitForNumDocsInAllActiveReplicas(int numDocs)
throws IOException, SolrServerException, InterruptedException {
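
For completeness, the property set by the test above can also be removed again via the Collections API's DELETEREPLICAPROP command. A minimal SolrJ sketch, assuming hypothetical collection/shard/replica names:

    import org.apache.solr.client.solrj.SolrClient;
    import org.apache.solr.client.solrj.request.CollectionAdminRequest;

    // Sketch only (not part of this commit); "myCollection", "shard1", and
    // "core_node5" are placeholder names.
    class SkipRecoveryPropCleanup {
      static void clearSkipProperty(SolrClient client) throws Exception {
        // After DELETEREPLICAPROP, the replica resumes normal leader
        // recovery on its next (re)start.
        CollectionAdminRequest.deleteReplicaProperty(
                "myCollection", "shard1", "core_node5", "skipLeaderRecovery")
            .process(client);
      }
    }
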
diff --git a/solr/solr-ref-guide/modules/deployment-guide/pages/solrcloud-shards-indexing.adoc b/solr/solr-ref-guide/modules/deployment-guide/pages/solrcloud-shards-indexing.adoc
index b665c537fe4..11fd160abf5 100644
--- a/solr/solr-ref-guide/modules/deployment-guide/pages/solrcloud-shards-indexing.adoc
+++ b/solr/solr-ref-guide/modules/deployment-guide/pages/solrcloud-shards-indexing.adoc
@@ -104,16 +104,28 @@ If more than one replica in the shard is writing its own index instead of replic
=== Recovery with PULL Replicas
-If a PULL replica goes down or leaves the cluster, there are a few scenarios to consider.
-If the PULL replica cannot sync to the leader because the leader is down, replication would not occur.
-However, it would continue to serve queries.
-Once it can connect to the leader again, replication would resume.
+There are a few xref:solrcloud-recoveries-and-write-tolerance.adoc#recovery[Recovery]-related scenarios to consider when using PULL replicas:
-If the PULL replica cannot connect to ZooKeeper, it would be removed from the cluster and queries would not be routed to it from the cluster.
+* If a PULL replica cannot sync to the leader because the leader is down, or due to network partitioning, replication will not occur.
+However, the PULL replica will continue to serve queries.
+Once it can connect to the leader again, replication will resume.
+
+* If a PULL replica cannot connect to ZooKeeper, it will stop replicating, because it will no longer be able to confidently know which replica to treat as the Leader.
+The PULL replica will also be removed from the cluster status, and distributed queries will not be routed to it from other replicas in the cluster (or from SolrJ).
+
+* If a PULL replica dies or is unreachable for any other reason, it will no longer be query-able.
+When it rejoins the cluster, it will first attempt to recover from the current leader, and only when that is complete will it be ready to serve queries again.
+
+[NOTE]
+====
+It is important to realize that when PULL replicas join (or re-join) a cluster, they will not be query-able until they do an initial recovery from the current leader.
+
+This means that if a Solr node hosting an existing PULL replica is started (or restarted) at a moment when there is no active leader for that shard (either because all leader-eligible replicas are currently offline, or because the leader-eligible replicas are not yet active due to replaying their own transaction logs), then that PULL replica will not be query-able until the leader election is complete.
+
+This differs from the behavior of any other PULL replicas that were already active and serving queries before the current leader election.
+Those PULL replicas will remain query-able, using the last index they fetched from the last known leader.
+
+This behavior can be customized with an expert-level xref:replica-management.adoc#addreplicaprop[replica property] named `skipLeaderRecovery`.
+If this property is set to `true` on a PULL replica, then that replica will skip its initial `RECOVERING` phase on node start (or restart), and immediately begin serving queries using its local index (which it will update through normal periodic replication from the leader, if and when a leader is available).
+====
-If the PULL replica dies or is unreachable for any other reason, it won't be query-able.
-When it rejoins the cluster, it would replicate from the leader and when that is complete, it would be ready to serve queries again.
=== Queries with Preferred Replica Types
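
To complement the ref-guide NOTE above, here is a minimal SolrJ sketch of enabling the 'skipLeaderRecovery' property, mirroring the ADDREPLICAPROP usage in the new test; the collection, shard, and replica names are placeholders:

    import org.apache.solr.client.solrj.SolrClient;
    import org.apache.solr.client.solrj.request.CollectionAdminRequest;

    // Sketch only (not part of this commit); all names are placeholders.
    class SkipLeaderRecoveryEnable {
      static void enableSkip(SolrClient client) throws Exception {
        // Mark a PULL replica so it skips its initial RECOVERING phase on
        // node (re)start and immediately serves queries from its local index.
        CollectionAdminRequest.addReplicaProperty(
                "myCollection", "shard1", "core_node5", "skipLeaderRecovery", "true")
            .process(client);
      }
    }

As the NOTE cautions, this only helps PULL replicas that already have a local index commit; the ZkController check in this commit ignores the property otherwise.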