This is an automated email from the ASF dual-hosted git repository.

broustant pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 6d68598cce1 SOLR-16473: Fix race condition in shard split when a sub-shard is put in recovery state.
6d68598cce1 is described below

commit 6d68598cce17f691f020b4e99b1a9f775bb71b7f
Author: Bruno Roustant <[email protected]>
AuthorDate: Mon Dec 5 18:03:13 2022 +0100

    SOLR-16473: Fix race condition in shard split when a sub-shard is put in recovery state.
    
    Co-authored-by: Andy Vuong <[email protected]>
---
 solr/CHANGES.txt                                          |  1 +
 .../apache/solr/cloud/api/collections/SplitShardCmd.java  | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 01eb61d6a2d..e6310b6ee45 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -113,6 +113,7 @@ Bug Fixes
 
 * SOLR-16165: Rare Deadlock in SlotAcc initialization (Justin Sweeney, noble)
 
+* SOLR-16473: Fix race condition in shard split when a sub-shard is put in recovery state. (Andy Vuong via Bruno Roustant)
 
 * SOLR-10458: Fix followRedirect property on HttpSolrClient not set when using Builder pattern. (Eric Pugh)
 
diff --git a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
index 087e2bb6970..23473694b59 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/SplitShardCmd.java
@@ -39,6 +39,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;
 import java.util.Set;
+import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicReference;
 import org.apache.solr.client.solrj.cloud.DistribStateManager;
@@ -785,6 +786,20 @@ public class SplitShardCmd implements CollApiCmds.CollectionApiCommand {
         } else {
           ccc.offerStateUpdate(m);
         }
+        // Wait for the sub-shards to change to the RECOVERY state before creating the replica
+        // cores. Otherwise, there is a race condition and some recovery updates may be lost.
+        zkStateReader.waitForState(
+            collectionName,
+            60,
+            TimeUnit.SECONDS,
+            (collectionState) -> {
+              for (String subSlice : subSlices) {
+                if 
(!collectionState.getSlice(subSlice).getState().equals(Slice.State.RECOVERY)) {
+                  return false;
+                }
+              }
+              return true;
+            });
       }
 
       t = timings.sub("createCoresForReplicas");

Reply via email to