This is an automated email from the ASF dual-hosted git repository.

HoustonPutman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new 4e5c83d2f07 SOLR-17821: Fix error scenario for ShardInstall or Restore (#3434)
4e5c83d2f07 is described below

commit 4e5c83d2f07e3c1be51dcdcf5004665df33fb419
Author: Houston Putman <[email protected]>
AuthorDate: Wed May 20 15:00:27 2026 -0700

    SOLR-17821: Fix error scenario for ShardInstall or Restore (#3434)
---
 .../solr-17821-fix-restore-error-scenario.yml      |   9 ++
 .../api/model/InstallShardDataRequestBody.java     |   4 +
 .../java/org/apache/solr/cloud/SyncStrategy.java   |  13 +-
 .../cloud/api/collections/InstallShardDataCmd.java | 122 ++++++++++++++++---
 .../solr/cloud/api/collections/RestoreCmd.java     |  40 +++++--
 .../solr/handler/admin/CollectionsHandler.java     |   3 +
 .../solr/handler/admin/api/InstallCoreData.java    |   6 -
 .../solr/handler/admin/api/InstallShardData.java   |   6 +-
 .../apache/solr/handler/admin/api/RestoreCore.java |   6 -
 .../solr/handler/component/ShardRequest.java       |   3 +
 .../apache/solr/cloud/CollectionsAPISolrJTest.java |  72 ++++++-----
 .../solr/cloud/ZkShardTermsRecoveryTest.java       |   8 ++
 .../LocalFSCloudIncrementalBackupTest.java         |   6 +
 .../api/collections/LocalFSInstallShardTest.java   |   8 +-
 .../apache/solr/gcs/GCSIncrementalBackupTest.java  |   6 +
 .../org/apache/solr/gcs/GCSInstallShardTest.java   |   9 +-
 .../apache/solr/s3/S3IncrementalBackupTest.java    |  15 +++
 .../org/apache/solr/s3/S3InstallShardTest.java     |   9 +-
 .../apache/solr/cloud/MiniSolrCloudCluster.java    |   9 ++
 .../collections/AbstractIncrementalBackupTest.java | 133 ++++++++++++++++++++-
 .../api/collections/AbstractInstallShardTest.java  |  69 +++++++++--
 21 files changed, 456 insertions(+), 100 deletions(-)

diff --git a/changelog/unreleased/solr-17821-fix-restore-error-scenario.yml 
b/changelog/unreleased/solr-17821-fix-restore-error-scenario.yml
new file mode 100644
index 00000000000..dbadb3d6fe3
--- /dev/null
+++ b/changelog/unreleased/solr-17821-fix-restore-error-scenario.yml
@@ -0,0 +1,9 @@
+# See https://github.com/apache/solr/blob/main/dev-docs/changelog.adoc
+title: Fix error scenario in InstallShardData and Restore
+type: fixed # added, changed, fixed, deprecated, removed, dependency_update, security, other
+authors:
+  - name: Houston Putman
+    nick: HoustonPutman
+links:
+  - name: SOLR-17821
+    url: https://issues.apache.org/jira/browse/SOLR-17821
diff --git 
a/solr/api/src/java/org/apache/solr/client/api/model/InstallShardDataRequestBody.java
 
b/solr/api/src/java/org/apache/solr/client/api/model/InstallShardDataRequestBody.java
index 31bec8eb434..05b27f1dcab 100644
--- 
a/solr/api/src/java/org/apache/solr/client/api/model/InstallShardDataRequestBody.java
+++ 
b/solr/api/src/java/org/apache/solr/client/api/model/InstallShardDataRequestBody.java
@@ -24,5 +24,9 @@ public class InstallShardDataRequestBody {
 
   @JsonProperty public String repository;
 
+  @JsonProperty public String name;
+
+  @JsonProperty public String shardBackupId;
+
   @JsonProperty public String async;
 }
diff --git a/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java 
b/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java
index cfbad7cd7e1..d400e90d14d 100644
--- a/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java
+++ b/solr/core/src/java/org/apache/solr/cloud/SyncStrategy.java
@@ -74,11 +74,6 @@ public class SyncStrategy {
     updateExecutor = updateShardHandler.getUpdateExecutor();
   }
 
-  private static class ShardCoreRequest extends ShardRequest {
-    String coreName;
-    public String baseUrl;
-  }
-
   public PeerSync.PeerSyncResult sync(
       ZkController zkController, SolrCore core, ZkNodeProps leaderProps) {
     return sync(zkController, core, leaderProps, false, false);
@@ -322,8 +317,8 @@ public class SyncStrategy {
         } else {
           RecoveryRequest rr = new RecoveryRequest();
           rr.leaderProps = leaderProps;
-          rr.baseUrl = ((ShardCoreRequest) srsp.getShardRequest()).baseUrl;
-          rr.coreName = ((ShardCoreRequest) srsp.getShardRequest()).coreName;
+          rr.baseUrl = srsp.getShardRequest().nodeName;
+          rr.coreName = srsp.getShardRequest().coreName;
           recoveryRequests.add(rr);
         }
       } else {
@@ -355,9 +350,9 @@ public class SyncStrategy {
   private void requestSync(
       String baseUrl, String replica, String leaderUrl, String coreName, int 
nUpdates) {
     // TODO should we use peerSyncWithLeader instead?
-    ShardCoreRequest sreq = new ShardCoreRequest();
+    ShardRequest sreq = new ShardRequest();
     sreq.coreName = coreName;
-    sreq.baseUrl = baseUrl;
+    sreq.nodeName = baseUrl;
     sreq.purpose = ShardRequest.PURPOSE_PRIVATE;
     sreq.shards = new String[] {replica};
     sreq.actualShards = sreq.shards;
diff --git 
a/solr/core/src/java/org/apache/solr/cloud/api/collections/InstallShardDataCmd.java
 
b/solr/core/src/java/org/apache/solr/cloud/api/collections/InstallShardDataCmd.java
index ca654a150ae..9e48cba893a 100644
--- 
a/solr/core/src/java/org/apache/solr/cloud/api/collections/InstallShardDataCmd.java
+++ 
b/solr/core/src/java/org/apache/solr/cloud/api/collections/InstallShardDataCmd.java
@@ -17,20 +17,26 @@
 
 package org.apache.solr.cloud.api.collections;
 
-import static org.apache.solr.cloud.Overseer.QUEUE_OPERATION;
-import static org.apache.solr.common.params.CommonAdminParams.ASYNC;
-
+import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import java.lang.invoke.MethodHandles;
-import java.util.HashMap;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
 import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.TimeUnit;
+import java.util.stream.Collectors;
+import org.apache.solr.cloud.ZkShardTerms;
+import org.apache.solr.common.SolrErrorWrappingException;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.cloud.ClusterState;
 import org.apache.solr.common.cloud.DocCollection;
+import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkNodeProps;
-import org.apache.solr.common.params.CollectionParams;
 import org.apache.solr.common.params.CoreAdminParams;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.util.NamedList;
@@ -80,27 +86,112 @@ public class InstallShardDataCmd implements 
CollApiCmds.CollectionApiCommand {
     // Build the core-admin request
     final ModifiableSolrParams coreApiParams = new ModifiableSolrParams();
     coreApiParams.set(
-        CoreAdminParams.ACTION, 
CoreAdminParams.CoreAdminAction.INSTALLCOREDATA.toString());
-    typedMessage.toMap(new HashMap<>()).forEach((k, v) -> coreApiParams.set(k, 
v.toString()));
+        CoreAdminParams.ACTION, 
CoreAdminParams.CoreAdminAction.RESTORECORE.toString());
+    coreApiParams.set(CoreAdminParams.BACKUP_LOCATION, typedMessage.location);
+    coreApiParams.set(CoreAdminParams.BACKUP_REPOSITORY, 
typedMessage.repository);
+    coreApiParams.set(CoreAdminParams.NAME, typedMessage.name);
+    coreApiParams.set(CoreAdminParams.SHARD_BACKUP_ID, 
typedMessage.shardBackupId);
 
     // Send the core-admin request to each replica in the slice
     final ShardHandler shardHandler = ccc.newShardHandler();
-    shardRequestTracker.sliceCmd(clusterState, coreApiParams, null, 
installSlice, shardHandler);
+    List<Replica> notLiveReplicas =
+        shardRequestTracker.sliceCmd(clusterState, coreApiParams, null, 
installSlice, shardHandler);
     final String errorMessage =
         String.format(
             Locale.ROOT,
-            "Could not install data to collection [%s] and shard [%s]",
+            "Could not install data to collection [%s] and shard [%s] on any 
leader-eligible replicas",
             typedMessage.collection,
             typedMessage.shard);
-    shardRequestTracker.processResponses(results, shardHandler, true, 
errorMessage);
+    shardRequestTracker.processResponses(results, shardHandler, false, 
errorMessage);
+    Collection<Replica> allReplicas =
+        clusterState
+            .getCollection(typedMessage.collection)
+            .getSlice(typedMessage.shard)
+            .getReplicas();
+
+    // Ensure that terms are correct for this shard after the execution is done
+    // We only care about leader eligible replicas, all others will eventually 
get updated.
+    List<Replica> leaderEligibleReplicas =
+        allReplicas.stream().filter(r -> 
r.getType().leaderEligible).collect(Collectors.toList());
+
+    NamedList<Object> failures = (NamedList<Object>) results.get("failure");
+    Set<Replica> successfulReplicas =
+        leaderEligibleReplicas.stream()
+            .filter(replica -> !notLiveReplicas.contains(replica))
+            .filter(
+                replica ->
+                    failures == null
+                        || 
failures.get(CollectionHandlingUtils.requestKey(replica)) == null)
+            .collect(Collectors.toSet());
+
+    if (successfulReplicas.isEmpty()) {
+      // No leader-eligible replicas succeeded, return failure
+      if (failures == null) {
+        throw new SolrException(
+            SolrException.ErrorCode.SERVER_ERROR,
+            errorMessage + ". No leader-eligible replicas are live.");
+      } else {
+        throw new SolrErrorWrappingException(
+            SolrException.ErrorCode.SERVER_ERROR, errorMessage, 
List.of(failures.asMap(1)));
+      }
+    } else if (successfulReplicas.size() < leaderEligibleReplicas.size()) {
+      // Some, but not all, leader-eligible replicas succeeded.
+      // Ensure the shard terms are correct so that the non-successful 
replicas go into recovery
+      ZkShardTerms shardTerms =
+          ccc.getCoreContainer()
+              .getZkController()
+              .getShardTerms(typedMessage.collection, typedMessage.shard);
+      final Set<String> replicasToStartRecovery = new HashSet<>();
+      leaderEligibleReplicas.stream()
+          .filter(r -> !successfulReplicas.contains(r))
+          .map(Replica::getName)
+          .forEach(replicasToStartRecovery::add);
+      log.info("Putting the unsuccessful replicas into recovery: {}", 
replicasToStartRecovery);
+      shardTerms.ensureHighestTerms(
+          installCollection,
+          
successfulReplicas.stream().map(Replica::getName).collect(Collectors.toSet()));
+      ccc.getZkStateReader()
+          .waitForState(
+              typedMessage.collection,
+              30,
+              TimeUnit.SECONDS,
+              (liveNodes, collectionState) -> {
+                
collectionState.getSlice(typedMessage.shard).getReplicas().stream()
+                    .filter(r -> Replica.State.RECOVERING.equals(r.getState()))
+                    .map(Replica::getName)
+                    .forEach(replicasToStartRecovery::remove);
+                return replicasToStartRecovery.isEmpty();
+              });
+
+      // In order for the async request to succeed, we need to ensure that 
there is no failure
+      // message
+      NamedList<Object> successes = (NamedList<Object>) results.get("success");
+      failures.forEach(
+          (replicaKey, value) -> {
+            successes.add(
+                replicaKey,
+                new NamedList<>(
+                    Map.of(
+                        "explanation",
+                        "Core install failed, but is now recovering from the 
leader",
+                        "failure",
+                        value)));
+          });
+      results.remove("failure");
+    } else {
+      // other replicas to-be-created will know that they are out of date by
+      // looking at their term : 0 compare to term of this core : 1
+      ccc.getCoreContainer()
+          .getZkController()
+          .getShardTerms(typedMessage.collection, typedMessage.shard)
+          .ensureHighestTermsAreNotZero();
+    }
   }
 
   /** A value-type representing the message received by {@link 
InstallShardDataCmd} */
+  @JsonIgnoreProperties(ignoreUnknown = true)
   public static class RemoteMessage implements JacksonReflectMapWriter {
 
-    @JsonProperty(QUEUE_OPERATION)
-    public String operation = 
CollectionParams.CollectionAction.INSTALLSHARDDATA.toLower();
-
     @JsonProperty public String collection;
 
     @JsonProperty public String shard;
@@ -109,8 +200,9 @@ public class InstallShardDataCmd implements 
CollApiCmds.CollectionApiCommand {
 
     @JsonProperty public String location;
 
-    @JsonProperty(ASYNC)
-    public String asyncId;
+    @JsonProperty public String name = "";
+
+    @JsonProperty public String shardBackupId;
 
     public void validate() {
       if (StrUtils.isBlank(collection)) {
diff --git 
a/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java 
b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
index c4ef360fa58..dabaf64420e 100644
--- a/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
+++ b/solr/core/src/java/org/apache/solr/cloud/api/collections/RestoreCmd.java
@@ -24,6 +24,7 @@ import static 
org.apache.solr.common.cloud.ZkStateReader.SHARD_ID_PROP;
 import static 
org.apache.solr.common.params.CollectionParams.CollectionAction.ADDREPLICA;
 import static 
org.apache.solr.common.params.CollectionParams.CollectionAction.CREATE;
 import static 
org.apache.solr.common.params.CollectionParams.CollectionAction.CREATESHARD;
+import static 
org.apache.solr.common.params.CollectionParams.CollectionAction.INSTALLSHARDDATA;
 import static 
org.apache.solr.common.params.CollectionParams.CollectionAction.MODIFYCOLLECTION;
 import static org.apache.solr.common.params.CommonParams.NAME;
 
@@ -108,7 +109,7 @@ public class RestoreCmd implements 
CollApiCmds.CollectionApiCommand {
     }
   }
 
-  private void requestReplicasToRestore(
+  private void requestShardsToRestore(
       NamedList<Object> results,
       DocCollection restoreCollection,
       AdminCmdContext adminCmdContext,
@@ -117,11 +118,13 @@ public class RestoreCmd implements 
CollApiCmds.CollectionApiCommand {
       String repo,
       ShardHandler shardHandler) {
     ShardRequestTracker shardRequestTracker =
-        CollectionHandlingUtils.asyncRequestTracker(adminCmdContext, ccc);
+        CollectionHandlingUtils.asyncRequestTracker(adminCmdContext, 
"/admin/collections", ccc);
     // Copy data from backed up index to each replica
     for (Slice slice : restoreCollection.getSlices()) {
       ModifiableSolrParams params = new ModifiableSolrParams();
-      params.set(CoreAdminParams.ACTION, 
CoreAdminParams.CoreAdminAction.RESTORECORE.toString());
+      params.set(CollectionAdminParams.COLLECTION, slice.getCollection());
+      params.set(CollectionAdminParams.SHARD, slice.getName());
+      params.set(CoreAdminParams.ACTION, INSTALLSHARDDATA.toString());
       Optional<ShardBackupId> shardBackupId = 
backupProperties.getShardBackupIdFor(slice.getName());
       if (shardBackupId.isPresent()) {
         params.set(CoreAdminParams.SHARD_BACKUP_ID, 
shardBackupId.get().getIdAsString());
@@ -130,11 +133,24 @@ public class RestoreCmd implements 
CollApiCmds.CollectionApiCommand {
       }
       params.set(CoreAdminParams.BACKUP_LOCATION, backupPath.toASCIIString());
       params.set(CoreAdminParams.BACKUP_REPOSITORY, repo);
-      shardRequestTracker.sliceCmd(
-          adminCmdContext.getClusterState(), params, null, slice, 
shardHandler);
+      Replica replica = slice.getLeader();
+      if (replica == null) {
+        replica =
+            slice.getReplicas().stream()
+                .findFirst()
+                .orElseThrow(
+                    () ->
+                        new SolrException(
+                            ErrorCode.INVALID_STATE,
+                            String.format(
+                                Locale.ROOT,
+                                "No replicas for shard %s in collection %s. 
Cannot restore to a shard with no replicas",
+                                slice.getName(),
+                                slice.getCollection())));
+      }
+      shardRequestTracker.sendShardRequest(replica, params, shardHandler);
     }
-    shardRequestTracker.processResponses(
-        new NamedList<>(), shardHandler, true, "Could not restore core");
+    shardRequestTracker.processResponses(results, shardHandler, true, "Could 
not restore shard");
   }
 
   /** Encapsulates the parsing and access for common parameters restore 
parameters and values */
@@ -273,7 +289,7 @@ public class RestoreCmd implements 
CollApiCmds.CollectionApiCommand {
       // refresh the location copy of collection state
       restoreCollection =
           
rc.zkStateReader.getClusterState().getCollection(rc.restoreCollectionName);
-      requestReplicasToRestore(
+      requestShardsToRestore(
           results,
           restoreCollection,
           
rc.adminCmdContext.withClusterState(rc.zkStateReader.getClusterState()),
@@ -625,7 +641,7 @@ public class RestoreCmd implements 
CollApiCmds.CollectionApiCommand {
           
rc.adminCmdContext.withClusterState(rc.zkStateReader.getClusterState()),
           restoreCollection);
       try {
-        requestReplicasToRestore(
+        requestShardsToRestore(
             results,
             restoreCollection,
             
rc.adminCmdContext.withClusterState(rc.zkStateReader.getClusterState()),
@@ -649,8 +665,7 @@ public class RestoreCmd implements 
CollApiCmds.CollectionApiCommand {
               ZkStateReader.COLLECTION_PROP, restoreCollection.getName(),
               ZkStateReader.READ_ONLY, null);
       new CollApiCmds.ModifyCollectionCmd(ccc)
-          .call(
-              adminCmdContext.subRequestContext(MODIFYCOLLECTION, null), 
params, new NamedList<>());
+          .call(adminCmdContext.subRequestContext(MODIFYCOLLECTION), params, 
new NamedList<>());
     }
 
     private void enableReadOnly(AdminCmdContext adminCmdContext, DocCollection 
restoreCollection)
@@ -662,8 +677,7 @@ public class RestoreCmd implements 
CollApiCmds.CollectionApiCommand {
               ZkStateReader.COLLECTION_PROP, restoreCollection.getName(),
               ZkStateReader.READ_ONLY, "true");
       new CollApiCmds.ModifyCollectionCmd(ccc)
-          .call(
-              adminCmdContext.subRequestContext(MODIFYCOLLECTION, null), 
params, new NamedList<>());
+          .call(adminCmdContext.subRequestContext(MODIFYCOLLECTION), params, 
new NamedList<>());
     }
   }
 }
diff --git 
a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java 
b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
index be9f394d26c..6e3c9e453d7 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CollectionsHandler.java
@@ -101,6 +101,7 @@ import static 
org.apache.solr.common.params.CommonParams.TIMING;
 import static org.apache.solr.common.params.CommonParams.VALUE_LONG;
 import static org.apache.solr.common.params.CoreAdminParams.BACKUP_LOCATION;
 import static org.apache.solr.common.params.CoreAdminParams.BACKUP_REPOSITORY;
+import static org.apache.solr.common.params.CoreAdminParams.SHARD_BACKUP_ID;
 import static org.apache.solr.common.util.StrUtils.formatString;
 
 import java.lang.invoke.MethodHandles;
@@ -1068,6 +1069,8 @@ public class CollectionsHandler extends 
RequestHandlerBase implements Permission
           reqBody.async = req.getParams().get(ASYNC);
           reqBody.repository = req.getParams().get(BACKUP_REPOSITORY);
           reqBody.location = req.getParams().get(BACKUP_LOCATION);
+          reqBody.name = req.getParams().get(NAME);
+          reqBody.shardBackupId = req.getParams().get(SHARD_BACKUP_ID);
 
           final InstallShardData installApi = new 
InstallShardData(h.coreContainer, req, rsp);
           final SolrJerseyResponse installResponse =
diff --git 
a/solr/core/src/java/org/apache/solr/handler/admin/api/InstallCoreData.java 
b/solr/core/src/java/org/apache/solr/handler/admin/api/InstallCoreData.java
index a91a0688e0a..5d6290e1497 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/api/InstallCoreData.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/api/InstallCoreData.java
@@ -95,12 +95,6 @@ public class InstallCoreData extends CoreAdminAPIBase 
implements InstallCoreData
             SolrException.ErrorCode.SERVER_ERROR,
             "Failed to install data to core=" + core.getName());
       }
-
-      // other replicas to-be-created will know that they are out of date by
-      // looking at their term : 0 compare to term of this core : 1
-      zkController
-          .getShardTerms(cd.getCollectionName(), cd.getShardId())
-          .ensureHighestTermsAreNotZero();
     }
 
     return response;
diff --git 
a/solr/core/src/java/org/apache/solr/handler/admin/api/InstallShardData.java 
b/solr/core/src/java/org/apache/solr/handler/admin/api/InstallShardData.java
index 840bbeab4a5..4033fa6f097 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/api/InstallShardData.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/api/InstallShardData.java
@@ -78,10 +78,10 @@ public class InstallShardData extends AdminAPIBase 
implements InstallShardDataAp
     // Only install data to shards which belong to a collection in read-only 
mode
     final DocCollection dc =
         
coreContainer.getZkController().getZkStateReader().getCollection(collName);
-    if (!dc.isReadOnly()) {
+    if (dc.getSlice(shardName).getReplicas().size() > 1 && !dc.isReadOnly()) {
       throw new SolrException(
           SolrException.ErrorCode.BAD_REQUEST,
-          "Collection must be in readOnly mode before installing data to 
shard");
+          "Collection must be in readOnly mode before installing data to shard 
with more than 1 replica");
     }
 
     submitRemoteMessageAndHandleResponse(
@@ -112,6 +112,8 @@ public class InstallShardData extends AdminAPIBase 
implements InstallShardDataAp
     if (requestBody != null) {
       messageTyped.location = requestBody.location;
       messageTyped.repository = requestBody.repository;
+      messageTyped.name = requestBody.name;
+      messageTyped.shardBackupId = requestBody.shardBackupId;
     }
 
     messageTyped.validate();
diff --git 
a/solr/core/src/java/org/apache/solr/handler/admin/api/RestoreCore.java 
b/solr/core/src/java/org/apache/solr/handler/admin/api/RestoreCore.java
index 3997b1971b4..dcf1cfe85c1 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/api/RestoreCore.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/api/RestoreCore.java
@@ -132,12 +132,6 @@ public class RestoreCore extends CoreAdminAPIBase 
implements RestoreCoreApi {
         throw new SolrException(
             SolrException.ErrorCode.SERVER_ERROR, "Failed to restore core=" + 
core.getName());
       }
-      // other replicas to-be-created will know that they are out of date by
-      // looking at their term : 0 compare to term of this core : 1
-      coreContainer
-          .getZkController()
-          .getShardTerms(cd.getCollectionName(), cd.getShardId())
-          .ensureHighestTermsAreNotZero();
 
       // transitions state of update log to ACTIVE
       UpdateLog updateLog = core.getUpdateHandler().getUpdateLog();
diff --git 
a/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java 
b/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java
index 5222b38abee..ecaee01c7fa 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/ShardRequest.java
@@ -60,6 +60,9 @@ public class ShardRequest {
   /** may be null */
   public String coreNodeName;
 
+  /** may be null */
+  public String coreName;
+
   /** may be null */
   public Map<String, String> headers;
 
diff --git 
a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java 
b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
index c9598a8a19e..17cb7e51ebb 100644
--- a/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/CollectionsAPISolrJTest.java
@@ -113,8 +113,7 @@ public class CollectionsAPISolrJTest extends 
SolrCloudTestCase {
       assertEquals(0, (int) status.get("status"));
       assertTrue(status.get("QTime") > 0);
     }
-    // Sometimes multiple cores land on the same node so it's less than 4
-    int nodesCreated = response.getCollectionNodesStatus().size();
+
     // Use of _default configset should generate a warning for data-driven 
functionality in
     // production use
     assertTrue(
@@ -126,7 +125,7 @@ public class CollectionsAPISolrJTest extends 
SolrCloudTestCase {
     assertEquals(0, response.getStatus());
     assertTrue(response.isSuccess());
     Map<String, NamedList<Integer>> nodesStatus = 
response.getCollectionNodesStatus();
-    assertEquals(nodesStatus.toString(), nodesCreated, nodesStatus.size());
+    assertEquals(nodesStatus.toString(), 4, nodesStatus.size());
 
     waitForState(
         "Expected " + collectionName + " to disappear from cluster state",
@@ -246,36 +245,43 @@ public class CollectionsAPISolrJTest extends 
SolrCloudTestCase {
       assertTrue(status.get("QTime") > 0);
     }
 
-    // Sometimes multiple cores land on the same node so it's less than 4
-    //    int nodesCreated = response.getCollectionNodesStatus().size();
-    //    response =
-    //
-    // 
CollectionAdminRequest.deleteCollection(collectionName).process(cluster.getSolrClient());
-    //
-    //    assertEquals(0, response.getStatus());
-    //    assertTrue(response.isSuccess());
-    //    Map<String, NamedList<Integer>> nodesStatus = 
response.getCollectionNodesStatus();
-    //    // Delete could have been sent before the collection was finished 
coming online
-    //    assertEquals(nodesStatus.toString(), nodesCreated, 
nodesStatus.size());
-    //
-    //    waitForState(
-    //        "Expected " + collectionName + " to disappear from cluster 
state",
-    //        collectionName,
-    //        Objects::isNull);
-    //
-    //    // Test Creating a new collection.
-    //    collectionName = "solrj_test2";
-    //
-    //    response =
-    //        CollectionAdminRequest.createCollection(collectionName, "conf", 
2, 2)
-    //            .process(cluster.getSolrClient());
-    //    assertEquals(0, response.getStatus());
-    //    assertTrue(response.isSuccess());
-    //
-    //    waitForState(
-    //        "Expected " + collectionName + " to appear in cluster state",
-    //        collectionName,
-    //        Objects::nonNull);
+    waitForState(
+        "Expected " + collectionName + " to disappear from cluster state",
+        collectionName,
+        ((liveNodes, collectionState) ->
+            collectionState.getSlices().stream()
+                .flatMap(
+                    s -> s.getReplicas(r -> 
!r.getState().equals(Replica.State.ACTIVE)).stream())
+                .findAny()
+                .isEmpty()));
+
+    response =
+        
CollectionAdminRequest.deleteCollection(collectionName).process(cluster.getSolrClient());
+
+    assertEquals(0, response.getStatus());
+    assertTrue(response.isSuccess());
+    Map<String, NamedList<Integer>> nodesStatus = 
response.getCollectionNodesStatus();
+    // Delete could have been sent before the collection was finished coming 
online
+    assertEquals(nodesStatus.toString(), 4, nodesStatus.size());
+
+    waitForState(
+        "Expected " + collectionName + " to disappear from cluster state",
+        collectionName,
+        Objects::isNull);
+
+    // Test Creating a new collection.
+    collectionName = "solrj_test2";
+
+    response =
+        CollectionAdminRequest.createCollection(collectionName, "conf", 2, 2)
+            .process(cluster.getSolrClient());
+    assertEquals(0, response.getStatus());
+    assertTrue(response.isSuccess());
+
+    waitForState(
+        "Expected " + collectionName + " to appear in cluster state",
+        collectionName,
+        Objects::nonNull);
   }
 
   @Test
diff --git 
a/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsRecoveryTest.java 
b/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsRecoveryTest.java
index ac1e9177fd0..10385f2c54c 100644
--- a/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsRecoveryTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/ZkShardTermsRecoveryTest.java
@@ -52,10 +52,18 @@ public class ZkShardTermsRecoveryTest extends 
SolrCloudTestCase {
         CollectionAdminRequest.createCollection(COLLECTION, "conf", 
NUM_SHARDS, NUM_REPLICAS)
             .process(cluster.getSolrClient())
             .getStatus());
+
     waitForState(
         "Timeout waiting for collection to be active after creation",
         COLLECTION,
         clusterShape(NUM_SHARDS, NUM_SHARDS * NUM_REPLICAS));
+
+    UpdateRequest up = new UpdateRequest();
+    for (int i = 0; i < 200; i++) {
+      up.add("id", "id-" + i);
+    }
+    up.commit(cluster.getSolrClient(), COLLECTION);
+    NUM_DOCS += 200;
   }
 
   @Before
diff --git 
a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
 
b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
index eabd8101480..6cf4e994d68 100644
--- 
a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
+++ 
b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSCloudIncrementalBackupTest.java
@@ -58,6 +58,12 @@ public class LocalFSCloudIncrementalBackupTest extends 
AbstractIncrementalBackup
           + "  </solrcloud>\n"
           + "  \n"
           + "  <backup>\n"
+          + "    <repository name=\"errorBackupRepository\" class=\""
+          + ErrorThrowingTrackingBackupRepository.class.getName()
+          + "\"> \n"
+          + "      <str name=\"delegateRepoName\">localfs</str>\n"
+          + "      <str name=\"hostPort\">${hostPort:8983}</str>\n"
+          + "    </repository>\n"
           + "    <repository name=\"trackingBackupRepository\" 
class=\"org.apache.solr.core.TrackingBackupRepository\"> \n"
           + "      <str name=\"delegateRepoName\">localfs</str>\n"
           + "    </repository>\n"
diff --git 
a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSInstallShardTest.java
 
b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSInstallShardTest.java
index 690ff447194..989c894b978 100644
--- 
a/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSInstallShardTest.java
+++ 
b/solr/core/src/test/org/apache/solr/cloud/api/collections/LocalFSInstallShardTest.java
@@ -30,6 +30,12 @@ public class LocalFSInstallShardTest extends 
AbstractInstallShardTest {
           + "    <repository name=\"trackingBackupRepository\" 
class=\"org.apache.solr.core.TrackingBackupRepository\"> \n"
           + "      <str name=\"delegateRepoName\">localfs</str>\n"
           + "    </repository>\n"
+          + "    <repository name=\"errorBackupRepository\" class=\""
+          + 
AbstractIncrementalBackupTest.ErrorThrowingTrackingBackupRepository.class.getName()
+          + "\"> \n"
+          + "      <str name=\"delegateRepoName\">localfs</str>\n"
+          + "      <str name=\"hostPort\">${hostPort:8983}</str>\n"
+          + "    </repository>\n"
           + "    <repository name=\"localfs\" 
class=\"org.apache.solr.core.backup.repository.LocalFileSystemRepository\"> \n"
           + "    </repository>\n"
           + "  </backup>\n";
@@ -43,7 +49,7 @@ public class LocalFSInstallShardTest extends 
AbstractInstallShardTest {
     final String tmpDirPrefix = whitespacesInPath ? "my install" : "myinstall";
     final String backupLocation = 
createTempDir(tmpDirPrefix).toAbsolutePath().toString();
 
-    configureCluster(1) // nodes
+    configureCluster(2) // nodes
         .addConfig(
             "conf1", 
TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf"))
         .withSolrXml(SOLR_XML.replace("ALLOWPATHS_TEMPLATE_VAL", 
backupLocation))
diff --git 
a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
 
b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
index 846563b929f..d955da11e1e 100644
--- 
a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
+++ 
b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSIncrementalBackupTest.java
@@ -55,6 +55,12 @@ public class GCSIncrementalBackupTest extends 
AbstractIncrementalBackupTest {
           + "  </solrcloud>\n"
           + "  \n"
           + "  <backup>\n"
+          + "    <repository name=\"errorBackupRepository\" class=\""
+          + ErrorThrowingTrackingBackupRepository.class.getName()
+          + "\"> \n"
+          + "      <str name=\"delegateRepoName\">localfs</str>\n"
+          + "      <str name=\"hostPort\">${hostPort:8983}</str>\n"
+          + "    </repository>\n"
           + "    <repository name=\"trackingBackupRepository\" 
class=\"org.apache.solr.core.TrackingBackupRepository\"> \n"
           + "      <str name=\"delegateRepoName\">localfs</str>\n"
           + "    </repository>\n"
diff --git 
a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSInstallShardTest.java
 
b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSInstallShardTest.java
index ecb08fa0192..4b78c0cc805 100644
--- 
a/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSInstallShardTest.java
+++ 
b/solr/modules/gcs-repository/src/test/org/apache/solr/gcs/GCSInstallShardTest.java
@@ -19,6 +19,7 @@ package org.apache.solr.gcs;
 
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
 import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.solr.cloud.api.collections.AbstractIncrementalBackupTest;
 import org.apache.solr.cloud.api.collections.AbstractInstallShardTest;
 import org.apache.solr.handler.admin.api.InstallShardData;
 import org.junit.AfterClass;
@@ -40,6 +41,12 @@ public class GCSInstallShardTest extends 
AbstractInstallShardTest {
           + "    <repository name=\"trackingBackupRepository\" 
class=\"org.apache.solr.core.TrackingBackupRepository\"> \n"
           + "      <str name=\"delegateRepoName\">localfs</str>\n"
           + "    </repository>\n"
+          + "    <repository name=\"errorBackupRepository\" class=\""
+          + 
AbstractIncrementalBackupTest.ErrorThrowingTrackingBackupRepository.class.getName()
+          + "\"> \n"
+          + "      <str name=\"delegateRepoName\">localfs</str>\n"
+          + "      <str name=\"hostPort\">${hostPort:8983}</str>\n"
+          + "    </repository>\n"
           + "    <repository name=\"localfs\" 
class=\"org.apache.solr.gcs.LocalStorageGCSBackupRepository\"> \n"
           + "      <str name=\"gcsBucket\">someBucketName</str>\n"
           + "      <str name=\"location\">backup1</str>\n"
@@ -51,7 +58,7 @@ public class GCSInstallShardTest extends 
AbstractInstallShardTest {
   @BeforeClass
   public static void setupClass() throws Exception {
 
-    configureCluster(1) // nodes
+    configureCluster(2) // nodes
         .addConfig("conf1", getFile("conf/solrconfig.xml").getParent())
         .withSolrXml(SOLR_XML)
         .configure();
diff --git 
a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
 
b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
index 80c5207505b..c35dbc17ab0 100644
--- 
a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
+++ 
b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3IncrementalBackupTest.java
@@ -22,6 +22,7 @@ import 
com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
 import java.lang.invoke.MethodHandles;
 import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.solr.cloud.api.collections.AbstractIncrementalBackupTest;
+import org.apache.solr.util.LogLevel;
 import org.junit.BeforeClass;
 import org.junit.ClassRule;
 import org.slf4j.Logger;
@@ -31,6 +32,9 @@ import software.amazon.awssdk.regions.Region;
 // Backups do checksum validation against a footer value not present in 
'SimpleText'
 @LuceneTestCase.SuppressCodecs({"SimpleText"})
 @ThreadLeakLingering(linger = 10)
+@LogLevel(
+    value =
+        
"org.apache.solr.cloud=DEBUG;org.apache.solr.cloud.api.collections=DEBUG;org.apache.solr.cloud.overseer=DEBUG")
 public class S3IncrementalBackupTest extends AbstractIncrementalBackupTest {
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
@@ -64,6 +68,12 @@ public class S3IncrementalBackupTest extends 
AbstractIncrementalBackupTest {
           + "  </solrcloud>\n"
           + "  \n"
           + "  <backup>\n"
+          + "    <repository name=\"errorBackupRepository\" class=\""
+          + ErrorThrowingTrackingBackupRepository.class.getName()
+          + "\"> \n"
+          + "      <str name=\"delegateRepoName\">s3</str>\n"
+          + "      <str name=\"hostPort\">${hostPort:8983}</str>\n"
+          + "    </repository>\n"
           + "    <repository name=\"trackingBackupRepository\" 
class=\"org.apache.solr.core.TrackingBackupRepository\"> \n"
           + "      <str name=\"delegateRepoName\">s3</str>\n"
           + "    </repository>\n"
@@ -107,6 +117,11 @@ public class S3IncrementalBackupTest extends 
AbstractIncrementalBackupTest {
         .addConfig("conf1", getFile("conf/solrconfig.xml").getParent())
         .withSolrXml(
             SOLR_XML
+                // Only a single node will have a bad bucket name, all else 
should succeed.
+                // The bad node will be added later
+                .replace("BAD_BUCKET_ALL_BUT_ONE", "non-existent")
+                .replace("BAD_BUCKET_ONE", BUCKET_NAME)
+                .replace("BAD_BUCKET", BUCKET_NAME)
                 .replace("BUCKET", BUCKET_NAME)
                 .replace("REGION", Region.US_EAST_1.id())
                 .replace("ENDPOINT", "http://localhost:" + 
S3_MOCK_RULE.getHttpPort()))
diff --git 
a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3InstallShardTest.java
 
b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3InstallShardTest.java
index 194b2ffddc6..c44e2170a39 100644
--- 
a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3InstallShardTest.java
+++ 
b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3InstallShardTest.java
@@ -20,6 +20,7 @@ package org.apache.solr.s3;
 import com.adobe.testing.s3mock.junit4.S3MockRule;
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
 import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.solr.cloud.api.collections.AbstractIncrementalBackupTest;
 import org.apache.solr.cloud.api.collections.AbstractInstallShardTest;
 import org.apache.solr.handler.admin.api.InstallShardData;
 import org.junit.BeforeClass;
@@ -44,6 +45,12 @@ public class S3InstallShardTest extends 
AbstractInstallShardTest {
           + "    <repository name=\"trackingBackupRepository\" 
class=\"org.apache.solr.core.TrackingBackupRepository\"> \n"
           + "      <str name=\"delegateRepoName\">s3</str>\n"
           + "    </repository>\n"
+          + "    <repository name=\"errorBackupRepository\" class=\""
+          + 
AbstractIncrementalBackupTest.ErrorThrowingTrackingBackupRepository.class.getName()
+          + "\"> \n"
+          + "      <str name=\"delegateRepoName\">s3</str>\n"
+          + "      <str name=\"hostPort\">${hostPort:8983}</str>\n"
+          + "    </repository>\n"
           + "    <repository name=\"s3\" 
class=\"org.apache.solr.s3.S3BackupRepository\"> \n"
           + "      <str name=\"s3.bucket.name\">BUCKET</str>\n"
           + "      <str name=\"s3.region\">REGION</str>\n"
@@ -65,7 +72,7 @@ public class S3InstallShardTest extends 
AbstractInstallShardTest {
 
     AbstractS3ClientTest.setS3ConfFile();
 
-    configureCluster(1) // nodes
+    configureCluster(2) // nodes
         .addConfig("conf1", getFile("conf/solrconfig.xml").getParent())
         .withSolrXml(
             SOLR_XML
diff --git 
a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java 
b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
index ae9a8fe87f6..121a212270d 100644
--- 
a/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
+++ 
b/solr/test-framework/src/java/org/apache/solr/cloud/MiniSolrCloudCluster.java
@@ -468,6 +468,15 @@ public class MiniSolrCloudCluster implements SolrBackend {
     return startJettySolrRunner(newNodeName(), jettyConfig, null);
   }
 
+  /**
+   * Start a new Solr instance, using the default config but with a custom 
Solr xml
+   *
+   * @return a JettySolrRunner
+   */
+  public JettySolrRunner startJettySolrRunner(String solrXml) throws Exception 
{
+    return startJettySolrRunner(newNodeName(), jettyConfig, solrXml);
+  }
+
   /**
    * Add a previously stopped node back to the cluster on a different port
    *
diff --git 
a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
 
b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
index 5f62d6669f5..fb643e551a4 100644
--- 
a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
+++ 
b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractIncrementalBackupTest.java
@@ -61,6 +61,7 @@ import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.cloud.Replica;
 import org.apache.solr.common.cloud.Slice;
 import org.apache.solr.common.cloud.ZkStateReader;
+import org.apache.solr.common.util.NamedList;
 import org.apache.solr.core.DirectoryFactory;
 import org.apache.solr.core.SolrCore;
 import org.apache.solr.core.TrackingBackupRepository;
@@ -89,12 +90,13 @@ public abstract class AbstractIncrementalBackupTest extends 
SolrCloudTestCase {
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   private static long docsSeed; // see indexDocs()
-  protected static final int NUM_NODES = 2;
+  protected static final int NUM_NODES = 3;
   protected static final int NUM_SHARDS = 2; // granted we sometimes shard 
split to get more
   protected static final int LARGE_NUM_SHARDS = 11; // Periodically chosen via 
randomization
   protected static final int REPL_FACTOR = 2;
   protected static final String BACKUPNAME_PREFIX = "mytestbackup";
   protected static final String BACKUP_REPO_NAME = "trackingBackupRepository";
+  protected static final String ERROR_BACKUP_REPO_NAME = 
"errorBackupRepository";
 
   protected String testSuffix = "test1";
 
@@ -491,6 +493,125 @@ public abstract class AbstractIncrementalBackupTest 
extends SolrCloudTestCase {
     }
   }
 
+  @Test
+  public void testRestoreToOriginalSucceedsWithErrors() throws Exception {
+    setTestSuffix("testRestoreToOriginalSucceedsOnASingleError");
+    final String backupCollectionName = getCollectionName();
+    final String backupName = BACKUPNAME_PREFIX + testSuffix;
+
+    // Bootstrap the backup collection with seed docs
+    CollectionAdminRequest.createCollection(backupCollectionName, "conf1", 
NUM_SHARDS, NUM_NODES)
+        .process(cluster.getSolrClient());
+    int backupDocs = indexDocs(backupCollectionName, true);
+
+    // Backup the collection; its docs are cleared below so each restore can be verified
+    try (BackupRepository repository =
+        cluster
+            .getJettySolrRunner(0)
+            .getCoreContainer()
+            .newBackupRepository(ERROR_BACKUP_REPO_NAME)) {
+      final String backupLocation = 
repository.getBackupLocation(getBackupLocation());
+      final RequestStatusState result =
+          CollectionAdminRequest.backupCollection(backupCollectionName, 
backupName)
+              .setBackupConfigset(false)
+              .setLocation(backupLocation)
+              .setRepositoryName(ERROR_BACKUP_REPO_NAME)
+              .processAndWait(cluster.getSolrClient(), 20);
+      assertEquals(RequestStatusState.COMPLETED, result);
+    }
+    assertEquals(backupDocs, getNumDocsInCollection(backupCollectionName));
+    clearDocs(backupCollectionName);
+    assertEquals(0, getNumDocsInCollection(backupCollectionName));
+
+    /*
+    Restore original docs and validate that doc count is correct
+    */
+    // Test a single bad node
+    try (BackupRepository repository =
+        cluster
+            .getJettySolrRunner(0)
+            .getCoreContainer()
+            .newBackupRepository(ERROR_BACKUP_REPO_NAME)) {
+      // Only the first jetty will fail
+      ErrorThrowingTrackingBackupRepository.portsToFailOn =
+          Set.of(cluster.getJettySolrRunner(0).getLocalPort());
+      final String backupLocation = 
repository.getBackupLocation(getBackupLocation());
+      final RequestStatusState result =
+          CollectionAdminRequest.restoreCollection(backupCollectionName, 
backupName)
+              .setLocation(backupLocation)
+              .setRepositoryName(ERROR_BACKUP_REPO_NAME)
+              .processAndWait(cluster.getSolrClient(), 30);
+      assertEquals(RequestStatusState.COMPLETED, result);
+      waitForState(
+          "The failed core-install should recover and become healthy",
+          backupCollectionName,
+          30,
+          TimeUnit.SECONDS,
+          SolrCloudTestCase.activeClusterShape(NUM_SHARDS, NUM_SHARDS * 
NUM_NODES));
+    }
+    assertEquals(backupDocs, getNumDocsInCollection(backupCollectionName));
+    clearDocs(backupCollectionName);
+    assertEquals(0, getNumDocsInCollection(backupCollectionName));
+
+    // Test a single good node
+    try (BackupRepository repository =
+        cluster
+            .getJettySolrRunner(0)
+            .getCoreContainer()
+            .newBackupRepository(ERROR_BACKUP_REPO_NAME)) {
+      final String backupLocation = 
repository.getBackupLocation(getBackupLocation());
+      // All but the first jetty will fail
+      ErrorThrowingTrackingBackupRepository.portsToFailOn =
+          cluster.getJettySolrRunners().subList(1, NUM_NODES).stream()
+              .map(JettySolrRunner::getLocalPort)
+              .collect(Collectors.toSet());
+      final RequestStatusState result =
+          CollectionAdminRequest.restoreCollection(backupCollectionName, 
backupName)
+              .setLocation(backupLocation)
+              .setRepositoryName(ERROR_BACKUP_REPO_NAME)
+              .processAndWait(cluster.getSolrClient(), 30);
+      assertEquals(RequestStatusState.COMPLETED, result);
+      waitForState(
+          "The failed core-install should recover and become healthy",
+          backupCollectionName,
+          30,
+          TimeUnit.SECONDS,
+          SolrCloudTestCase.activeClusterShape(NUM_SHARDS, NUM_SHARDS * 
NUM_NODES));
+    }
+    assertEquals(backupDocs, getNumDocsInCollection(backupCollectionName));
+  }
+
+  public static class ErrorThrowingTrackingBackupRepository extends 
TrackingBackupRepository {
+
+    public static Set<Integer> portsToFailOn = new HashSet<>();
+
+    private int port;
+
+    @Override
+    public void init(NamedList<?> args) {
+      super.init(args);
+      port = Integer.parseInt((String) args.get("hostPort"));
+    }
+
+    @Override
+    public void copyFileTo(URI sourceRepo, String fileName, Directory dest) 
throws IOException {
+      if (portsToFailOn.contains(port)) {
+        throw new UnsupportedOperationException();
+      }
+      super.copyFileTo(sourceRepo, fileName, dest);
+    }
+
+    @Override
+    public void copyIndexFileTo(
+        URI sourceRepo, String sourceFileName, Directory dest, String 
destFileName)
+        throws IOException {
+      if (portsToFailOn.contains(port)) {
+        throw new UnsupportedOperationException();
+      }
+      super.copyIndexFileTo(sourceRepo, sourceFileName, dest, destFileName);
+    }
+  }
+
   protected void corruptIndexFiles() throws IOException {
     List<Slice> slices = new 
ArrayList<>(getCollectionState(getCollectionName()).getSlices());
     Replica leader = slices.get(random().nextInt(slices.size())).getLeader();
@@ -567,6 +688,14 @@ public abstract class AbstractIncrementalBackupTest 
extends SolrCloudTestCase {
     
CollectionAdminRequest.deleteCollection(restoreCollectionName).process(solrClient);
   }
 
+  protected void clearDocs(String collectionName) throws Exception {
+    
CollectionAdminRequest.deleteCollection(collectionName).process(cluster.getSolrClient());
+    CollectionAdminRequest.createCollection(collectionName, "conf1", 
NUM_SHARDS, NUM_NODES)
+        .process(cluster.getSolrClient());
+
+    log.info("Cleared all docs in collection: {}", collectionName);
+  }
+
   private void indexDocs(String collectionName, int numDocs, boolean useUUID) 
throws Exception {
     Random random = new Random(docsSeed);
 
@@ -605,7 +734,7 @@ public abstract class AbstractIncrementalBackupTest extends 
SolrCloudTestCase {
     }
   }
 
-  private long getNumDocsInCollection(String collectionName) throws Exception {
+  protected long getNumDocsInCollection(String collectionName) throws 
Exception {
     return new QueryRequest(new SolrQuery("*:*"))
         .process(cluster.getSolrClient(), collectionName)
         .getResults()
diff --git 
a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractInstallShardTest.java
 
b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractInstallShardTest.java
index ac1cc7b2b44..86d12f4a7bd 100644
--- 
a/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractInstallShardTest.java
+++ 
b/solr/test-framework/src/java/org/apache/solr/cloud/api/collections/AbstractInstallShardTest.java
@@ -28,6 +28,7 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 import java.util.Random;
+import java.util.Set;
 import java.util.UUID;
 import java.util.concurrent.Callable;
 import java.util.concurrent.ExecutionException;
@@ -72,6 +73,7 @@ public abstract class AbstractInstallShardTest extends 
SolrCloudTestCase {
   private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
 
   protected static final String BACKUP_REPO_NAME = "trackingBackupRepository";
+  protected static final String ERROR_BACKUP_REPO_NAME = 
"errorBackupRepository";
 
   private static long docsSeed; // see indexDocs()
 
@@ -93,20 +95,20 @@ public abstract class AbstractInstallShardTest extends 
SolrCloudTestCase {
     }
   }
 
-  private String deleteAfterTest(String collName) {
+  protected String deleteAfterTest(String collName) {
     collectionsToDelete.add(collName);
     return collName;
   }
 
   // Populated by 'bootstrapBackupRepositoryData'
-  private static int singleShardNumDocs = -1;
-  private static int replicasPerShard = -1;
-  private static int multiShardNumDocs = -1;
-  private static URI singleShard1Uri = null;
-  private static URI nonExistentLocationUri = null;
-  private static URI[] multiShardUris = null;
+  protected static int singleShardNumDocs = -1;
+  protected static int replicasPerShard = -1;
+  protected static int multiShardNumDocs = -1;
+  protected static URI singleShard1Uri = null;
+  protected static URI nonExistentLocationUri = null;
+  protected static URI[] multiShardUris = null;
 
-  private List<String> collectionsToDelete;
+  protected List<String> collectionsToDelete;
 
   public static void bootstrapBackupRepositoryData(String 
baseRepositoryLocation) throws Exception {
     final int numShards = /*random().nextInt(3) + 2*/ 4;
@@ -175,6 +177,12 @@ public abstract class AbstractInstallShardTest extends 
SolrCloudTestCase {
     CollectionAdminRequest.installDataToShard(
             collectionName, "shard1", singleShardLocation, BACKUP_REPO_NAME)
         .process(cluster.getSolrClient());
+    waitForState(
+        "The failed core-install (previous leader) should recover and become 
healthy",
+        collectionName,
+        30,
+        TimeUnit.SECONDS,
+        SolrCloudTestCase.activeClusterShape(1, replicasPerShard));
 
     assertCollectionHasNumDocs(collectionName, singleShardNumDocs);
   }
@@ -238,6 +246,45 @@ public abstract class AbstractInstallShardTest extends 
SolrCloudTestCase {
     assertCollectionHasNumDocs(collectionName, multiShardNumDocs);
   }
 
+  @Test
+  public void testInstallSucceedsOnASingleError() throws Exception {
+    final String collectionName = createAndAwaitEmptyCollection(1, 2);
+    deleteAfterTest(collectionName);
+    enableReadOnly(collectionName);
+
+    
AbstractIncrementalBackupTest.ErrorThrowingTrackingBackupRepository.portsToFailOn
 =
+        Set.of(cluster.getJettySolrRunner(0).getLocalPort());
+    final String singleShardLocation = singleShard1Uri.toString();
+    { // Synchronous request: the install should succeed despite the failing node
+      CollectionAdminRequest.installDataToShard(
+              collectionName, "shard1", singleShardLocation, 
ERROR_BACKUP_REPO_NAME)
+          .process(cluster.getSolrClient());
+      waitForState(
+          "The failed core-install should recover and become healthy",
+          collectionName,
+          30,
+          TimeUnit.SECONDS,
+          SolrCloudTestCase.activeClusterShape(1, 2));
+      assertCollectionHasNumDocs(collectionName, singleShardNumDocs);
+    }
+
+    { // Asynchronous request: the install should succeed despite the failing node
+      final var requestStatusState =
+          CollectionAdminRequest.installDataToShard(
+                  collectionName, "shard1", singleShardLocation, 
ERROR_BACKUP_REPO_NAME)
+              .processAndWait(cluster.getSolrClient(), 15);
+
+      assertEquals(RequestStatusState.COMPLETED, requestStatusState);
+      waitForState(
+          "The failed core-install should recover and become healthy",
+          collectionName,
+          30,
+          TimeUnit.SECONDS,
+          SolrCloudTestCase.activeClusterShape(1, 2));
+      assertCollectionHasNumDocs(collectionName, singleShardNumDocs);
+    }
+  }
+
   /**
    * Builds a string representation of a valid solr.xml configuration, with 
the provided
    * backup-repository configuration inserted
@@ -272,7 +319,7 @@ public abstract class AbstractInstallShardTest extends 
SolrCloudTestCase {
         + "</solr>\n";
   }
 
-  private static void assertCollectionHasNumDocs(String collection, int 
expectedNumDocs)
+  protected static void assertCollectionHasNumDocs(String collection, int 
expectedNumDocs)
       throws Exception {
     final SolrClient solrClient = cluster.getSolrClient();
     assertEquals(
@@ -364,7 +411,7 @@ public abstract class AbstractInstallShardTest extends 
SolrCloudTestCase {
     log.info("Indexed {} docs to collection: {}", numDocs, collectionName);
   }
 
-  private static String createAndAwaitEmptyCollection(int numShards, int 
replicasPerShard)
+  protected static String createAndAwaitEmptyCollection(int numShards, int 
replicasPerShard)
       throws Exception {
     final SolrClient solrClient = cluster.getSolrClient();
 
@@ -377,7 +424,7 @@ public abstract class AbstractInstallShardTest extends 
SolrCloudTestCase {
     return collectionName;
   }
 
-  private static void enableReadOnly(String collectionName) throws Exception {
+  protected static void enableReadOnly(String collectionName) throws Exception 
{
     CollectionAdminRequest.modifyCollection(collectionName, Map.of("readOnly", 
true))
         .process(cluster.getSolrClient());
   }

Reply via email to