This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 2747bf2fbf HDDS-9591. Replication Manager could incorrectly use 
QUASI_CLOSED replicas as replication sources for CLOSED containers (#5537)
2747bf2fbf is described below

commit 2747bf2fbfc6cb83b5a915fda8994246a874ede8
Author: Siddhant Sangwan <[email protected]>
AuthorDate: Tue Nov 7 21:47:36 2023 +0530

    HDDS-9591. Replication Manager could incorrectly use QUASI_CLOSED replicas 
as replication sources for CLOSED containers (#5537)
---
 .../replication/RatisUnderReplicationHandler.java  | 25 ++++++++++--
 .../TestRatisUnderReplicationHandler.java          | 47 ++++++++++++++++++++++
 2 files changed, 68 insertions(+), 4 deletions(-)

diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
index 931da903ea..5af7a16df2 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hdds.scm.container.replication;
 import org.apache.hadoop.hdds.conf.ConfigurationSource;
 import org.apache.hadoop.hdds.conf.StorageUnit;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
+import org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State;
 import org.apache.hadoop.hdds.scm.PlacementPolicy;
 import org.apache.hadoop.hdds.scm.ScmConfigKeys;
@@ -244,8 +245,8 @@ public class RatisUnderReplicationHandler
    * @param pendingOps List of pending ContainerReplicaOp
    * @return List of healthy datanodes that have closed/quasi-closed replicas
    * (or UNHEALTHY replicas if they're the only ones available) and are not
-   * pending replica deletion. Sorted in descending order of
-   * sequence id.
+   * pending replica deletion. If there is a maximum sequence ID, then only
+   * replicas with that sequence ID are returned.
    */
   private List<DatanodeDetails> getSources(
       RatisContainerReplicaCount replicaCount,
@@ -259,8 +260,24 @@ public class RatisUnderReplicationHandler
     }
 
     Predicate<ContainerReplica> predicate =
-        replica -> replica.getState() == State.CLOSED ||
-        replica.getState() == State.QUASI_CLOSED;
+        replica -> replica.getState() == State.CLOSED;
+
+    /*
+    If no CLOSED replicas are available, or if the container itself is
+    QUASI_CLOSED, then QUASI_CLOSED replicas are allowed to be sources.
+    */
+    boolean hasClosedReplica = false;
+    for (ContainerReplica replica : replicaCount.getReplicas()) {
+      if (replica.getState() == State.CLOSED) {
+        hasClosedReplica = true;
+        break;
+      }
+    }
+    if (!hasClosedReplica ||
+        replicaCount.getContainer().getState() == LifeCycleState.QUASI_CLOSED) 
{
+      predicate =
+          predicate.or(replica -> replica.getState() == State.QUASI_CLOSED);
+    }
 
     if (replicaCount.getHealthyReplicaCount() == 0) {
       predicate = predicate.or(
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java
index 3155f149a5..8e37aade06 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java
@@ -59,6 +59,7 @@ import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalSt
 import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
 import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
 import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor.THREE;
+import static 
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainer;
 import static 
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerInfo;
 import static 
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerReplica;
 import static 
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createReplicas;
@@ -561,6 +562,52 @@ public class TestRatisUnderReplicationHandler {
             command.getKey()));
   }
 
+  @Test
+  public void testOnlyClosedReplicasOfClosedContainersAreSources()
+      throws IOException {
+    container = createContainerInfo(RATIS_REPLICATION_CONFIG, 1,
+        HddsProtos.LifeCycleState.CLOSED, 1);
+
+    final Set<ContainerReplica> replicas = new HashSet<>(2);
+    final ContainerReplica closedReplica =
+        createContainerReplica(container.containerID(), 0, IN_SERVICE,
+            State.CLOSED, 1);
+    replicas.add(closedReplica);
+    replicas.add(createContainerReplica(container.containerID(), 0,
+            IN_SERVICE, State.QUASI_CLOSED, 1));
+
+    final Set<Pair<DatanodeDetails, SCMCommand<?>>> commands =
+        testProcessing(replicas, Collections.emptyList(),
+            getUnderReplicatedHealthResult(), 2, 1);
+    commands.forEach(
+        command -> Assert.assertEquals(closedReplica.getDatanodeDetails(),
+            command.getKey()));
+  }
+
+  @Test
+  public void testQuasiClosedReplicasAreSourcesWhenOnlyTheyAreAvailable()
+      throws IOException {
+    container = createContainerInfo(RATIS_REPLICATION_CONFIG, 1,
+        HddsProtos.LifeCycleState.CLOSED, 1);
+
+    Set<ContainerReplica> replicas = new HashSet<>(1);
+    replicas.add(createContainerReplica(container.containerID(), 0,
+        IN_SERVICE, State.QUASI_CLOSED, 1));
+
+    testProcessing(replicas, Collections.emptyList(),
+        getUnderReplicatedHealthResult(), 2, 2);
+
+    // test the same, but for a QUASI_CLOSED container
+    container = createContainer(HddsProtos.LifeCycleState.QUASI_CLOSED,
+        RATIS_REPLICATION_CONFIG);
+    replicas = new HashSet<>(1);
+    replicas.add(createContainerReplica(container.containerID(), 0,
+        IN_SERVICE, State.QUASI_CLOSED, container.getSequenceId()));
+
+    commandsSent.clear();
+    testProcessing(replicas, Collections.emptyList(),
+            getUnderReplicatedHealthResult(), 2, 2);
+  }
 
   /**
    * Tests whether the specified expectNumCommands number of commands are


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to