This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new be408031e3 HDDS-7804. UNHEALTHY replicas will not contribute to
sufficient replication in RatisContainerReplicaCount (#4192)
be408031e3 is described below
commit be408031e3434daec5c688994377b7216412a7d3
Author: Siddhant Sangwan <[email protected]>
AuthorDate: Fri Jan 20 19:36:11 2023 +0530
HDDS-7804. UNHEALTHY replicas will not contribute to sufficient replication
in RatisContainerReplicaCount (#4192)
---
.../LegacyRatisContainerReplicaCount.java | 2 +-
.../replication/RatisContainerReplicaCount.java | 102 +++++++++++++++++++--
.../TestRatisContainerReplicaCount.java | 46 ++++++++++
.../TestRatisOverReplicationHandler.java | 25 -----
4 files changed, 140 insertions(+), 35 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java
index a2139707ca..1cd9c37e29 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/LegacyRatisContainerReplicaCount.java
@@ -46,6 +46,6 @@ public class LegacyRatisContainerReplicaCount extends
@Override
protected int healthyReplicaCountAdapter() {
- return 0;
+ return -getMisMatchedReplicaCount();
}
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
index ca30d2d5b8..e65ac74ed1 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
@@ -19,11 +19,13 @@ package org.apache.hadoop.hdds.scm.container.replication;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
+import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import java.util.ArrayList;
import java.util.Comparator;
+import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
@@ -44,10 +46,11 @@ public class RatisContainerReplicaCount implements
ContainerReplicaCount {
private int healthyReplicaCount;
private int unhealthyReplicaCount;
+ private int misMatchedReplicaCount;
private int decommissionCount;
private int maintenanceCount;
- private final int inFlightAdd;
- private final int inFlightDel;
+ private int inFlightAdd;
+ private int inFlightDel;
private final int repFactor;
private final int minHealthyForMaintenance;
private final ContainerInfo container;
@@ -60,6 +63,7 @@ public class RatisContainerReplicaCount implements
ContainerReplicaCount {
int minHealthyForMaintenance) {
this.unhealthyReplicaCount = 0;
this.healthyReplicaCount = 0;
+ this.misMatchedReplicaCount = 0;
this.decommissionCount = 0;
this.maintenanceCount = 0;
this.inFlightAdd = inFlightAdd;
@@ -84,11 +88,86 @@ public class RatisContainerReplicaCount implements
ContainerReplicaCount {
} else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) {
maintenanceCount++;
} else {
- if (LegacyReplicationManager.compareState(container.getState(),
+ if (cr.getState() == ContainerReplicaProto.State.UNHEALTHY) {
+ unhealthyReplicaCount++;
+ } else if (!LegacyReplicationManager.compareState(container.getState(),
cr.getState())) {
healthyReplicaCount++;
+ misMatchedReplicaCount++;
} else {
+ healthyReplicaCount++;
+ }
+ }
+ }
+ }
+
+ public RatisContainerReplicaCount(ContainerInfo containerInfo,
+ Set<ContainerReplica> replicas,
+ List<ContainerReplicaOp> replicaPendingOps,
+ int minHealthyForMaintenance) {
+ // Iterate replicas in deterministic order to avoid potential data loss
+ // on delete.
+ // See https://issues.apache.org/jira/browse/HDDS-4589.
+ // N.B., sort replicas by (containerID, datanodeDetails).
+ this.replicas = replicas.stream()
+ .sorted(Comparator.comparingLong(ContainerReplica::hashCode))
+ .collect(Collectors.toList());
+
+ this.container = containerInfo;
+ this.repFactor = containerInfo.getReplicationFactor().getNumber();
+ this.minHealthyForMaintenance
+ = Math.min(this.repFactor, minHealthyForMaintenance);
+
+ this.unhealthyReplicaCount = 0;
+ this.healthyReplicaCount = 0;
+ this.misMatchedReplicaCount = 0;
+ this.decommissionCount = 0;
+ this.maintenanceCount = 0;
+ this.inFlightAdd = 0;
+ this.inFlightDel = 0;
+
+ // collect DNs that have UNHEALTHY replicas
+ Set<DatanodeDetails> unhealthyReplicaDNs = new HashSet<>();
+ for (ContainerReplica r : replicas) {
+ if (r.getState() == ContainerReplicaProto.State.UNHEALTHY) {
+ unhealthyReplicaDNs.add(r.getDatanodeDetails());
+ }
+ }
+
+ // count pending adds and deletes
+ for (ContainerReplicaOp op : replicaPendingOps) {
+ if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) {
+ inFlightAdd++;
+ } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
+ if (!unhealthyReplicaDNs.contains(op.getTarget())) {
+ /*
+ We don't count UNHEALTHY replicas when considering sufficient
+ replication, so we also need to ignore pending deletes on
+ those unhealthy replicas, otherwise the pending delete will
+ decrement the healthy count and make the container appear
+ under-replicated when it is not.
+ */
+ inFlightDel++;
+ }
+ }
+ }
+
+ for (ContainerReplica cr : this.replicas) {
+ HddsProtos.NodeOperationalState state =
+ cr.getDatanodeDetails().getPersistedOpState();
+ if (state == DECOMMISSIONED || state == DECOMMISSIONING) {
+ decommissionCount++;
+ } else if (state == IN_MAINTENANCE || state == ENTERING_MAINTENANCE) {
+ maintenanceCount++;
+ } else {
+ if (cr.getState() == ContainerReplicaProto.State.UNHEALTHY) {
unhealthyReplicaCount++;
+ } else if (!LegacyReplicationManager.compareState(container.getState(),
+ cr.getState())) {
+ healthyReplicaCount++;
+ misMatchedReplicaCount++;
+ } else {
+ healthyReplicaCount++;
}
}
}
@@ -102,15 +181,20 @@ public class RatisContainerReplicaCount implements
ContainerReplicaCount {
return unhealthyReplicaCount;
}
+ public int getMisMatchedReplicaCount() {
+ return misMatchedReplicaCount;
+ }
+
/**
- * The new replication manager currently counts unhealthy and healthy
- * replicas together. This should be updated when changes from HDDS-6447
- * are integrated into the new replication manager. See
- * {@link LegacyRatisContainerReplicaCount}, which overrides this method, for
- * details.
+ * The new replication manager now does not consider replicas with
+ * UNHEALTHY state when counting sufficient replication. This method is
+ * overridden to ensure LegacyReplicationManager works as intended in
+ * HDDS-6447.
+ * See {@link LegacyRatisContainerReplicaCount}, which overrides this
+ * method, for details.
*/
protected int healthyReplicaCountAdapter() {
- return getUnhealthyReplicaCount();
+ return 0;
}
@Override
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisContainerReplicaCount.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisContainerReplicaCount.java
index 7d4947dd64..0363af0e4e 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisContainerReplicaCount.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisContainerReplicaCount.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hdds.scm.container.replication;
+import org.apache.hadoop.hdds.client.RatisReplicationConfig;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
@@ -26,7 +27,10 @@ import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.junit.Assert;
import org.junit.jupiter.api.Test;
+import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashSet;
+import java.util.List;
import java.util.Set;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED;
@@ -35,7 +39,12 @@ import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalSt
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_MAINTENANCE;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE;
import static
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSED;
+import static
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.CLOSING;
import static
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.OPEN;
+import static
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State.UNHEALTHY;
+import static
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerInfo;
+import static
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerReplica;
+import static
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createReplicas;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -412,6 +421,43 @@ class TestRatisContainerReplicaCount {
assertFalse(rcnt.isHealthy());
}
+ @Test
+ void testSufficientReplicationWithMismatchedReplicaState() {
+ ContainerInfo container =
+ createContainerInfo(RatisReplicationConfig.getInstance(
+ HddsProtos.ReplicationFactor.THREE), 1L,
+ HddsProtos.LifeCycleState.CLOSED);
+ Set<ContainerReplica> replicas =
+ createReplicas(ContainerID.valueOf(1L), CLOSED, 0, 0);
+ replicas.add(createContainerReplica(ContainerID.valueOf(1L), 0,
+ IN_SERVICE, CLOSING));
+
+ RatisContainerReplicaCount rcnt =
+ new RatisContainerReplicaCount(container, replicas,
+ Collections.emptyList(), 2);
+ assertTrue(rcnt.isSufficientlyReplicated());
+ }
+
+ @Test
+ void testSufficientReplicationWithPendingDeleteOnUnhealthyReplica() {
+ ContainerInfo container =
+ createContainerInfo(RatisReplicationConfig.getInstance(
+ HddsProtos.ReplicationFactor.THREE), 1L,
+ HddsProtos.LifeCycleState.CLOSED);
+ Set<ContainerReplica> replicas =
+ createReplicas(ContainerID.valueOf(1L), CLOSED, 0, 0, 0);
+ ContainerReplica unhealthyReplica = createContainerReplica(
+ ContainerID.valueOf(1L), 0, IN_SERVICE, UNHEALTHY);
+ replicas.add(unhealthyReplica);
+
+ List<ContainerReplicaOp> ops = new ArrayList<>();
+ ops.add(ContainerReplicaOp.create(ContainerReplicaOp.PendingOpType.DELETE,
+ unhealthyReplica.getDatanodeDetails(), 0));
+ RatisContainerReplicaCount replicaCount =
+ new RatisContainerReplicaCount(container, replicas, ops, 2);
+ assertTrue(replicaCount.isSufficientlyReplicated());
+ }
+
@Test
void testIsHealthyWithMaintReplicaIsHealthy() {
Set<ContainerReplica> replica =
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java
index 0bf0f1d746..5b0b762307 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisOverReplicationHandler.java
@@ -149,31 +149,6 @@ public class TestRatisOverReplicationHandler {
getOverReplicatedHealthResult(), 0);
}
- /**
- * When a quasi closed container is over replicated, the handler should
- * prioritize creating delete commands for unhealthy replicas over quasi
- * closed replicas.
- */
- @Test
- public void testOverReplicatedQuasiClosedContainerWithUnhealthyReplica()
- throws IOException {
- container = createContainer(HddsProtos.LifeCycleState.QUASI_CLOSED,
- RATIS_REPLICATION_CONFIG);
- Set<ContainerReplica> replicas =
- createReplicasWithSameOrigin(container.containerID(),
- ContainerReplicaProto.State.QUASI_CLOSED, 0, 0, 0);
- ContainerReplica unhealthyReplica =
- createContainerReplica(container.containerID(), 0,
- HddsProtos.NodeOperationalState.IN_SERVICE,
- ContainerReplicaProto.State.UNHEALTHY);
- replicas.add(unhealthyReplica);
-
- Map<DatanodeDetails, SCMCommand<?>> commands = testProcessing(replicas,
- Collections.emptyList(), getOverReplicatedHealthResult(), 1);
- Assert.assertTrue(
- commands.containsKey(unhealthyReplica.getDatanodeDetails()));
- }
-
/**
* Handler should not create any delete commands if removing a replica
* makes the container mis replicated.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]