This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 4c6c1218a7 HDDS-7818. Modify Ratis Replication Handling in the new RM
(#4204)
4c6c1218a7 is described below
commit 4c6c1218a76b17de7d4b78a27a33f6c536f1c997
Author: Siddhant Sangwan <[email protected]>
AuthorDate: Thu Jan 26 15:42:56 2023 +0530
HDDS-7818. Modify Ratis Replication Handling in the new RM (#4204)
---
.../replication/ContainerHealthResult.java | 21 +++-
.../replication/RatisContainerReplicaCount.java | 15 +++
.../replication/RatisOverReplicationHandler.java | 24 ++--
.../replication/RatisUnderReplicationHandler.java | 12 +-
.../health/RatisReplicationCheckHandler.java | 82 +++++++------
.../replication/TestReplicationManager.java | 58 +++++++++
.../health/TestRatisReplicationCheckHandler.java | 131 +++++++++++++++++++++
7 files changed, 282 insertions(+), 61 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
index d5b92935ce..259cf78360 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
@@ -80,7 +80,7 @@ public class ContainerHealthResult {
*/
public static class UnHealthyResult extends ContainerHealthResult {
- UnHealthyResult(ContainerInfo containerInfo) {
+ public UnHealthyResult(ContainerInfo containerInfo) {
super(containerInfo, HealthState.UNHEALTHY);
}
}
@@ -108,6 +108,7 @@ public class ContainerHealthResult {
private final boolean dueToDecommission;
private final boolean sufficientlyReplicatedAfterPending;
private final boolean unrecoverable;
+ private boolean hasHealthyReplicas;
private boolean hasUnReplicatedOfflineIndexes = false;
private int requeueCount = 0;
@@ -228,6 +229,14 @@ public class ContainerHealthResult {
public boolean hasUnreplicatedOfflineIndexes() {
return hasUnReplicatedOfflineIndexes;
}
+
+ public boolean hasHealthyReplicas() {
+ return hasHealthyReplicas;
+ }
+
+ public void setHasHealthyReplicas(boolean hasHealthyReplicas) {
+ this.hasHealthyReplicas = hasHealthyReplicas;
+ }
}
/**
@@ -268,7 +277,7 @@ public class ContainerHealthResult {
private final int excessRedundancy;
private final boolean sufficientlyReplicatedAfterPending;
-
+ private boolean hasMismatchedReplicas;
public OverReplicatedHealthResult(ContainerInfo containerInfo,
int excessRedundancy, boolean replicatedOkWithPending) {
@@ -300,5 +309,13 @@ public class ContainerHealthResult {
public boolean isReplicatedOkAfterPending() {
return sufficientlyReplicatedAfterPending;
}
+
+ public boolean hasMismatchedReplicas() {
+ return hasMismatchedReplicas;
+ }
+
+ public void setHasMismatchedReplicas(boolean hasMismatchedReplicas) {
+ this.hasMismatchedReplicas = hasMismatchedReplicas;
+ }
}
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
index e65ac74ed1..d76174c1a8 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
@@ -453,6 +453,21 @@ public class RatisContainerReplicaCount implements
ContainerReplicaCount {
return excessRedundancy;
}
+ /**
+ * Checks whether insufficient replication is because of some replicas
+ * being on datanodes that were decommissioned.
+ * @param includePendingAdd if pending adds should be considered
+ * @return true if there is insufficient replication and it's because of
+ * decommissioning.
+ */
+ public boolean inSufficientDueToDecommission(boolean includePendingAdd) {
+ if (isSufficientlyReplicated(includePendingAdd)) {
+ return false;
+ }
+ int delta = redundancyDelta(true, includePendingAdd);
+ return decommissionCount >= delta;
+ }
+
/**
* How many more replicas can be lost before the container is
* unreadable, assuming any infligh deletes will complete. For containers
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisOverReplicationHandler.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisOverReplicationHandler.java
index 856027a5af..13e71a0483 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisOverReplicationHandler.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisOverReplicationHandler.java
@@ -98,28 +98,22 @@ public class RatisOverReplicationHandler
)
.collect(Collectors.toSet());
- // count pending adds and deletes
- int pendingAdd = 0, pendingDelete = 0;
- for (ContainerReplicaOp op : pendingOps) {
- if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) {
- pendingAdd++;
- } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
- pendingDelete++;
- }
- }
RatisContainerReplicaCount replicaCount =
new RatisContainerReplicaCount(containerInfo, healthyReplicas,
- pendingAdd, pendingDelete,
- containerInfo.getReplicationFactor().getNumber(),
- minHealthyForMaintenance);
+ pendingOps, minHealthyForMaintenance);
// verify that this container is actually over replicated
if (!verifyOverReplication(replicaCount)) {
return Collections.emptySet();
}
- // get number of excess replicas
- int excess = replicaCount.getExcessRedundancy(true);
+ // count pending deletes
+ int pendingDelete = 0;
+ for (ContainerReplicaOp op : pendingOps) {
+ if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
+ pendingDelete++;
+ }
+ }
LOG.info("Container {} is over replicated. Actual replica count is {}, " +
"with {} pending delete(s). Expected replica count is {}.",
containerInfo.containerID(),
@@ -135,6 +129,8 @@ public class RatisOverReplicationHandler
return Collections.emptySet();
}
+ // get number of excess replicas
+ int excess = replicaCount.getExcessRedundancy(true);
return createCommands(containerInfo, eligibleReplicas, excess);
}
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
index 6d5a2d266b..4a3819702e 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
@@ -91,18 +91,8 @@ public class RatisUnderReplicationHandler
ContainerInfo containerInfo = result.getContainerInfo();
LOG.debug("Handling under replicated Ratis container {}", containerInfo);
- // count pending adds and deletes
- int pendingAdd = 0, pendingDelete = 0;
- for (ContainerReplicaOp op : pendingOps) {
- if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) {
- pendingAdd++;
- } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
- pendingDelete++;
- }
- }
RatisContainerReplicaCount replicaCount =
- new RatisContainerReplicaCount(containerInfo, replicas, pendingAdd,
- pendingDelete, containerInfo.getReplicationFactor().getNumber(),
+ new RatisContainerReplicaCount(containerInfo, replicas, pendingOps,
minHealthyForMaintenance);
// verify that this container is still under replicated and we don't have
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
index 91dd51a607..2634ea44a9 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
@@ -68,32 +68,45 @@ public class RatisReplicationCheckHandler extends
AbstractCheck {
ContainerHealthResult health = checkHealth(request);
LOG.debug("Checking container {} in RatisReplicationCheckHandler",
container);
- if (health.getHealthState() == ContainerHealthResult.HealthState.HEALTHY) {
+ if (health.getHealthState() == ContainerHealthResult.HealthState.HEALTHY ||
+ health.getHealthState() ==
+ ContainerHealthResult.HealthState.UNHEALTHY) {
// If the container is healthy, there is nothing else to do in this
// handler so return as unhandled so any further handlers will be tried.
return false;
}
+
if (health.getHealthState()
== ContainerHealthResult.HealthState.UNDER_REPLICATED) {
+ ContainerHealthResult.UnderReplicatedHealthResult underHealth
+ = ((ContainerHealthResult.UnderReplicatedHealthResult) health);
+ if (!underHealth.isUnrecoverable() && !underHealth.hasHealthyReplicas())
{
+ /*
+ If the container is recoverable but does not have healthy replicas,
+ return false. Unhealthy replication can be checked in a handler
+ further down the chain.
+ */
+ return false;
+ }
+
report.incrementAndSample(
ReplicationManagerReport.HealthState.UNDER_REPLICATED,
container.containerID());
- ContainerHealthResult.UnderReplicatedHealthResult underHealth
- = ((ContainerHealthResult.UnderReplicatedHealthResult) health);
+ LOG.debug("Container {} is Under Replicated. isReplicatedOkAfterPending"
+
+ " is [{}]. isUnrecoverable is [{}]. hasHealthyReplicas is [{}].",
+ container,
+ underHealth.isReplicatedOkAfterPending(),
+ underHealth.isUnrecoverable(), underHealth.hasHealthyReplicas());
+
if (underHealth.isUnrecoverable()) {
report.incrementAndSample(ReplicationManagerReport.HealthState.MISSING,
container.containerID());
+ return true;
}
- // TODO - if it is unrecoverable, should we return false to other
- // handlers can be tried?
- if (!underHealth.isUnrecoverable() &&
- !underHealth.isReplicatedOkAfterPending()) {
+ if (!underHealth.isReplicatedOkAfterPending() &&
+ underHealth.hasHealthyReplicas()) {
request.getReplicationQueue().enqueue(underHealth);
}
- LOG.debug("Container {} is Under Replicated. isReplicatedOkAfterPending"
+
- " is [{}]. isUnrecoverable is [{}]", container,
- underHealth.isReplicatedOkAfterPending(),
- underHealth.isUnrecoverable());
return true;
}
@@ -104,13 +117,17 @@ public class RatisReplicationCheckHandler extends
AbstractCheck {
container.containerID());
ContainerHealthResult.OverReplicatedHealthResult overHealth
= ((ContainerHealthResult.OverReplicatedHealthResult) health);
- if (!overHealth.isReplicatedOkAfterPending()) {
+ if (!overHealth.isReplicatedOkAfterPending() &&
+ !overHealth.hasMismatchedReplicas()) {
request.getReplicationQueue().enqueue(overHealth);
}
LOG.debug("Container {} is Over Replicated. isReplicatedOkAfterPending" +
- " is [{}]", container, overHealth.isReplicatedOkAfterPending());
+ " is [{}]. hasMismatchedReplicas is [{}]", container,
+ overHealth.isReplicatedOkAfterPending(),
+ overHealth.hasMismatchedReplicas());
return true;
}
+
if (health.getHealthState() ==
ContainerHealthResult.HealthState.MIS_REPLICATED) {
report.incrementAndSample(
@@ -139,42 +156,36 @@ public class RatisReplicationCheckHandler extends
AbstractCheck {
// Note that this setting is minReplicasForMaintenance. For EC the variable
// is defined as remainingRedundancy which is subtly different.
int minReplicasForMaintenance = request.getMaintenanceRedundancy();
- int pendingAdd = 0;
- int pendingDelete = 0;
- for (ContainerReplicaOp op : replicaPendingOps) {
- if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) {
- pendingAdd++;
- } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
- pendingDelete++;
- }
- }
- int requiredNodes = container.getReplicationConfig().getRequiredNodes();
-
- // RatisContainerReplicaCount uses the minReplicasForMaintenance rather
- // than remainingRedundancy which ECContainerReplicaCount uses.
RatisContainerReplicaCount replicaCount =
- new RatisContainerReplicaCount(container, replicas, pendingAdd,
- pendingDelete, requiredNodes, minReplicasForMaintenance);
+ new RatisContainerReplicaCount(container, replicas, replicaPendingOps,
+ minReplicasForMaintenance);
boolean sufficientlyReplicated
= replicaCount.isSufficientlyReplicated(false);
if (!sufficientlyReplicated) {
ContainerHealthResult.UnderReplicatedHealthResult result =
new ContainerHealthResult.UnderReplicatedHealthResult(
- container, replicaCount.getRemainingRedundancy(),
- replicas.size() - pendingDelete >= requiredNodes,
- replicaCount.isSufficientlyReplicated(true),
- replicaCount.isUnrecoverable());
+ container, replicaCount.getRemainingRedundancy(),
+ replicaCount.inSufficientDueToDecommission(false),
+ replicaCount.isSufficientlyReplicated(true),
+ replicaCount.isUnrecoverable());
+ result.setHasHealthyReplicas(replicaCount.getHealthyReplicaCount() > 0);
return result;
}
boolean isOverReplicated = replicaCount.isOverReplicated(false);
if (isOverReplicated) {
boolean repOkWithPending = !replicaCount.isOverReplicated(true);
- return new ContainerHealthResult.OverReplicatedHealthResult(
- container, replicaCount.getExcessRedundancy(false),
repOkWithPending);
+ ContainerHealthResult.OverReplicatedHealthResult result =
+ new ContainerHealthResult.OverReplicatedHealthResult(
+ container, replicaCount.getExcessRedundancy(false),
+ repOkWithPending);
+ result.setHasMismatchedReplicas(
+ replicaCount.getMisMatchedReplicaCount() > 0);
+ return result;
}
+ int requiredNodes = container.getReplicationConfig().getRequiredNodes();
ContainerPlacementStatus placementStatus =
getPlacementStatus(replicas, requiredNodes, Collections.emptyList());
ContainerPlacementStatus placementStatusWithPending = placementStatus;
@@ -187,6 +198,9 @@ public class RatisReplicationCheckHandler extends
AbstractCheck {
container, placementStatusWithPending.isPolicySatisfied());
}
+ if (replicaCount.getUnhealthyReplicaCount() != 0) {
+ return new ContainerHealthResult.UnHealthyResult(container);
+ }
// No issues detected, just return healthy.
return new ContainerHealthResult.HealthyResult(container);
}
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
index 2a2e6c68e6..594aad42f0 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
@@ -180,6 +180,64 @@ public class TestReplicationManager {
Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
}
+ @Test
+ public void misMatchedReplicasOfRatisContainerShouldBeClosed()
+ throws ContainerNotFoundException {
+ RatisReplicationConfig ratisRepConfig =
+ RatisReplicationConfig.getInstance(THREE);
+ ContainerInfo container = createContainerInfo(ratisRepConfig, 1,
+ HddsProtos.LifeCycleState.CLOSED);
+ // Container is closed but replicas are open
+ Set<ContainerReplica> replicas =
+ addReplicas(container, ContainerReplicaProto.State.CLOSING, 0, 0, 0);
+ ContainerReplica closedReplica =
+ createContainerReplica(container.containerID(), 0, IN_SERVICE,
+ ContainerReplicaProto.State.CLOSED);
+ replicas.add(closedReplica);
+
+ replicationManager.processContainer(container, repQueue, repReport);
+
+ Mockito.verify(eventPublisher, Mockito.times(3))
+ .fireEvent(any(), any());
+ Assert.assertEquals(1, repReport.getStat(
+ ReplicationManagerReport.HealthState.OVER_REPLICATED));
+ Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+ /*
+ Though over replicated, this container should not be added to over
+ replicated queue until all replicas are closed.
+ */
+ Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+ }
+
+ @Test
+ public void testUnderReplicatedQuasiClosedContainerWithUnhealthyReplica()
+ throws ContainerNotFoundException {
+ RatisReplicationConfig ratisRepConfig =
+ RatisReplicationConfig.getInstance(THREE);
+ ContainerInfo container = createContainerInfo(ratisRepConfig, 1,
+ HddsProtos.LifeCycleState.QUASI_CLOSED);
+ // Container is closed but replicas are open
+ Set<ContainerReplica> replicas =
+ addReplicas(container, ContainerReplicaProto.State.CLOSING, 0);
+ ContainerReplica quasiClosedReplica =
+ createContainerReplica(container.containerID(), 0, IN_SERVICE,
+ ContainerReplicaProto.State.QUASI_CLOSED);
+ ContainerReplica unhealthyReplica =
+ createContainerReplica(container.containerID(), 0, IN_SERVICE,
+ ContainerReplicaProto.State.UNHEALTHY);
+ replicas.add(quasiClosedReplica);
+ replicas.add(unhealthyReplica);
+
+ replicationManager.processContainer(container, repQueue, repReport);
+
+ Mockito.verify(eventPublisher, Mockito.times(1))
+ .fireEvent(any(), any());
+ Assert.assertEquals(1, repReport.getStat(
+ ReplicationManagerReport.HealthState.UNDER_REPLICATED));
+ Assert.assertEquals(1, repQueue.underReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+ }
+
@Test
public void testHealthyContainer() throws ContainerNotFoundException {
ContainerInfo container = createContainerInfo(repConfig, 1,
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java
index 9cc2c6263d..33e7e979c6 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hdds.client.RatisReplicationConfig;
import org.apache.hadoop.hdds.client.ReplicationConfig;
import org.apache.hadoop.hdds.protocol.DatanodeDetails;
import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State;
import org.apache.hadoop.hdds.scm.PlacementPolicy;
import org.apache.hadoop.hdds.scm.container.ContainerInfo;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
@@ -54,6 +55,7 @@ import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor
import static
org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp.PendingOpType.ADD;
import static
org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp.PendingOpType.DELETE;
import static
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerInfo;
+import static
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerReplica;
import static
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createReplicas;
/**
@@ -294,6 +296,80 @@ public class TestRatisReplicationCheckHandler {
ReplicationManagerReport.HealthState.MISSING));
}
+ /**
+ * Replicas with ContainerReplicaProto#State UNHEALTHY don't contribute to
+ * the redundancy of a container. This tests that a CLOSED container with {
+ * CLOSED, CLOSED, UNHEALTHY, UNHEALTHY} replicas is under replicated.
+ */
+ @Test
+ public void testUnderReplicatedWithUnhealthyReplicas() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas
+ = createReplicas(container.containerID(), State.CLOSED, 0, 0);
+ Set<ContainerReplica> unhealthyReplicas =
+ createReplicas(container.containerID(), State.UNHEALTHY, 0, 0);
+ replicas.addAll(unhealthyReplicas);
+ requestBuilder.setContainerReplicas(replicas)
+ .setContainerInfo(container);
+ UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+ healthCheck.checkHealth(requestBuilder.build());
+
+ Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+ Assert.assertEquals(1, result.getRemainingRedundancy());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
+ Assert.assertFalse(result.underReplicatedDueToDecommission());
+
+ Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
+ Assert.assertEquals(1, repQueue.underReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+ Assert.assertEquals(1, report.getStat(
+ ReplicationManagerReport.HealthState.UNDER_REPLICATED));
+ }
+
+ @Test
+ public void testSufficientReplicationWithMismatchedReplicas() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas
+ = createReplicas(container.containerID(), State.CLOSING, 0, 0, 0);
+
+ requestBuilder.setContainerReplicas(replicas)
+ .setContainerInfo(container);
+ ContainerHealthResult result =
+ healthCheck.checkHealth(requestBuilder.build());
+ Assert.assertEquals(HealthState.HEALTHY, result.getHealthState());
+
+ Assert.assertFalse(healthCheck.handle(requestBuilder.build()));
+ Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+ }
+
+ @Test
+ public void testHandlerReturnsFalseWhenAllReplicasAreUnhealthy() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas =
+ createReplicas(container.containerID(), State.UNHEALTHY, 0, 0, 0, 0);
+ requestBuilder.setContainerReplicas(replicas)
+ .setContainerInfo(container);
+ UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+ healthCheck.checkHealth(requestBuilder.build());
+
+ /*
+ Here, UNDER_REPLICATED health state simply means there aren't enough
+ healthy replicas. This handler cannot make a decision about
+ replication/deleting replicas when all of them are unhealthy.
+ */
+ Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+ Assert.assertEquals(0, result.getRemainingRedundancy());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
+ Assert.assertFalse(result.underReplicatedDueToDecommission());
+
+ Assert.assertFalse(healthCheck.handle(requestBuilder.build()));
+ Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+ Assert.assertEquals(0, report.getStat(
+ ReplicationManagerReport.HealthState.UNDER_REPLICATED));
+ }
+
@Test
public void testOverReplicatedContainer() {
ContainerInfo container = createContainerInfo(repConfig);
@@ -325,6 +401,61 @@ public class TestRatisReplicationCheckHandler {
ReplicationManagerReport.HealthState.OVER_REPLICATED));
}
+ @Test
+ public void testOverReplicatedContainerWithMismatchedReplicas() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas
+ = createReplicas(container.containerID(), State.QUASI_CLOSED, 0, 0);
+ Set<ContainerReplica> unhealthyReplicas =
+ createReplicas(container.containerID(), State.CLOSING, 0, 0);
+ replicas.addAll(unhealthyReplicas);
+ requestBuilder.setContainerReplicas(replicas)
+ .setContainerInfo(container);
+
+ OverReplicatedHealthResult result = (OverReplicatedHealthResult)
+ healthCheck.checkHealth(requestBuilder.build());
+ Assert.assertEquals(HealthState.OVER_REPLICATED, result.getHealthState());
+ Assert.assertEquals(1, result.getExcessRedundancy());
+ Assert.assertFalse(result.isReplicatedOkAfterPending());
+
+ Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
+ Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+ /*
+ We have an excess replica, but we hold off on adding to the over
+ replication queue until all the mismatched replicas match the container
+ state.
+ */
+ Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+ Assert.assertEquals(1, report.getStat(
+ ReplicationManagerReport.HealthState.OVER_REPLICATED));
+ }
+
+ @Test
+ public void testHandlerReturnsFalseForExcessUnhealthyReplicas() {
+ ContainerInfo container = createContainerInfo(repConfig);
+ Set<ContainerReplica> replicas
+ = createReplicas(container.containerID(), State.CLOSED, 0, 0);
+ ContainerReplica mismatchedReplica =
+ createContainerReplica(container.containerID(), 0, IN_SERVICE,
+ State.CLOSING);
+ Set<ContainerReplica> unhealthyReplicas =
+ createReplicas(container.containerID(), State.UNHEALTHY, 0, 0, 0);
+ replicas.add(mismatchedReplica);
+ replicas.addAll(unhealthyReplicas);
+ requestBuilder.setContainerReplicas(replicas)
+ .setContainerInfo(container);
+ ContainerHealthResult result =
+ healthCheck.checkHealth(requestBuilder.build());
+
+ // this handler does not deal with an excess of unhealthy replicas when
+ // the container is otherwise sufficiently replicated
+ Assert.assertEquals(HealthState.UNHEALTHY, result.getHealthState());
+
+ Assert.assertFalse(healthCheck.handle(requestBuilder.build()));
+ Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+ Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+ }
+
@Test
public void testOverReplicatedContainerFixedByPending() {
ContainerInfo container = createContainerInfo(repConfig);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]