This is an automated email from the ASF dual-hosted git repository.

sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 4c6c1218a7 HDDS-7818. Modify Ratis Replication Handling in the new RM 
(#4204)
4c6c1218a7 is described below

commit 4c6c1218a76b17de7d4b78a27a33f6c536f1c997
Author: Siddhant Sangwan <[email protected]>
AuthorDate: Thu Jan 26 15:42:56 2023 +0530

    HDDS-7818. Modify Ratis Replication Handling in the new RM (#4204)
---
 .../replication/ContainerHealthResult.java         |  21 +++-
 .../replication/RatisContainerReplicaCount.java    |  15 +++
 .../replication/RatisOverReplicationHandler.java   |  24 ++--
 .../replication/RatisUnderReplicationHandler.java  |  12 +-
 .../health/RatisReplicationCheckHandler.java       |  82 +++++++------
 .../replication/TestReplicationManager.java        |  58 +++++++++
 .../health/TestRatisReplicationCheckHandler.java   | 131 +++++++++++++++++++++
 7 files changed, 282 insertions(+), 61 deletions(-)

diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
index d5b92935ce..259cf78360 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ContainerHealthResult.java
@@ -80,7 +80,7 @@ public class ContainerHealthResult {
    */
   public static class UnHealthyResult extends ContainerHealthResult {
 
-    UnHealthyResult(ContainerInfo containerInfo) {
+    public UnHealthyResult(ContainerInfo containerInfo) {
       super(containerInfo, HealthState.UNHEALTHY);
     }
   }
@@ -108,6 +108,7 @@ public class ContainerHealthResult {
     private final boolean dueToDecommission;
     private final boolean sufficientlyReplicatedAfterPending;
     private final boolean unrecoverable;
+    private boolean hasHealthyReplicas;
     private boolean hasUnReplicatedOfflineIndexes = false;
     private int requeueCount = 0;
 
@@ -228,6 +229,14 @@ public class ContainerHealthResult {
     public boolean hasUnreplicatedOfflineIndexes() {
       return hasUnReplicatedOfflineIndexes;
     }
+
+    public boolean hasHealthyReplicas() {
+      return hasHealthyReplicas;
+    }
+
+    public void setHasHealthyReplicas(boolean hasHealthyReplicas) {
+      this.hasHealthyReplicas = hasHealthyReplicas;
+    }
   }
 
   /**
@@ -268,7 +277,7 @@ public class ContainerHealthResult {
 
     private final int excessRedundancy;
     private final boolean sufficientlyReplicatedAfterPending;
-
+    private boolean hasMismatchedReplicas;
 
     public OverReplicatedHealthResult(ContainerInfo containerInfo,
         int excessRedundancy, boolean replicatedOkWithPending) {
@@ -300,5 +309,13 @@ public class ContainerHealthResult {
     public boolean isReplicatedOkAfterPending() {
       return sufficientlyReplicatedAfterPending;
     }
+
+    public boolean hasMismatchedReplicas() {
+      return hasMismatchedReplicas;
+    }
+
+    public void setHasMismatchedReplicas(boolean hasMismatchedReplicas) {
+      this.hasMismatchedReplicas = hasMismatchedReplicas;
+    }
   }
 }
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
index e65ac74ed1..d76174c1a8 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java
@@ -453,6 +453,21 @@ public class RatisContainerReplicaCount implements 
ContainerReplicaCount {
     return excessRedundancy;
   }
 
+  /**
+   * Checks whether insufficient replication is because of some replicas
+   * being on datanodes that were decommissioned.
+   * @param includePendingAdd if pending adds should be considered
+   * @return true if there is insufficient replication and it's because of
+   * decommissioning.
+   */
+  public boolean inSufficientDueToDecommission(boolean includePendingAdd) {
+    if (isSufficientlyReplicated(includePendingAdd)) {
+      return false;
+    }
+    int delta = redundancyDelta(true, includePendingAdd);
+    return decommissionCount >= delta;
+  }
+
   /**
    * How many more replicas can be lost before the container is
    * unreadable, assuming any infligh deletes will complete. For containers
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisOverReplicationHandler.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisOverReplicationHandler.java
index 856027a5af..13e71a0483 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisOverReplicationHandler.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisOverReplicationHandler.java
@@ -98,28 +98,22 @@ public class RatisOverReplicationHandler
         )
         .collect(Collectors.toSet());
 
-    // count pending adds and deletes
-    int pendingAdd = 0, pendingDelete = 0;
-    for (ContainerReplicaOp op : pendingOps) {
-      if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) {
-        pendingAdd++;
-      } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
-        pendingDelete++;
-      }
-    }
     RatisContainerReplicaCount replicaCount =
         new RatisContainerReplicaCount(containerInfo, healthyReplicas,
-            pendingAdd, pendingDelete,
-            containerInfo.getReplicationFactor().getNumber(),
-            minHealthyForMaintenance);
+            pendingOps, minHealthyForMaintenance);
 
     // verify that this container is actually over replicated
     if (!verifyOverReplication(replicaCount)) {
       return Collections.emptySet();
     }
 
-    // get number of excess replicas
-    int excess = replicaCount.getExcessRedundancy(true);
+    // count pending deletes
+    int pendingDelete = 0;
+    for (ContainerReplicaOp op : pendingOps) {
+      if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
+        pendingDelete++;
+      }
+    }
     LOG.info("Container {} is over replicated. Actual replica count is {}, " +
             "with {} pending delete(s). Expected replica count is {}.",
         containerInfo.containerID(),
@@ -135,6 +129,8 @@ public class RatisOverReplicationHandler
       return Collections.emptySet();
     }
 
+    // get number of excess replicas
+    int excess = replicaCount.getExcessRedundancy(true);
     return createCommands(containerInfo, eligibleReplicas, excess);
   }
 
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
index 6d5a2d266b..4a3819702e 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisUnderReplicationHandler.java
@@ -91,18 +91,8 @@ public class RatisUnderReplicationHandler
     ContainerInfo containerInfo = result.getContainerInfo();
     LOG.debug("Handling under replicated Ratis container {}", containerInfo);
 
-    // count pending adds and deletes
-    int pendingAdd = 0, pendingDelete = 0;
-    for (ContainerReplicaOp op : pendingOps) {
-      if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) {
-        pendingAdd++;
-      } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
-        pendingDelete++;
-      }
-    }
     RatisContainerReplicaCount replicaCount =
-        new RatisContainerReplicaCount(containerInfo, replicas, pendingAdd,
-            pendingDelete, containerInfo.getReplicationFactor().getNumber(),
+        new RatisContainerReplicaCount(containerInfo, replicas, pendingOps,
             minHealthyForMaintenance);
 
     // verify that this container is still under replicated and we don't have
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
index 91dd51a607..2634ea44a9 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/RatisReplicationCheckHandler.java
@@ -68,32 +68,45 @@ public class RatisReplicationCheckHandler extends 
AbstractCheck {
     ContainerHealthResult health = checkHealth(request);
     LOG.debug("Checking container {} in RatisReplicationCheckHandler",
         container);
-    if (health.getHealthState() == ContainerHealthResult.HealthState.HEALTHY) {
+    if (health.getHealthState() == ContainerHealthResult.HealthState.HEALTHY ||
+        health.getHealthState() ==
+            ContainerHealthResult.HealthState.UNHEALTHY) {
       // If the container is healthy, there is nothing else to do in this
       // handler so return as unhandled so any further handlers will be tried.
       return false;
     }
+
     if (health.getHealthState()
         == ContainerHealthResult.HealthState.UNDER_REPLICATED) {
+      ContainerHealthResult.UnderReplicatedHealthResult underHealth
+          = ((ContainerHealthResult.UnderReplicatedHealthResult) health);
+      if (!underHealth.isUnrecoverable() && !underHealth.hasHealthyReplicas()) 
{
+        /*
+        If the container is recoverable but does not have healthy replicas,
+        return false. Unhealthy replication can be checked in a handler
+        further down the chain.
+         */
+        return false;
+      }
+
       report.incrementAndSample(
           ReplicationManagerReport.HealthState.UNDER_REPLICATED,
           container.containerID());
-      ContainerHealthResult.UnderReplicatedHealthResult underHealth
-          = ((ContainerHealthResult.UnderReplicatedHealthResult) health);
+      LOG.debug("Container {} is Under Replicated. isReplicatedOkAfterPending" 
+
+              " is [{}]. isUnrecoverable is [{}]. hasHealthyReplicas is [{}].",
+          container,
+          underHealth.isReplicatedOkAfterPending(),
+          underHealth.isUnrecoverable(), underHealth.hasHealthyReplicas());
+
       if (underHealth.isUnrecoverable()) {
         report.incrementAndSample(ReplicationManagerReport.HealthState.MISSING,
             container.containerID());
+        return true;
       }
-      // TODO - if it is unrecoverable, should we return false to other
-      //        handlers can be tried?
-      if (!underHealth.isUnrecoverable() &&
-          !underHealth.isReplicatedOkAfterPending()) {
+      if (!underHealth.isReplicatedOkAfterPending() &&
+          underHealth.hasHealthyReplicas()) {
         request.getReplicationQueue().enqueue(underHealth);
       }
-      LOG.debug("Container {} is Under Replicated. isReplicatedOkAfterPending" 
+
-          " is [{}]. isUnrecoverable is [{}]", container,
-          underHealth.isReplicatedOkAfterPending(),
-          underHealth.isUnrecoverable());
       return true;
     }
 
@@ -104,13 +117,17 @@ public class RatisReplicationCheckHandler extends 
AbstractCheck {
           container.containerID());
       ContainerHealthResult.OverReplicatedHealthResult overHealth
           = ((ContainerHealthResult.OverReplicatedHealthResult) health);
-      if (!overHealth.isReplicatedOkAfterPending()) {
+      if (!overHealth.isReplicatedOkAfterPending() &&
+          !overHealth.hasMismatchedReplicas()) {
         request.getReplicationQueue().enqueue(overHealth);
       }
       LOG.debug("Container {} is Over Replicated. isReplicatedOkAfterPending" +
-              " is [{}]", container, overHealth.isReplicatedOkAfterPending());
+              " is [{}]. hasMismatchedReplicas is [{}]", container,
+          overHealth.isReplicatedOkAfterPending(),
+          overHealth.hasMismatchedReplicas());
       return true;
     }
+
     if (health.getHealthState() ==
         ContainerHealthResult.HealthState.MIS_REPLICATED) {
       report.incrementAndSample(
@@ -139,42 +156,36 @@ public class RatisReplicationCheckHandler extends 
AbstractCheck {
     // Note that this setting is minReplicasForMaintenance. For EC the variable
     // is defined as remainingRedundancy which is subtly different.
     int minReplicasForMaintenance = request.getMaintenanceRedundancy();
-    int pendingAdd = 0;
-    int pendingDelete = 0;
-    for (ContainerReplicaOp op : replicaPendingOps) {
-      if (op.getOpType() == ContainerReplicaOp.PendingOpType.ADD) {
-        pendingAdd++;
-      } else if (op.getOpType() == ContainerReplicaOp.PendingOpType.DELETE) {
-        pendingDelete++;
-      }
-    }
-    int requiredNodes = container.getReplicationConfig().getRequiredNodes();
-
-    // RatisContainerReplicaCount uses the minReplicasForMaintenance rather
-    // than remainingRedundancy which ECContainerReplicaCount uses.
     RatisContainerReplicaCount replicaCount =
-        new RatisContainerReplicaCount(container, replicas, pendingAdd,
-            pendingDelete, requiredNodes, minReplicasForMaintenance);
+        new RatisContainerReplicaCount(container, replicas, replicaPendingOps,
+            minReplicasForMaintenance);
 
     boolean sufficientlyReplicated
         = replicaCount.isSufficientlyReplicated(false);
     if (!sufficientlyReplicated) {
       ContainerHealthResult.UnderReplicatedHealthResult result =
           new ContainerHealthResult.UnderReplicatedHealthResult(
-          container, replicaCount.getRemainingRedundancy(),
-          replicas.size() - pendingDelete >= requiredNodes,
-          replicaCount.isSufficientlyReplicated(true),
-          replicaCount.isUnrecoverable());
+              container, replicaCount.getRemainingRedundancy(),
+              replicaCount.inSufficientDueToDecommission(false),
+              replicaCount.isSufficientlyReplicated(true),
+              replicaCount.isUnrecoverable());
+      result.setHasHealthyReplicas(replicaCount.getHealthyReplicaCount() > 0);
       return result;
     }
 
     boolean isOverReplicated = replicaCount.isOverReplicated(false);
     if (isOverReplicated) {
       boolean repOkWithPending = !replicaCount.isOverReplicated(true);
-      return new ContainerHealthResult.OverReplicatedHealthResult(
-          container, replicaCount.getExcessRedundancy(false), 
repOkWithPending);
+      ContainerHealthResult.OverReplicatedHealthResult result =
+          new ContainerHealthResult.OverReplicatedHealthResult(
+              container, replicaCount.getExcessRedundancy(false),
+              repOkWithPending);
+      result.setHasMismatchedReplicas(
+          replicaCount.getMisMatchedReplicaCount() > 0);
+      return result;
     }
 
+    int requiredNodes = container.getReplicationConfig().getRequiredNodes();
     ContainerPlacementStatus placementStatus =
         getPlacementStatus(replicas, requiredNodes, Collections.emptyList());
     ContainerPlacementStatus placementStatusWithPending = placementStatus;
@@ -187,6 +198,9 @@ public class RatisReplicationCheckHandler extends 
AbstractCheck {
           container, placementStatusWithPending.isPolicySatisfied());
     }
 
+    if (replicaCount.getUnhealthyReplicaCount() != 0) {
+      return new ContainerHealthResult.UnHealthyResult(container);
+    }
     // No issues detected, just return healthy.
     return new ContainerHealthResult.HealthyResult(container);
   }
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
index 2a2e6c68e6..594aad42f0 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
@@ -180,6 +180,64 @@ public class TestReplicationManager {
     Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
   }
 
+  @Test
+  public void misMatchedReplicasOfRatisContainerShouldBeClosed()
+      throws ContainerNotFoundException {
+    RatisReplicationConfig ratisRepConfig =
+        RatisReplicationConfig.getInstance(THREE);
+    ContainerInfo container = createContainerInfo(ratisRepConfig, 1,
+        HddsProtos.LifeCycleState.CLOSED);
+    // Container is closed but replicas are open
+    Set<ContainerReplica> replicas =
+        addReplicas(container, ContainerReplicaProto.State.CLOSING, 0, 0, 0);
+    ContainerReplica closedReplica =
+        createContainerReplica(container.containerID(), 0, IN_SERVICE,
+            ContainerReplicaProto.State.CLOSED);
+    replicas.add(closedReplica);
+
+    replicationManager.processContainer(container, repQueue, repReport);
+
+    Mockito.verify(eventPublisher, Mockito.times(3))
+        .fireEvent(any(), any());
+    Assert.assertEquals(1, repReport.getStat(
+        ReplicationManagerReport.HealthState.OVER_REPLICATED));
+    Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+    /*
+    Though over replicated, this container should not be added to over
+    replicated queue until all replicas are closed.
+     */
+    Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+  }
+
+  @Test
+  public void testUnderReplicatedQuasiClosedContainerWithUnhealthyReplica()
+      throws ContainerNotFoundException {
+    RatisReplicationConfig ratisRepConfig =
+        RatisReplicationConfig.getInstance(THREE);
+    ContainerInfo container = createContainerInfo(ratisRepConfig, 1,
+        HddsProtos.LifeCycleState.QUASI_CLOSED);
+    // Container is closed but replicas are open
+    Set<ContainerReplica> replicas =
+        addReplicas(container, ContainerReplicaProto.State.CLOSING, 0);
+    ContainerReplica quasiClosedReplica =
+        createContainerReplica(container.containerID(), 0, IN_SERVICE,
+            ContainerReplicaProto.State.QUASI_CLOSED);
+    ContainerReplica unhealthyReplica =
+        createContainerReplica(container.containerID(), 0, IN_SERVICE,
+            ContainerReplicaProto.State.UNHEALTHY);
+    replicas.add(quasiClosedReplica);
+    replicas.add(unhealthyReplica);
+
+    replicationManager.processContainer(container, repQueue, repReport);
+
+    Mockito.verify(eventPublisher, Mockito.times(1))
+        .fireEvent(any(), any());
+    Assert.assertEquals(1, repReport.getStat(
+        ReplicationManagerReport.HealthState.UNDER_REPLICATED));
+    Assert.assertEquals(1, repQueue.underReplicatedQueueSize());
+    Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+  }
+
   @Test
   public void testHealthyContainer() throws ContainerNotFoundException {
     ContainerInfo container = createContainerInfo(repConfig, 1,
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java
index 9cc2c6263d..33e7e979c6 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestRatisReplicationCheckHandler.java
@@ -22,6 +22,7 @@ import org.apache.hadoop.hdds.client.RatisReplicationConfig;
 import org.apache.hadoop.hdds.client.ReplicationConfig;
 import org.apache.hadoop.hdds.protocol.DatanodeDetails;
 import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
+import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto.State;
 import org.apache.hadoop.hdds.scm.PlacementPolicy;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
@@ -54,6 +55,7 @@ import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor
 import static 
org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp.PendingOpType.ADD;
 import static 
org.apache.hadoop.hdds.scm.container.replication.ContainerReplicaOp.PendingOpType.DELETE;
 import static 
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerInfo;
+import static 
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createContainerReplica;
 import static 
org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil.createReplicas;
 
 /**
@@ -294,6 +296,80 @@ public class TestRatisReplicationCheckHandler {
         ReplicationManagerReport.HealthState.MISSING));
   }
 
+  /**
+   * Replicas with ContainerReplicaProto#State UNHEALTHY don't contribute to
+   * the redundancy of a container. This tests that a CLOSED container with {
+   * CLOSED, CLOSED, UNHEALTHY, UNHEALTHY} replicas is under replicated.
+   */
+  @Test
+  public void testUnderReplicatedWithUnhealthyReplicas() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas
+        = createReplicas(container.containerID(), State.CLOSED, 0, 0);
+    Set<ContainerReplica> unhealthyReplicas =
+        createReplicas(container.containerID(), State.UNHEALTHY, 0, 0);
+    replicas.addAll(unhealthyReplicas);
+    requestBuilder.setContainerReplicas(replicas)
+        .setContainerInfo(container);
+    UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+        healthCheck.checkHealth(requestBuilder.build());
+
+    Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+    Assert.assertEquals(1, result.getRemainingRedundancy());
+    Assert.assertFalse(result.isReplicatedOkAfterPending());
+    Assert.assertFalse(result.underReplicatedDueToDecommission());
+
+    Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
+    Assert.assertEquals(1, repQueue.underReplicatedQueueSize());
+    Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+    Assert.assertEquals(1, report.getStat(
+        ReplicationManagerReport.HealthState.UNDER_REPLICATED));
+  }
+
+  @Test
+  public void testSufficientReplicationWithMismatchedReplicas() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas
+        = createReplicas(container.containerID(), State.CLOSING, 0, 0, 0);
+
+    requestBuilder.setContainerReplicas(replicas)
+        .setContainerInfo(container);
+    ContainerHealthResult result =
+        healthCheck.checkHealth(requestBuilder.build());
+    Assert.assertEquals(HealthState.HEALTHY, result.getHealthState());
+
+    Assert.assertFalse(healthCheck.handle(requestBuilder.build()));
+    Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+    Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+  }
+
+  @Test
+  public void testHandlerReturnsFalseWhenAllReplicasAreUnhealthy() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas =
+        createReplicas(container.containerID(), State.UNHEALTHY, 0, 0, 0, 0);
+    requestBuilder.setContainerReplicas(replicas)
+        .setContainerInfo(container);
+    UnderReplicatedHealthResult result = (UnderReplicatedHealthResult)
+        healthCheck.checkHealth(requestBuilder.build());
+
+    /*
+    Here, UNDER_REPLICATED health state simply means there aren't enough
+    healthy replicas. This handler cannot make a decision about
+    replication/deleting replicas when all of them are unhealthy.
+     */
+    Assert.assertEquals(HealthState.UNDER_REPLICATED, result.getHealthState());
+    Assert.assertEquals(0, result.getRemainingRedundancy());
+    Assert.assertFalse(result.isReplicatedOkAfterPending());
+    Assert.assertFalse(result.underReplicatedDueToDecommission());
+
+    Assert.assertFalse(healthCheck.handle(requestBuilder.build()));
+    Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+    Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+    Assert.assertEquals(0, report.getStat(
+        ReplicationManagerReport.HealthState.UNDER_REPLICATED));
+  }
+
   @Test
   public void testOverReplicatedContainer() {
     ContainerInfo container = createContainerInfo(repConfig);
@@ -325,6 +401,61 @@ public class TestRatisReplicationCheckHandler {
         ReplicationManagerReport.HealthState.OVER_REPLICATED));
   }
 
+  @Test
+  public void testOverReplicatedContainerWithMismatchedReplicas() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas
+        = createReplicas(container.containerID(), State.QUASI_CLOSED, 0, 0);
+    Set<ContainerReplica> unhealthyReplicas =
+        createReplicas(container.containerID(), State.CLOSING, 0, 0);
+    replicas.addAll(unhealthyReplicas);
+    requestBuilder.setContainerReplicas(replicas)
+        .setContainerInfo(container);
+
+    OverReplicatedHealthResult result = (OverReplicatedHealthResult)
+        healthCheck.checkHealth(requestBuilder.build());
+    Assert.assertEquals(HealthState.OVER_REPLICATED, result.getHealthState());
+    Assert.assertEquals(1, result.getExcessRedundancy());
+    Assert.assertFalse(result.isReplicatedOkAfterPending());
+
+    Assert.assertTrue(healthCheck.handle(requestBuilder.build()));
+    Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+    /*
+    We have an excess replica, but we hold off on adding to the over
+    replication queue until all the mismatched replicas match the container
+    state.
+     */
+    Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+    Assert.assertEquals(1, report.getStat(
+        ReplicationManagerReport.HealthState.OVER_REPLICATED));
+  }
+
+  @Test
+  public void testHandlerReturnsFalseForExcessUnhealthyReplicas() {
+    ContainerInfo container = createContainerInfo(repConfig);
+    Set<ContainerReplica> replicas
+        = createReplicas(container.containerID(), State.CLOSED, 0, 0);
+    ContainerReplica mismatchedReplica =
+        createContainerReplica(container.containerID(), 0, IN_SERVICE,
+            State.CLOSING);
+    Set<ContainerReplica> unhealthyReplicas =
+        createReplicas(container.containerID(), State.UNHEALTHY, 0, 0, 0);
+    replicas.add(mismatchedReplica);
+    replicas.addAll(unhealthyReplicas);
+    requestBuilder.setContainerReplicas(replicas)
+        .setContainerInfo(container);
+    ContainerHealthResult result =
+        healthCheck.checkHealth(requestBuilder.build());
+
+    // this handler does not deal with an excess of unhealthy replicas when
+    // the container is otherwise sufficiently replicated
+    Assert.assertEquals(HealthState.UNHEALTHY, result.getHealthState());
+
+    Assert.assertFalse(healthCheck.handle(requestBuilder.build()));
+    Assert.assertEquals(0, repQueue.underReplicatedQueueSize());
+    Assert.assertEquals(0, repQueue.overReplicatedQueueSize());
+  }
+
   @Test
   public void testOverReplicatedContainerFixedByPending() {
     ContainerInfo container = createContainerInfo(repConfig);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to