This is an automated email from the ASF dual-hosted git repository.
sodonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 5ea64550b5 HDDS-7813. Handle Mismatched Replicas (OPEN or CLOSING) of
QUASI-CLOSED containers in RM (#4195)
5ea64550b5 is described below
commit 5ea64550b5de5609097347789a405bc35b54938e
Author: Siddhant Sangwan <[email protected]>
AuthorDate: Mon Jan 23 21:51:19 2023 +0530
HDDS-7813. Handle Mismatched Replicas (OPEN or CLOSING) of QUASI-CLOSED
containers in RM (#4195)
---
.../container/replication/ReplicationManager.java | 4 +-
...Handler.java => MismatchedReplicasHandler.java} | 44 ++++++++++-------
...ler.java => TestMismatchedReplicasHandler.java} | 56 ++++++++++++++++++++--
3 files changed, 82 insertions(+), 22 deletions(-)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
index 665f58fa73..535f0dde9d 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
@@ -37,7 +37,7 @@ import
org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
import org.apache.hadoop.hdds.scm.container.ContainerReplica;
import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
-import
org.apache.hadoop.hdds.scm.container.replication.health.ClosedWithMismatchedReplicasHandler;
+import
org.apache.hadoop.hdds.scm.container.replication.health.MismatchedReplicasHandler;
import
org.apache.hadoop.hdds.scm.container.replication.health.ClosedWithUnhealthyReplicasHandler;
import
org.apache.hadoop.hdds.scm.container.replication.health.ClosingContainerHandler;
import
org.apache.hadoop.hdds.scm.container.replication.health.DeletingContainerHandler;
@@ -243,7 +243,7 @@ public class ReplicationManager implements SCMService {
containerCheckChain
.addNext(new ClosingContainerHandler(this))
.addNext(new QuasiClosedContainerHandler(this))
- .addNext(new ClosedWithMismatchedReplicasHandler(this))
+ .addNext(new MismatchedReplicasHandler(this))
.addNext(new EmptyContainerHandler(this))
.addNext(new DeletingContainerHandler(this))
.addNext(ecReplicationCheckHandler)
diff --git
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/MismatchedReplicasHandler.java
similarity index 64%
rename from
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java
rename to
hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/MismatchedReplicasHandler.java
index 4428428d17..87730b2398 100644
---
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/ClosedWithMismatchedReplicasHandler.java
+++
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/MismatchedReplicasHandler.java
@@ -29,38 +29,43 @@ import org.slf4j.LoggerFactory;
import java.util.Set;
/**
- * Handler to process containers which are closed, but some replicas are still
- * open or closing. This handler will send a command to the datanodes for each
- * mis-matched replica to close it.
+ * Handler to process containers which are closed or quasi-closed, but some
+ * replicas are still open or closing. This handler will send a command to
+ * the datanodes for each mis-matched replica to close it.
*/
-public class ClosedWithMismatchedReplicasHandler extends AbstractCheck {
+public class MismatchedReplicasHandler extends AbstractCheck {
public static final Logger LOG =
- LoggerFactory.getLogger(ClosedWithMismatchedReplicasHandler.class);
+ LoggerFactory.getLogger(MismatchedReplicasHandler.class);
private ReplicationManager replicationManager;
- public ClosedWithMismatchedReplicasHandler(
+ public MismatchedReplicasHandler(
ReplicationManager replicationManager) {
this.replicationManager = replicationManager;
}
/**
- * Handles CLOSED EC or RATIS container. If some replicas are CLOSING or
- * OPEN, this sends a force-close command for them.
+ * Handles CLOSED EC or CLOSED/QUASI-CLOSED RATIS containers. If some
+ * replicas are CLOSING or OPEN, this tries to close them. Force close
+ * command is sent for CLOSED and close command is sent for QUASI-CLOSED
+ * containers (replicas of quasi-closed containers should move to
+ * quasi-closed state).
+ *
* @param request ContainerCheckRequest object representing the container
- * @return always returns true so that other handlers in the chain can fix
+ * @return always returns false so that other handlers in the chain can fix
* issues such as under replication
*/
@Override
public boolean handle(ContainerCheckRequest request) {
ContainerInfo containerInfo = request.getContainerInfo();
Set<ContainerReplica> replicas = request.getContainerReplicas();
- if (containerInfo.getState() != HddsProtos.LifeCycleState.CLOSED) {
- // Handler is only relevant for CLOSED containers.
+ if (containerInfo.getState() != HddsProtos.LifeCycleState.CLOSED &&
+ containerInfo.getState() != HddsProtos.LifeCycleState.QUASI_CLOSED) {
+ // Handler is only relevant for CLOSED or QUASI-CLOSED containers.
return false;
}
- LOG.debug("Checking container {} in ClosedWithMismatchedReplicasHandler",
+ LOG.debug("Checking container {} in MismatchedReplicasHandler",
containerInfo);
// close replica if its state is OPEN or CLOSING
@@ -68,8 +73,15 @@ public class ClosedWithMismatchedReplicasHandler extends
AbstractCheck {
if (isMismatched(replica)) {
LOG.debug("Sending close command for mismatched replica {} of " +
"container {}.", replica, containerInfo);
- replicationManager.sendCloseContainerReplicaCommand(
- containerInfo, replica.getDatanodeDetails(), true);
+
+ if (containerInfo.getState() == HddsProtos.LifeCycleState.CLOSED) {
+ replicationManager.sendCloseContainerReplicaCommand(
+ containerInfo, replica.getDatanodeDetails(), true);
+ } else if (containerInfo.getState() ==
+ HddsProtos.LifeCycleState.QUASI_CLOSED) {
+ replicationManager.sendCloseContainerReplicaCommand(
+ containerInfo, replica.getDatanodeDetails(), false);
+ }
}
}
@@ -81,8 +93,8 @@ public class ClosedWithMismatchedReplicasHandler extends
AbstractCheck {
}
/**
- * If a CLOSED container has an OPEN or CLOSING replica, there is a state
- * mismatch.
+ * If a CLOSED or QUASI-CLOSED container has an OPEN or CLOSING replica,
+ * there is a state mismatch.
* @param replica replica to check for mismatch
* @return true if the replica is in CLOSING or OPEN state, else false
*/
diff --git
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithMismatchedReplicasHandler.java
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestMismatchedReplicasHandler.java
similarity index 79%
rename from
hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithMismatchedReplicasHandler.java
rename to
hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestMismatchedReplicasHandler.java
index f2b3a72885..e67c1fa6c6 100644
---
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestClosedWithMismatchedReplicasHandler.java
+++
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestMismatchedReplicasHandler.java
@@ -38,17 +38,18 @@ import java.util.Set;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSED;
import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.OPEN;
+import static
org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.QUASI_CLOSED;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.ArgumentMatchers.anyBoolean;
import static org.mockito.Mockito.times;
/**
- * Tests for the ClosedWithMismatchedReplicasHandler.
+ * Tests for the MismatchedReplicasHandler.
*/
-public class TestClosedWithMismatchedReplicasHandler {
+public class TestMismatchedReplicasHandler {
private ReplicationManager replicationManager;
- private ClosedWithMismatchedReplicasHandler handler;
+ private MismatchedReplicasHandler handler;
private ECReplicationConfig ecReplicationConfig;
private RatisReplicationConfig ratisReplicationConfig;
@@ -58,7 +59,7 @@ public class TestClosedWithMismatchedReplicasHandler {
ratisReplicationConfig = RatisReplicationConfig.getInstance(
HddsProtos.ReplicationFactor.THREE);
replicationManager = Mockito.mock(ReplicationManager.class);
- handler = new ClosedWithMismatchedReplicasHandler(replicationManager);
+ handler = new MismatchedReplicasHandler(replicationManager);
}
@Test
@@ -222,4 +223,51 @@ public class TestClosedWithMismatchedReplicasHandler {
.sendCloseContainerReplicaCommand(
containerInfo, mismatch3.getDatanodeDetails(), true);
}
+
+ /**
+ * Mismatched replicas (OPEN or CLOSING) of a QUASI-CLOSED ratis container
+ * should be sent close (and not force close) commands.
+ */
+ @Test
+ public void testCloseCommandSentForMismatchedReplicaOfQuasiClosedContainer()
{
+ ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo(
+ ratisReplicationConfig, 1, QUASI_CLOSED);
+ ContainerReplica mismatch1 = ReplicationTestUtil.createContainerReplica(
+ containerInfo.containerID(), 0,
+ HddsProtos.NodeOperationalState.IN_SERVICE,
+ ContainerReplicaProto.State.OPEN);
+ ContainerReplica mismatch2 = ReplicationTestUtil.createContainerReplica(
+ containerInfo.containerID(), 0,
+ HddsProtos.NodeOperationalState.IN_SERVICE,
+ ContainerReplicaProto.State.CLOSING);
+ ContainerReplica mismatch3 = ReplicationTestUtil.createContainerReplica(
+ containerInfo.containerID(), 0,
+ HddsProtos.NodeOperationalState.IN_SERVICE,
+ ContainerReplicaProto.State.UNHEALTHY);
+ Set<ContainerReplica> containerReplicas = new HashSet<>();
+ containerReplicas.add(mismatch1);
+ containerReplicas.add(mismatch2);
+ containerReplicas.add(mismatch3);
+ ContainerCheckRequest request = new ContainerCheckRequest.Builder()
+ .setPendingOps(Collections.emptyList())
+ .setReport(new ReplicationManagerReport())
+ .setContainerInfo(containerInfo)
+ .setContainerReplicas(containerReplicas)
+ .build();
+
+ // this handler always returns false so other handlers can fix issues
+ // such as under replication
+ Assertions.assertFalse(handler.handle(request));
+
+ Mockito.verify(replicationManager, times(1))
+ .sendCloseContainerReplicaCommand(
+ containerInfo, mismatch1.getDatanodeDetails(), false);
+ Mockito.verify(replicationManager, times(1))
+ .sendCloseContainerReplicaCommand(
+ containerInfo, mismatch2.getDatanodeDetails(), false);
+ // close command should not be sent for unhealthy replica
+ Mockito.verify(replicationManager, times(0))
+ .sendCloseContainerReplicaCommand(
+ containerInfo, mismatch3.getDatanodeDetails(), false);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]