This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 04d6e0bc54 HDDS-10231. ContainerStateManager should not finalize OPEN 
containers without a Pipeline. (#6123)
04d6e0bc54 is described below

commit 04d6e0bc5421ac4c666122d6755a4d132fc39cf0
Author: Nandakumar Vadivelu <[email protected]>
AuthorDate: Tue Jan 30 21:50:36 2024 +0530

    HDDS-10231. ContainerStateManager should not finalize OPEN containers 
without a Pipeline. (#6123)
---
 .../scm/container/ReplicationManagerReport.java    |  5 +-
 .../container/TestReplicationManagerReport.java    |  1 +
 .../scm/container/ContainerStateManagerImpl.java   | 10 ++--
 .../container/replication/ReplicationManager.java  | 10 ++++
 .../replication/health/OpenContainerHandler.java   | 24 +++++----
 .../replication/TestReplicationManager.java        | 14 +++++
 .../health/TestOpenContainerHandler.java           | 60 ++++++++++++++++++++++
 7 files changed, 107 insertions(+), 17 deletions(-)

diff --git 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManagerReport.java
 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManagerReport.java
index 1dbbc73843..a10846bd61 100644
--- 
a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManagerReport.java
+++ 
b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/container/ReplicationManagerReport.java
@@ -74,7 +74,10 @@ public class ReplicationManagerReport {
         "OpenUnhealthyContainers"),
     QUASI_CLOSED_STUCK(
         "Containers QuasiClosed with insufficient datanode origins",
-        "StuckQuasiClosedContainers");
+        "StuckQuasiClosedContainers"),
+    OPEN_WITHOUT_PIPELINE(
+        "Containers in OPEN state without any healthy Pipeline",
+        "OpenContainersWithoutPipeline");
 
     private String description;
     private String metricName;
diff --git 
a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManagerReport.java
 
b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManagerReport.java
index 3bf2ef4023..f022a6030c 100644
--- 
a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManagerReport.java
+++ 
b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/scm/container/TestReplicationManagerReport.java
@@ -112,6 +112,7 @@ class TestReplicationManagerReport {
     assertEquals(0, stats.get("EMPTY").longValue());
     assertEquals(0, stats.get("OPEN_UNHEALTHY").longValue());
     assertEquals(0, stats.get("QUASI_CLOSED_STUCK").longValue());
+    assertEquals(0, stats.get("OPEN_WITHOUT_PIPELINE").longValue());
 
     JsonNode samples = json.get("samples");
     assertEquals(ARRAY, samples.get("UNDER_REPLICATED").getNodeType());
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java
index 72d90abe1f..62e1f01935 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/ContainerStateManagerImpl.java
@@ -251,16 +251,12 @@ public final class ContainerStateManagerImpl
             pipelineManager.addContainerToPipelineSCMStart(
                 container.getPipelineID(), container.containerID());
           } catch (PipelineNotFoundException ex) {
+            // We are ignoring this here. The container will be moved to
+            // CLOSING state by ReplicationManager's OpenContainerHandler
+            // For more info: HDDS-10231
             LOG.warn("Found container {} which is in OPEN state with " +
                 "pipeline {} that does not exist. Marking container for " +
                 "closing.", container, container.getPipelineID());
-            try {
-              updateContainerState(container.containerID().getProtobuf(),
-                  LifeCycleEvent.FINALIZE);
-            } catch (InvalidStateTransitionException e) {
-              // This cannot happen.
-              LOG.warn("Unable to finalize Container {}.", container);
-            }
           }
         }
       }
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
index 979cff799f..a3661243be 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/ReplicationManager.java
@@ -61,6 +61,7 @@ import org.apache.hadoop.hdds.scm.ha.SCMService;
 import org.apache.hadoop.hdds.scm.node.NodeManager;
 import org.apache.hadoop.hdds.scm.node.NodeStatus;
 import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
+import org.apache.hadoop.hdds.scm.pipeline.PipelineNotFoundException;
 import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
 import org.apache.hadoop.hdds.server.events.EventPublisher;
 import 
org.apache.hadoop.ozone.common.statemachine.InvalidStateTransitionException;
@@ -1545,5 +1546,14 @@ public class ReplicationManager implements SCMService {
   private static boolean isEC(ReplicationConfig replicationConfig) {
     return replicationConfig.getReplicationType() == EC;
   }
+
+  public boolean hasHealthyPipeline(ContainerInfo container) {
+    try {
+      return scmContext.getScm().getPipelineManager()
+          .getPipeline(container.getPipelineID()) != null;
+    } catch (PipelineNotFoundException e) {
+      return false;
+    }
+  }
 }
 
diff --git 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/OpenContainerHandler.java
 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/OpenContainerHandler.java
index 2c0b405db9..21c3c76d3e 100644
--- 
a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/OpenContainerHandler.java
+++ 
b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/OpenContainerHandler.java
@@ -53,20 +53,26 @@ public class OpenContainerHandler extends AbstractCheck {
     if (containerInfo.getState() == HddsProtos.LifeCycleState.OPEN) {
       LOG.debug("Checking open container {} in OpenContainerHandler",
           containerInfo);
-      if (!isOpenContainerHealthy(
-          containerInfo, request.getContainerReplicas())) {
-        // This is an unhealthy open container, so we need to trigger the
-        // close process on it.
-        LOG.debug("Container {} is open but unhealthy. Triggering close.",
-            containerInfo);
-        request.getReport().incrementAndSample(
-            ReplicationManagerReport.HealthState.OPEN_UNHEALTHY,
+      final boolean noPipeline = 
!replicationManager.hasHealthyPipeline(containerInfo);
+      // Minor optimization. If noPipeline is true, isOpenContainerHealthy 
will not
+      // be called.
+      final boolean unhealthy = noPipeline || 
!isOpenContainerHealthy(containerInfo,
+          request.getContainerReplicas());
+      if (unhealthy) {
+        // For an OPEN container, we close the container
+        // if the container has no Pipeline or if the container is unhealthy.
+        LOG.info("Container {} is open but {}. Triggering close.",
+            containerInfo, noPipeline ? "has no Pipeline" : "unhealthy");
+
+        request.getReport().incrementAndSample(noPipeline ?
+                ReplicationManagerReport.HealthState.OPEN_WITHOUT_PIPELINE :
+                ReplicationManagerReport.HealthState.OPEN_UNHEALTHY,
             containerInfo.containerID());
+
         if (!request.isReadOnly()) {
           replicationManager
               .sendCloseContainerEvent(containerInfo.containerID());
         }
-        return true;
       }
       // For open containers we do not want to do any further processing in RM
       // so return true to stop the command chain.
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
index c67008c097..47844f32fb 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java
@@ -28,6 +28,7 @@ import org.apache.hadoop.hdds.protocol.MockDatanodeDetails;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.ContainerReplicaProto;
 import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.SCMCommandProto;
+import org.apache.hadoop.hdds.scm.HddsTestUtils;
 import org.apache.hadoop.hdds.scm.PlacementPolicy;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
@@ -43,6 +44,9 @@ import org.apache.hadoop.hdds.scm.ha.SCMServiceManager;
 import org.apache.hadoop.hdds.scm.node.NodeManager;
 import org.apache.hadoop.hdds.scm.node.NodeStatus;
 import org.apache.hadoop.hdds.scm.node.states.NodeNotFoundException;
+import org.apache.hadoop.hdds.scm.pipeline.PipelineManager;
+import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
+import org.apache.hadoop.hdds.security.token.ContainerTokenGenerator;
 import org.apache.hadoop.hdds.server.events.EventPublisher;
 import org.apache.hadoop.ozone.protocol.commands.DeleteContainerCommand;
 import 
org.apache.hadoop.ozone.protocol.commands.ReconstructECContainersCommand;
@@ -174,6 +178,16 @@ public class TestReplicationManager {
     // Ensure that RM will run when asked.
     when(scmContext.isLeaderReady()).thenReturn(true);
     when(scmContext.isInSafeMode()).thenReturn(false);
+
+    PipelineManager pipelineManager = mock(PipelineManager.class);
+    when(pipelineManager.getPipeline(any()))
+        .thenReturn(HddsTestUtils.getRandomPipeline());
+
+    StorageContainerManager scm = mock(StorageContainerManager.class);
+    when(scm.getPipelineManager()).thenReturn(pipelineManager);
+    
when(scm.getContainerTokenGenerator()).thenReturn(ContainerTokenGenerator.DISABLED);
+
+    when(scmContext.getScm()).thenReturn(scm);
   }
 
   private ReplicationManager createReplicationManager() throws IOException {
diff --git 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestOpenContainerHandler.java
 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestOpenContainerHandler.java
index a950008ec9..dec61610d1 100644
--- 
a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestOpenContainerHandler.java
+++ 
b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestOpenContainerHandler.java
@@ -24,17 +24,20 @@ import 
org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolPro
 import org.apache.hadoop.hdds.scm.container.ContainerInfo;
 import org.apache.hadoop.hdds.scm.container.ContainerReplica;
 import org.apache.hadoop.hdds.scm.container.ReplicationManagerReport;
+import 
org.apache.hadoop.hdds.scm.container.ReplicationManagerReport.HealthState;
 import org.apache.hadoop.hdds.scm.container.replication.ContainerCheckRequest;
 import org.apache.hadoop.hdds.scm.container.replication.ReplicationManager;
 import org.apache.hadoop.hdds.scm.container.replication.ReplicationTestUtil;
 import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Test;
+import org.mockito.Mockito;
 
 import java.util.Collections;
 import java.util.Set;
 
 import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.CLOSED;
 import static 
org.apache.hadoop.hdds.protocol.proto.HddsProtos.LifeCycleState.OPEN;
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.mockito.Mockito.mock;
@@ -58,6 +61,7 @@ public class TestOpenContainerHandler {
     ratisReplicationConfig = RatisReplicationConfig.getInstance(
         HddsProtos.ReplicationFactor.THREE);
     replicationManager = mock(ReplicationManager.class);
+    
Mockito.when(replicationManager.hasHealthyPipeline(any())).thenReturn(true);
     openContainerHandler = new OpenContainerHandler(replicationManager);
   }
 
@@ -119,8 +123,36 @@ public class TestOpenContainerHandler {
     assertTrue(openContainerHandler.handle(readRequest));
     verify(replicationManager, times(1))
         .sendCloseContainerEvent(containerInfo.containerID());
+    assertEquals(1, request.getReport().getStat(HealthState.OPEN_UNHEALTHY));
   }
 
+  @Test
+  public void testOpenContainerWithoutPipelineIsClosed() {
+    
Mockito.when(replicationManager.hasHealthyPipeline(any())).thenReturn(false);
+    ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo(
+        ecReplicationConfig, 1, OPEN);
+    Set<ContainerReplica> containerReplicas = ReplicationTestUtil
+        .createReplicas(containerInfo.containerID(),
+            ContainerReplicaProto.State.OPEN, 1, 2, 3, 4);
+    ContainerCheckRequest request = new ContainerCheckRequest.Builder()
+        .setPendingOps(Collections.emptyList())
+        .setReport(new ReplicationManagerReport())
+        .setContainerInfo(containerInfo)
+        .setContainerReplicas(containerReplicas)
+        .build();
+    ContainerCheckRequest readRequest = new ContainerCheckRequest.Builder()
+        .setPendingOps(Collections.emptyList())
+        .setReport(new ReplicationManagerReport())
+        .setContainerInfo(containerInfo)
+        .setContainerReplicas(containerReplicas)
+        .setReadOnly(true)
+        .build();
+    assertTrue(openContainerHandler.handle(request));
+    assertTrue(openContainerHandler.handle(readRequest));
+    verify(replicationManager, times(1))
+        .sendCloseContainerEvent(containerInfo.containerID());
+    assertEquals(1, 
request.getReport().getStat(HealthState.OPEN_WITHOUT_PIPELINE));
+  }
   @Test
   public void testClosedRatisContainerReturnsFalse() {
     ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo(
@@ -178,5 +210,33 @@ public class TestOpenContainerHandler {
     assertTrue(openContainerHandler.handle(request));
     assertTrue(openContainerHandler.handle(readRequest));
     verify(replicationManager, times(1)).sendCloseContainerEvent(any());
+    assertEquals(1, request.getReport().getStat(HealthState.OPEN_UNHEALTHY));
+  }
+
+  @Test
+  public void testOpenRatisContainerWithoutPipelineIsClosed() {
+    
Mockito.when(replicationManager.hasHealthyPipeline(any())).thenReturn(false);
+    ContainerInfo containerInfo = ReplicationTestUtil.createContainerInfo(
+        ratisReplicationConfig, 1, OPEN);
+    Set<ContainerReplica> containerReplicas = ReplicationTestUtil
+        .createReplicas(containerInfo.containerID(),
+            ContainerReplicaProto.State.OPEN, 0, 0, 0);
+    ContainerCheckRequest request = new ContainerCheckRequest.Builder()
+        .setPendingOps(Collections.emptyList())
+        .setReport(new ReplicationManagerReport())
+        .setContainerInfo(containerInfo)
+        .setContainerReplicas(containerReplicas)
+        .build();
+    ContainerCheckRequest readRequest = new ContainerCheckRequest.Builder()
+        .setPendingOps(Collections.emptyList())
+        .setReport(new ReplicationManagerReport())
+        .setContainerInfo(containerInfo)
+        .setContainerReplicas(containerReplicas)
+        .setReadOnly(true)
+        .build();
+    assertTrue(openContainerHandler.handle(request));
+    assertTrue(openContainerHandler.handle(readRequest));
+    verify(replicationManager, times(1)).sendCloseContainerEvent(any());
+    assertEquals(1, 
request.getReport().getStat(HealthState.OPEN_WITHOUT_PIPELINE));
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to