This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 66bc7eaa3d HDDS-12535. Intermittent failure in TestContainerReportHandling (#8060)
66bc7eaa3d is described below

commit 66bc7eaa3d6a9e75a324a3d4edf0a6e5bf5441f9
Author: Peter Lee <[email protected]>
AuthorDate: Fri Mar 14 02:35:44 2025 +0800

    HDDS-12535. Intermittent failure in TestContainerReportHandling (#8060)
---
 .../container/TestContainerReportHandling.java     | 18 +++--------
 .../TestContainerReportHandlingWithHA.java         | 37 +++++++++-------------
 .../apache/hadoop/ozone/container/TestHelper.java  | 17 ++++++++++
 3 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
index d7eb78ad16..1aed5b76d3 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
@@ -23,11 +23,11 @@
 import static 
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL;
 import static 
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
 import static 
org.apache.hadoop.ozone.container.TestHelper.waitForContainerClose;
+import static 
org.apache.hadoop.ozone.container.TestHelper.waitForContainerStateInSCM;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
 
 import java.io.IOException;
 import java.io.OutputStream;
@@ -41,7 +41,6 @@
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerManager;
-import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
 import org.apache.hadoop.ozone.HddsDatanodeService;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
 import org.apache.hadoop.ozone.client.ObjectStore;
@@ -52,15 +51,12 @@
 import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
 import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
 import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
-import org.apache.ozone.test.GenericTestUtils;
-import org.apache.ozone.test.tag.Flaky;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.EnumSource;
 
 /**
  * Tests for container report handling.
  */
-@Flaky("HDDS-12535")
 public class TestContainerReportHandling {
   private static final String VOLUME = "vol1";
   private static final String BUCKET = "bucket1";
@@ -97,6 +93,9 @@ void 
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
         ContainerID containerID = 
ContainerID.valueOf(keyLocation.getContainerID());
         waitForContainerClose(cluster, containerID.getId());
 
+        // also wait till the container is closed in SCM
+        waitForContainerStateInSCM(cluster.getStorageContainerManager(), containerID, HddsProtos.LifeCycleState.CLOSED);
+
         // move the container to DELETING
         ContainerManager containerManager = 
cluster.getStorageContainerManager().getContainerManager();
         containerManager.updateContainerState(containerID, 
HddsProtos.LifeCycleEvent.DELETE);
@@ -111,14 +110,7 @@ void 
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
         // restart a DN and wait for the container to get CLOSED.
         HddsDatanodeService dn = 
cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode());
         cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
-        GenericTestUtils.waitFor(() -> {
-          try {
-            return containerManager.getContainer(containerID).getState() == 
HddsProtos.LifeCycleState.CLOSED;
-          } catch (ContainerNotFoundException e) {
-            fail(e);
-          }
-          return false;
-        }, 2000, 20000);
+        waitForContainerStateInSCM(cluster.getStorageContainerManager(), containerID, HddsProtos.LifeCycleState.CLOSED);
 
         assertEquals(HddsProtos.LifeCycleState.CLOSED, 
containerManager.getContainer(containerID).getState());
       }
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
index 9517fd9e45..357945a3fa 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
@@ -23,11 +23,11 @@
 import static 
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL;
 import static 
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
 import static 
org.apache.hadoop.ozone.container.TestHelper.waitForContainerClose;
+import static 
org.apache.hadoop.ozone.container.TestHelper.waitForContainerStateInSCM;
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
 import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
 
 import java.io.IOException;
 import java.io.OutputStream;
@@ -35,13 +35,14 @@
 import java.nio.file.Paths;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import org.apache.hadoop.fs.FileUtil;
 import org.apache.hadoop.hdds.client.RatisReplicationConfig;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
 import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
 import org.apache.hadoop.hdds.scm.container.ContainerID;
 import org.apache.hadoop.hdds.scm.container.ContainerManager;
-import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
+import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
 import org.apache.hadoop.ozone.HddsDatanodeService;
 import org.apache.hadoop.ozone.MiniOzoneCluster;
 import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
@@ -53,15 +54,12 @@
 import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
 import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
 import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
-import org.apache.ozone.test.GenericTestUtils;
-import org.apache.ozone.test.tag.Flaky;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.EnumSource;
 
 /**
  * Tests for container report handling with SCM High Availability.
  */
-@Flaky("HDDS-12535")
 public class TestContainerReportHandlingWithHA {
   private static final String VOLUME = "vol1";
   private static final String BUCKET = "bucket1";
@@ -99,6 +97,8 @@ void 
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
         ContainerID containerID = 
ContainerID.valueOf(keyLocation.getContainerID());
         waitForContainerClose(cluster, containerID.getId());
 
+        waitForContainerStateInAllSCMs(cluster, containerID, 
HddsProtos.LifeCycleState.CLOSED);
+
         // move the container to DELETING
         ContainerManager containerManager = 
cluster.getScmLeader().getContainerManager();
         containerManager.updateContainerState(containerID, 
HddsProtos.LifeCycleEvent.DELETE);
@@ -113,23 +113,8 @@ void 
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
         // restart a DN and wait for the container to get CLOSED in all SCMs
         HddsDatanodeService dn = 
cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode());
         cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
-        ContainerManager[] array = new ContainerManager[numSCM];
-        for (int i = 0; i < numSCM; i++) {
-          array[i] = 
cluster.getStorageContainerManager(i).getContainerManager();
-        }
-        GenericTestUtils.waitFor(() -> {
-          try {
-            for (ContainerManager manager : array) {
-              if (manager.getContainer(containerID).getState() != 
HddsProtos.LifeCycleState.CLOSED) {
-                return false;
-              }
-            }
-            return true;
-          } catch (ContainerNotFoundException e) {
-            fail(e);
-          }
-          return false;
-        }, 2000, 20000);
+
+        waitForContainerStateInAllSCMs(cluster, containerID, 
HddsProtos.LifeCycleState.CLOSED);
 
         assertEquals(HddsProtos.LifeCycleState.CLOSED, 
containerManager.getContainer(containerID).getState());
       }
@@ -177,4 +162,12 @@ private void createTestData(OzoneClient client) throws 
IOException {
     }
   }
 
+  private static void waitForContainerStateInAllSCMs(MiniOzoneHAClusterImpl cluster, ContainerID containerID,
+      HddsProtos.LifeCycleState desiredState)
+      throws TimeoutException, InterruptedException {
+    for (StorageContainerManager scm : cluster.getStorageContainerManagersList()) {
+      waitForContainerStateInSCM(scm, containerID, desiredState);
+    }
+  }
+
 }
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java
index 0713868586..c454facea0 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java
@@ -24,6 +24,7 @@
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertInstanceOf;
 import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
 
 import java.io.IOException;
 import java.security.MessageDigest;
@@ -455,4 +456,20 @@ public static void setConfig(OzoneConfiguration conf, String key, String value)
       conf.set(key, value);
     }
   }
+
+  public static void waitForContainerStateInSCM(StorageContainerManager scm,
+      ContainerID containerID, HddsProtos.LifeCycleState expectedState)
+      throws TimeoutException, InterruptedException {
+    ContainerManager containerManager = scm.getContainerManager();
+    GenericTestUtils.waitFor(() -> {
+      try {
+        return containerManager.getContainer(containerID).getState() == expectedState;
+      } catch (ContainerNotFoundException e) {
+        LOG.error("Container {} not found while waiting for state {}", 
+            containerID, expectedState, e);
+        fail("Container " + containerID + " not found while waiting for state " + expectedState + ": " + e);
+        return false;
+      }
+    }, 2000, 20000);
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to