This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 66bc7eaa3d HDDS-12535. Intermittent failure in TestContainerReportHandling (#8060)
66bc7eaa3d is described below
commit 66bc7eaa3d6a9e75a324a3d4edf0a6e5bf5441f9
Author: Peter Lee <[email protected]>
AuthorDate: Fri Mar 14 02:35:44 2025 +0800
HDDS-12535. Intermittent failure in TestContainerReportHandling (#8060)
---
.../container/TestContainerReportHandling.java | 18 +++--------
.../TestContainerReportHandlingWithHA.java | 37 +++++++++-------------
.../apache/hadoop/ozone/container/TestHelper.java | 17 ++++++++++
3 files changed, 37 insertions(+), 35 deletions(-)
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
index d7eb78ad16..1aed5b76d3 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandling.java
@@ -23,11 +23,11 @@
import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL;
import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
import static
org.apache.hadoop.ozone.container.TestHelper.waitForContainerClose;
+import static
org.apache.hadoop.ozone.container.TestHelper.waitForContainerStateInSCM;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
import java.io.IOException;
import java.io.OutputStream;
@@ -41,7 +41,6 @@
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
-import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
import org.apache.hadoop.ozone.HddsDatanodeService;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.client.ObjectStore;
@@ -52,15 +51,12 @@
import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
-import org.apache.ozone.test.GenericTestUtils;
-import org.apache.ozone.test.tag.Flaky;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
/**
* Tests for container report handling.
*/
-@Flaky("HDDS-12535")
public class TestContainerReportHandling {
private static final String VOLUME = "vol1";
private static final String BUCKET = "bucket1";
@@ -97,6 +93,9 @@ void
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
ContainerID containerID =
ContainerID.valueOf(keyLocation.getContainerID());
waitForContainerClose(cluster, containerID.getId());
+ // also wait till the container is closed in SCM
+ waitForContainerStateInSCM(cluster.getStorageContainerManager(),
containerID, HddsProtos.LifeCycleState.CLOSED);
+
// move the container to DELETING
ContainerManager containerManager =
cluster.getStorageContainerManager().getContainerManager();
containerManager.updateContainerState(containerID,
HddsProtos.LifeCycleEvent.DELETE);
@@ -111,14 +110,7 @@ void
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
// restart a DN and wait for the container to get CLOSED.
HddsDatanodeService dn =
cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode());
cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
- GenericTestUtils.waitFor(() -> {
- try {
- return containerManager.getContainer(containerID).getState() ==
HddsProtos.LifeCycleState.CLOSED;
- } catch (ContainerNotFoundException e) {
- fail(e);
- }
- return false;
- }, 2000, 20000);
+ waitForContainerStateInSCM(cluster.getStorageContainerManager(),
containerID, HddsProtos.LifeCycleState.CLOSED);
assertEquals(HddsProtos.LifeCycleState.CLOSED,
containerManager.getContainer(containerID).getState());
}
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
index 9517fd9e45..357945a3fa 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestContainerReportHandlingWithHA.java
@@ -23,11 +23,11 @@
import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DEADNODE_INTERVAL;
import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_STALENODE_INTERVAL;
import static
org.apache.hadoop.ozone.container.TestHelper.waitForContainerClose;
+import static
org.apache.hadoop.ozone.container.TestHelper.waitForContainerStateInSCM;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;
-import static org.junit.jupiter.api.Assertions.fail;
import java.io.IOException;
import java.io.OutputStream;
@@ -35,13 +35,14 @@
import java.nio.file.Paths;
import java.util.List;
import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.hdds.client.RatisReplicationConfig;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos;
import org.apache.hadoop.hdds.scm.container.ContainerID;
import org.apache.hadoop.hdds.scm.container.ContainerManager;
-import org.apache.hadoop.hdds.scm.container.ContainerNotFoundException;
+import org.apache.hadoop.hdds.scm.server.StorageContainerManager;
import org.apache.hadoop.ozone.HddsDatanodeService;
import org.apache.hadoop.ozone.MiniOzoneCluster;
import org.apache.hadoop.ozone.MiniOzoneHAClusterImpl;
@@ -53,15 +54,12 @@
import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfo;
import org.apache.hadoop.ozone.om.helpers.OmKeyLocationInfoGroup;
-import org.apache.ozone.test.GenericTestUtils;
-import org.apache.ozone.test.tag.Flaky;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;
/**
* Tests for container report handling with SCM High Availability.
*/
-@Flaky("HDDS-12535")
public class TestContainerReportHandlingWithHA {
private static final String VOLUME = "vol1";
private static final String BUCKET = "bucket1";
@@ -99,6 +97,8 @@ void
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
ContainerID containerID =
ContainerID.valueOf(keyLocation.getContainerID());
waitForContainerClose(cluster, containerID.getId());
+ waitForContainerStateInAllSCMs(cluster, containerID,
HddsProtos.LifeCycleState.CLOSED);
+
// move the container to DELETING
ContainerManager containerManager =
cluster.getScmLeader().getContainerManager();
containerManager.updateContainerState(containerID,
HddsProtos.LifeCycleEvent.DELETE);
@@ -113,23 +113,8 @@ void
testDeletingOrDeletedContainerTransitionsToClosedWhenNonEmptyReplicaIsRepor
// restart a DN and wait for the container to get CLOSED in all SCMs
HddsDatanodeService dn =
cluster.getHddsDatanode(keyLocation.getPipeline().getFirstNode());
cluster.restartHddsDatanode(dn.getDatanodeDetails(), false);
- ContainerManager[] array = new ContainerManager[numSCM];
- for (int i = 0; i < numSCM; i++) {
- array[i] =
cluster.getStorageContainerManager(i).getContainerManager();
- }
- GenericTestUtils.waitFor(() -> {
- try {
- for (ContainerManager manager : array) {
- if (manager.getContainer(containerID).getState() !=
HddsProtos.LifeCycleState.CLOSED) {
- return false;
- }
- }
- return true;
- } catch (ContainerNotFoundException e) {
- fail(e);
- }
- return false;
- }, 2000, 20000);
+
+ waitForContainerStateInAllSCMs(cluster, containerID,
HddsProtos.LifeCycleState.CLOSED);
assertEquals(HddsProtos.LifeCycleState.CLOSED,
containerManager.getContainer(containerID).getState());
}
@@ -177,4 +162,12 @@ private void createTestData(OzoneClient client) throws
IOException {
}
}
+ private static void waitForContainerStateInAllSCMs(MiniOzoneHAClusterImpl
cluster, ContainerID containerID,
+ HddsProtos.LifeCycleState desiredState)
+ throws TimeoutException, InterruptedException {
+ for (StorageContainerManager scm :
cluster.getStorageContainerManagersList()) {
+ waitForContainerStateInSCM(scm, containerID, desiredState);
+ }
+ }
+
}
diff --git
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java
index 0713868586..c454facea0 100644
---
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java
+++
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/container/TestHelper.java
@@ -24,6 +24,7 @@
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
import java.io.IOException;
import java.security.MessageDigest;
@@ -455,4 +456,20 @@ public static void setConfig(OzoneConfiguration conf,
String key, String value)
conf.set(key, value);
}
}
+
+ public static void waitForContainerStateInSCM(StorageContainerManager scm,
+ ContainerID containerID, HddsProtos.LifeCycleState expectedState)
+ throws TimeoutException, InterruptedException {
+ ContainerManager containerManager = scm.getContainerManager();
+ GenericTestUtils.waitFor(() -> {
+ try {
+ return containerManager.getContainer(containerID).getState() ==
expectedState;
+ } catch (ContainerNotFoundException e) {
+ LOG.error("Container {} not found while waiting for state {}",
+ containerID, expectedState, e);
+ fail("Container " + containerID + " not found while waiting for state " + expectedState + ": " + e);
+ return false;
+ }
+ }, 2000, 20000);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]