This is an automated email from the ASF dual-hosted git repository.
adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 857491c230 HDDS-8880. Intermittent fork timeout in TestOMRatisSnapshots (#5022)
857491c230 is described below
commit 857491c230be99df6ad010d88c7aa446e562271f
Author: Christos Bisias <[email protected]>
AuthorDate: Wed Jul 5 11:28:22 2023 +0300
HDDS-8880. Intermittent fork timeout in TestOMRatisSnapshots (#5022)
---
.../hadoop/ozone/om/TestOMRatisSnapshots.java | 48 ++++++++++++++++------
1 file changed, 35 insertions(+), 13 deletions(-)
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
index 5cd5b78849..b65a17207e 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
@@ -50,12 +50,11 @@ import org.apache.ozone.test.GenericTestUtils;
import org.apache.ozone.test.tag.Flaky;
import org.apache.ratis.server.protocol.TermIndex;
import org.assertj.core.api.Fail;
-import org.junit.Ignore;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
import org.junit.jupiter.api.Timeout;
import org.junit.jupiter.api.io.TempDir;
import org.junit.jupiter.params.ParameterizedTest;
@@ -96,8 +95,6 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
* Tests the Ratis snapshots feature in OM.
*/
@Timeout(5000)
-@Flaky("HDDS-8876")
-@Disabled("HDDS-8880")
public class TestOMRatisSnapshots {
private MiniOzoneHAClusterImpl cluster = null;
@@ -127,7 +124,7 @@ public class TestOMRatisSnapshots {
* @throws IOException
*/
@BeforeEach
- public void init() throws Exception {
+ public void init(TestInfo testInfo) throws Exception {
conf = new OzoneConfiguration();
clusterId = UUID.randomUUID().toString();
scmId = UUID.randomUUID().toString();
@@ -137,9 +134,16 @@ public class TestOMRatisSnapshots {
StorageUnit.KB);
conf.setStorageSize(OMConfigKeys.
OZONE_OM_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY, 16, StorageUnit.KB);
+ long snapshotThreshold = SNAPSHOT_THRESHOLD;
+ // TODO: refactor tests to run under a new class with different configs.
+ if (testInfo.getTestMethod().isPresent() &&
+ testInfo.getTestMethod().get().getName()
+ .equals("testInstallSnapshot")) {
+ snapshotThreshold = SNAPSHOT_THRESHOLD * 10;
+ }
conf.setLong(
OMConfigKeys.OZONE_OM_RATIS_SNAPSHOT_AUTO_TRIGGER_THRESHOLD_KEY,
- SNAPSHOT_THRESHOLD);
+ snapshotThreshold);
cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newOMHABuilder(conf)
.setClusterId(clusterId)
.setScmId(scmId)
@@ -444,9 +448,15 @@ public class TestOMRatisSnapshots {
// Read & Write after snapshot installed.
List<String> newKeys = writeKeys(1);
readKeys(newKeys);
- assertNotNull(followerOMMetaMngr.getKeyTable(
- TEST_BUCKET_LAYOUT).get(followerOMMetaMngr.getOzoneKey(
- volumeName, bucketName, newKeys.get(0))));
+ GenericTestUtils.waitFor(() -> {
+ try {
+ return followerOMMetaMngr.getKeyTable(TEST_BUCKET_LAYOUT)
+ .get(followerOMMetaMngr.getOzoneKey(
+ volumeName, bucketName, newKeys.get(0))) != null;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }, 100, 10000);
// Verify follower candidate directory get cleaned
String[] filesInCandidate = followerOM.getOmSnapshotProvider().
@@ -550,6 +560,7 @@ public class TestOMRatisSnapshots {
@Test
@Timeout(300)
+ @Flaky("HDDS-8876")
public void testInstallIncrementalSnapshotWithFailure() throws Exception {
// Get the leader OM
String leaderOMNodeId = OmFailoverProxyUtil
@@ -649,6 +660,11 @@ public class TestOMRatisSnapshots {
.get(followerOMMetaMngr.getOzoneKey(volumeName, bucketName, key)));
}
+ // There is a chance we end up checking the DBCheckpointMetrics
+ // before the follower sends another request to the leader
+ // to generate a checkpoint.
+ // TODO: Add wait check here, to avoid flakiness.
+
// Verify the metrics
DBCheckpointMetrics dbMetrics = leaderOM.getMetrics().
getDBCheckpointMetrics();
@@ -664,9 +680,15 @@ public class TestOMRatisSnapshots {
// Read & Write after snapshot installed.
List<String> newKeys = writeKeys(1);
readKeys(newKeys);
- assertNotNull(followerOMMetaMngr.getKeyTable(
- TEST_BUCKET_LAYOUT).get(followerOMMetaMngr.getOzoneKey(
- volumeName, bucketName, newKeys.get(0))));
+ GenericTestUtils.waitFor(() -> {
+ try {
+ return followerOMMetaMngr.getKeyTable(TEST_BUCKET_LAYOUT)
+ .get(followerOMMetaMngr.getOzoneKey(
+ volumeName, bucketName, newKeys.get(0))) != null;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }, 100, 10000);
// Verify follower candidate directory get cleaned
String[] filesInCandidate = followerOM.getOmSnapshotProvider().
@@ -675,7 +697,7 @@ public class TestOMRatisSnapshots {
assertEquals(0, filesInCandidate.length);
}
- @Ignore("Enable this unit test after RATIS-1481 used")
+ @Test
public void testInstallSnapshotWithClientWrite() throws Exception {
// Get the leader OM
String leaderOMNodeId = OmFailoverProxyUtil
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]