This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 857491c230 HDDS-8880. Intermittent fork timeout in 
TestOMRatisSnapshots (#5022)
857491c230 is described below

commit 857491c230be99df6ad010d88c7aa446e562271f
Author: Christos Bisias <[email protected]>
AuthorDate: Wed Jul 5 11:28:22 2023 +0300

    HDDS-8880. Intermittent fork timeout in TestOMRatisSnapshots (#5022)
---
 .../hadoop/ozone/om/TestOMRatisSnapshots.java      | 48 ++++++++++++++++------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
index 5cd5b78849..b65a17207e 100644
--- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOMRatisSnapshots.java
@@ -50,12 +50,11 @@ import org.apache.ozone.test.GenericTestUtils;
 import org.apache.ozone.test.tag.Flaky;
 import org.apache.ratis.server.protocol.TermIndex;
 import org.assertj.core.api.Fail;
-import org.junit.Ignore;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.BeforeEach;
-import org.junit.jupiter.api.Disabled;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.TestInfo;
 import org.junit.jupiter.api.Timeout;
 import org.junit.jupiter.api.io.TempDir;
 import org.junit.jupiter.params.ParameterizedTest;
@@ -96,8 +95,6 @@ import static org.junit.jupiter.api.Assertions.assertNotNull;
  * Tests the Ratis snapshots feature in OM.
  */
 @Timeout(5000)
-@Flaky("HDDS-8876")
-@Disabled("HDDS-8880")
 public class TestOMRatisSnapshots {
 
   private MiniOzoneHAClusterImpl cluster = null;
@@ -127,7 +124,7 @@ public class TestOMRatisSnapshots {
    * @throws IOException
    */
   @BeforeEach
-  public void init() throws Exception {
+  public void init(TestInfo testInfo) throws Exception {
     conf = new OzoneConfiguration();
     clusterId = UUID.randomUUID().toString();
     scmId = UUID.randomUUID().toString();
@@ -137,9 +134,16 @@ public class TestOMRatisSnapshots {
         StorageUnit.KB);
     conf.setStorageSize(OMConfigKeys.
         OZONE_OM_RATIS_SEGMENT_PREALLOCATED_SIZE_KEY, 16, StorageUnit.KB);
+    long snapshotThreshold = SNAPSHOT_THRESHOLD;
+    // TODO: refactor tests to run under a new class with different configs.
+    if (testInfo.getTestMethod().isPresent() &&
+        testInfo.getTestMethod().get().getName()
+            .equals("testInstallSnapshot")) {
+      snapshotThreshold = SNAPSHOT_THRESHOLD * 10;
+    }
     conf.setLong(
         OMConfigKeys.OZONE_OM_RATIS_SNAPSHOT_AUTO_TRIGGER_THRESHOLD_KEY,
-        SNAPSHOT_THRESHOLD);
+        snapshotThreshold);
     cluster = (MiniOzoneHAClusterImpl) MiniOzoneCluster.newOMHABuilder(conf)
         .setClusterId(clusterId)
         .setScmId(scmId)
@@ -444,9 +448,15 @@ public class TestOMRatisSnapshots {
     // Read & Write after snapshot installed.
     List<String> newKeys = writeKeys(1);
     readKeys(newKeys);
-    assertNotNull(followerOMMetaMngr.getKeyTable(
-        TEST_BUCKET_LAYOUT).get(followerOMMetaMngr.getOzoneKey(
-        volumeName, bucketName, newKeys.get(0))));
+    GenericTestUtils.waitFor(() -> {
+      try {
+        return followerOMMetaMngr.getKeyTable(TEST_BUCKET_LAYOUT)
+            .get(followerOMMetaMngr.getOzoneKey(
+                volumeName, bucketName, newKeys.get(0))) != null;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }, 100, 10000);
 
     // Verify follower candidate directory get cleaned
     String[] filesInCandidate = followerOM.getOmSnapshotProvider().
@@ -550,6 +560,7 @@ public class TestOMRatisSnapshots {
 
   @Test
   @Timeout(300)
+  @Flaky("HDDS-8876")
   public void testInstallIncrementalSnapshotWithFailure() throws Exception {
     // Get the leader OM
     String leaderOMNodeId = OmFailoverProxyUtil
@@ -649,6 +660,11 @@ public class TestOMRatisSnapshots {
           .get(followerOMMetaMngr.getOzoneKey(volumeName, bucketName, key)));
     }
 
+    // There is a chance we end up checking the DBCheckpointMetrics
+    // before the follower sends another request to the leader
+    // to generate a checkpoint.
+    // TODO: Add wait check here, to avoid flakiness.
+
     // Verify the metrics
     DBCheckpointMetrics dbMetrics = leaderOM.getMetrics().
         getDBCheckpointMetrics();
@@ -664,9 +680,15 @@ public class TestOMRatisSnapshots {
     // Read & Write after snapshot installed.
     List<String> newKeys = writeKeys(1);
     readKeys(newKeys);
-    assertNotNull(followerOMMetaMngr.getKeyTable(
-        TEST_BUCKET_LAYOUT).get(followerOMMetaMngr.getOzoneKey(
-        volumeName, bucketName, newKeys.get(0))));
+    GenericTestUtils.waitFor(() -> {
+      try {
+        return followerOMMetaMngr.getKeyTable(TEST_BUCKET_LAYOUT)
+            .get(followerOMMetaMngr.getOzoneKey(
+                volumeName, bucketName, newKeys.get(0))) != null;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }, 100, 10000);
 
     // Verify follower candidate directory get cleaned
     String[] filesInCandidate = followerOM.getOmSnapshotProvider().
@@ -675,7 +697,7 @@ public class TestOMRatisSnapshots {
     assertEquals(0, filesInCandidate.length);
   }
 
-  @Ignore("Enable this unit test after RATIS-1481 used")
+  @Test
   public void testInstallSnapshotWithClientWrite() throws Exception {
     // Get the leader OM
     String leaderOMNodeId = OmFailoverProxyUtil


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to