This is an automated email from the ASF dual-hosted git repository.

adoroszlai pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 29cfb0e5f5 HDDS-9429. Intermittent error in 
TestOzoneManagerHAMetadataOnly#testAllVolumeOperations (#5476)
29cfb0e5f5 is described below

commit 29cfb0e5f511c3084f79d7e2932aef7cf4188997
Author: Doroszlai, Attila <[email protected]>
AuthorDate: Tue Oct 24 18:53:17 2023 +0200

    HDDS-9429. Intermittent error in 
TestOzoneManagerHAMetadataOnly#testAllVolumeOperations (#5476)
---
 .../apache/hadoop/ozone/MiniOzoneClusterImpl.java  |  8 +++----
 .../hadoop/ozone/MiniOzoneHAClusterImpl.java       | 26 +++++++++++-----------
 .../apache/hadoop/ozone/om/TestOzoneManagerHA.java | 23 +++++++++++++++++--
 .../ozone/om/TestOzoneManagerHAMetadataOnly.java   |  2 --
 .../hadoop/ozone/om/TestOzoneManagerHAMetrics.java | 20 -----------------
 .../hadoop/ozone/om/TestOzoneManagerPrepare.java   | 15 ++++++-------
 .../org/apache/hadoop/ozone/om/OzoneManager.java   |  6 +++--
 .../hadoop/ozone/om/OzoneManagerStarter.java       |  5 +++--
 8 files changed, 51 insertions(+), 54 deletions(-)

diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
index cd21063156..d904d1372d 100644
--- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java
@@ -372,7 +372,7 @@ public class MiniOzoneClusterImpl implements 
MiniOzoneCluster {
 
   @Override
   public void restartOzoneManager() throws IOException {
-    ozoneManager.stop();
+    stopOM(ozoneManager);
     ozoneManager.restart();
   }
 
@@ -551,10 +551,8 @@ public class MiniOzoneClusterImpl implements 
MiniOzoneCluster {
     }
   }
 
-  private static void stopOM(OzoneManager om) {
-    if (om != null) {
-      LOG.info("Stopping the OzoneManager");
-      om.stop();
+  protected static void stopOM(OzoneManager om) {
+    if (om != null && om.stop()) {
       om.join();
     }
   }
diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
index 80fdd5ddb6..8e37a319be 100644
--- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
@@ -221,8 +221,11 @@ public class MiniOzoneHAClusterImpl extends 
MiniOzoneClusterImpl {
 
   @Override
   public void restartOzoneManager() throws IOException {
-    for (OzoneManager ozoneManager : this.omhaService.getServices()) {
-      ozoneManager.stop();
+    for (OzoneManager ozoneManager : omhaService.getActiveServices()) {
+      stopOM(ozoneManager);
+    }
+    omhaService.inactiveServices().forEachRemaining(omhaService::activate);
+    for (OzoneManager ozoneManager : omhaService.getServices()) {
       ozoneManager.restart();
     }
   }
@@ -301,8 +304,7 @@ public class MiniOzoneHAClusterImpl extends 
MiniOzoneClusterImpl {
     for (OzoneManager ozoneManager : this.omhaService.getServices()) {
       if (ozoneManager != null) {
         LOG.info("Stopping the OzoneManager {}", ozoneManager.getOMNodeId());
-        ozoneManager.stop();
-        ozoneManager.join();
+        stopOM(ozoneManager);
       }
     }
 
@@ -317,17 +319,16 @@ public class MiniOzoneHAClusterImpl extends 
MiniOzoneClusterImpl {
   }
 
   public void stopOzoneManager(int index) {
-    OzoneManager om = omhaService.getServices().get(index);
-    om.stop();
-    om.join();
+    stopAndDeactivate(omhaService.getServices().get(index));
+  }
+
+  private void stopAndDeactivate(OzoneManager om) {
+    stopOM(om);
     omhaService.deactivate(om);
   }
 
   public void stopOzoneManager(String omNodeId) {
-    OzoneManager om = omhaService.getServiceById(omNodeId);
-    om.stop();
-    om.join();
-    omhaService.deactivate(om);
+    stopAndDeactivate(omhaService.getServiceById(omNodeId));
   }
 
   private static void configureOMPorts(ConfigurationTarget conf,
@@ -514,8 +515,7 @@ public class MiniOzoneHAClusterImpl extends 
MiniOzoneClusterImpl {
           break;
         } catch (BindException e) {
           for (OzoneManager om : omList) {
-            om.stop();
-            om.join();
+            stopOM(om);
             LOG.info("Stopping OzoneManager server at {}",
                 om.getOmRpcServerAddr());
           }
diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
index 8197cdf42c..86977d2490 100644
--- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
@@ -44,6 +44,7 @@ import org.apache.ozone.test.GenericTestUtils;
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.AfterEach;
 import org.junit.jupiter.api.BeforeAll;
+import org.junit.jupiter.api.BeforeEach;
 import org.junit.jupiter.api.Timeout;
 
 import java.io.IOException;
@@ -52,6 +53,8 @@ import java.time.Duration;
 import java.util.Iterator;
 import java.util.UUID;
 import java.util.HashMap;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 
 import static java.nio.charset.StandardCharsets.UTF_8;
 import static 
org.apache.hadoop.fs.CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY;
@@ -64,6 +67,7 @@ import static 
org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_CLIENT_FAILOVER_MAX_
 import static org.apache.hadoop.ozone.OzoneConsts.OM_KEY_PREFIX;
 import static 
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_DEFAULT_BUCKET_LAYOUT;
 import static 
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_KEY_DELETING_LIMIT_PER_TASK;
+import static 
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_RATIS_SERVER_FAILURE_TIMEOUT_DURATION_DEFAULT;
 import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 import static org.junit.jupiter.api.Assertions.fail;
@@ -198,13 +202,20 @@ public abstract class TestOzoneManagerHA {
     objectStore = client.getObjectStore();
   }
 
+  /**
+   * After restarting OMs we need to wait
+   * for a leader to be elected and ready.
+   */
+  @BeforeEach
+  protected void setup() throws Exception {
+    waitForLeaderToBeReady();
+  }
 
   /**
    * Reset cluster between tests.
    */
   @AfterEach
-  public void resetCluster()
-      throws IOException {
+  public void resetCluster() throws Exception {
     if (cluster != null) {
       cluster.restartOzoneManager();
     }
@@ -470,4 +481,12 @@ public abstract class TestOzoneManagerHA {
     }
   }
 
+  protected void waitForLeaderToBeReady()
+      throws InterruptedException, TimeoutException {
+    // Wait for Leader Election timeout
+    int timeout = OZONE_OM_RATIS_SERVER_FAILURE_TIMEOUT_DURATION_DEFAULT
+        .toIntExact(TimeUnit.MILLISECONDS);
+    GenericTestUtils.waitFor(() ->
+        getCluster().getOMLeader() != null, 500, timeout);
+  }
 }
diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAMetadataOnly.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAMetadataOnly.java
index a43d7c9bf9..a1a5477d14 100644
--- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAMetadataOnly.java
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAMetadataOnly.java
@@ -96,7 +96,6 @@ public class TestOzoneManagerHAMetadataOnly extends 
TestOzoneManagerHA {
 
   @Test
   public void testAllVolumeOperations() throws Exception {
-
     String volumeName = "volume" + RandomStringUtils.randomNumeric(5);
 
     createAndCheckVolume(volumeName);
@@ -114,7 +113,6 @@ public class TestOzoneManagerHAMetadataOnly extends 
TestOzoneManagerHA {
 
   @Test
   public void testAllBucketOperations() throws Exception {
-
     String volumeName = "volume" + RandomStringUtils.randomNumeric(5);
     String bucketName = "volume" + RandomStringUtils.randomNumeric(5);
 
diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAMetrics.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAMetrics.java
index 2b18babc7b..3422494710 100644
--- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAMetrics.java
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHAMetrics.java
@@ -17,15 +17,10 @@
 package org.apache.hadoop.ozone.om;
 
 import org.apache.hadoop.ozone.om.ha.OMHAMetrics;
-import org.apache.ozone.test.GenericTestUtils;
 import org.junit.jupiter.api.Assertions;
 import org.junit.jupiter.api.Test;
 
 import java.util.List;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.TimeoutException;
-
-import static 
org.apache.hadoop.ozone.om.OMConfigKeys.OZONE_OM_RATIS_SERVER_FAILURE_TIMEOUT_DURATION_DEFAULT;
 
 /**
  * Test Ozone Manager HA Metrics.
@@ -34,8 +29,6 @@ public class TestOzoneManagerHAMetrics extends 
TestOzoneManagerHA {
 
   @Test
   public void testOMHAMetrics() throws Exception {
-    waitForLeaderToBeReady();
-
     // Get leader OM
     OzoneManager leaderOM = getCluster().getOMLeader();
     // Store current leader's node ID,
@@ -88,17 +81,4 @@ public class TestOzoneManagerHAMetrics extends 
TestOzoneManagerHA {
     }
   }
 
-
-  /**
-   * After restarting OMs we need to wait
-   * for a leader to be elected and ready.
-   */
-  private void waitForLeaderToBeReady()
-      throws InterruptedException, TimeoutException {
-    // Wait for Leader Election timeout
-    int timeout = OZONE_OM_RATIS_SERVER_FAILURE_TIMEOUT_DURATION_DEFAULT
-        .toIntExact(TimeUnit.MILLISECONDS);
-    GenericTestUtils.waitFor(() ->
-        getCluster().getOMLeader() != null, 500, timeout);
-  }
 }
diff --git 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerPrepare.java
 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerPrepare.java
index 20b343dbf9..9cb6395a6f 100644
--- 
a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerPrepare.java
+++ 
b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerPrepare.java
@@ -51,7 +51,6 @@ import org.apache.hadoop.ozone.container.TestHelper;
 import org.apache.hadoop.ozone.om.exceptions.OMException;
 import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
 import 
org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos.PrepareStatusResponse.PrepareStatus;
-import org.apache.ozone.test.GenericTestUtils;
 import org.apache.ozone.test.LambdaTestUtils;
 import org.apache.ozone.test.tag.Slow;
 import org.apache.ozone.test.tag.Unhealthy;
@@ -83,7 +82,7 @@ public class TestOzoneManagerPrepare extends 
TestOzoneManagerHA {
   private static final Logger LOG =
       LoggerFactory.getLogger(TestOzoneManagerPrepare.class);
 
-  public void setup() throws Exception {
+  private void initInstanceVariables() {
     cluster = getCluster();
     store = getObjectStore();
     clientProtocol = store.getClientProxy();
@@ -91,14 +90,14 @@ public class TestOzoneManagerPrepare extends 
TestOzoneManagerHA {
 
   /**
    * Make sure OM is out of Prepare state before executing individual tests.
-   * @throws Exception
    */
   @BeforeEach
-  public void initOM() throws Exception {
-    setup();
+  @Override
+  protected void setup() throws Exception {
+    initInstanceVariables();
+
     LOG.info("Waiting for OM leader election");
-    GenericTestUtils.waitFor(() -> cluster.getOMLeader() != null,
-        1000, 120_000);
+    waitForLeaderToBeReady();
     submitCancelPrepareRequest();
     assertClusterNotPrepared();
   }
@@ -202,7 +201,7 @@ public class TestOzoneManagerPrepare extends 
TestOzoneManagerHA {
     // modified cluster.
     shutdown();
     init();
-    setup();
+    initInstanceVariables();
 
     String volumeName = VOLUME + UUID.randomUUID().toString();
     writeKeysAndWaitForLogs(volumeName, 10);
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
index 27dcd689ee..60ccbfbdf8 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManager.java
@@ -2209,10 +2209,10 @@ public final class OzoneManager extends 
ServiceRuntimeInfoImpl
   /**
    * Stop service.
    */
-  public void stop() {
+  public boolean stop() {
     LOG.info("{}: Stopping Ozone Manager", omNodeDetails.getOMPrintInfo());
     if (isStopped()) {
-      return;
+      return false;
     }
     try {
       omState = State.STOPPED;
@@ -2270,9 +2270,11 @@ public final class OzoneManager extends 
ServiceRuntimeInfoImpl
       if (omhaMetrics != null) {
         OMHAMetrics.unRegister();
       }
+      return true;
     } catch (Exception e) {
       LOG.error("OzoneManager stop failed.", e);
     }
+    return false;
   }
 
   public void shutDown(String message) {
diff --git 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerStarter.java
 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerStarter.java
index 21e53315bf..b307873911 100644
--- 
a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerStarter.java
+++ 
b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OzoneManagerStarter.java
@@ -190,8 +190,9 @@ public class OzoneManagerStarter extends GenericCli {
       om.start();
       ShutdownHookManager.get().addShutdownHook(() -> {
         try {
-          om.stop();
-          om.join();
+          if (om.stop()) {
+            om.join();
+          }
         } catch (Exception e) {
           LOG.error("Error during stop OzoneManager.", e);
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to