This is an automated email from the ASF dual-hosted git repository.

bharat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hadoop-ozone.git


The following commit(s) were added to refs/heads/master by this push:
     new 36cdf5a  HDDS-3467. OM Client RPC failover retries happening more than configured. (#853)
36cdf5a is described below

commit 36cdf5ac2271011b2b3208975d4c55cf34691bde
Author: Uma Maheswara Rao G <[email protected]>
AuthorDate: Tue Apr 21 19:16:43 2020 -0700

    HDDS-3467. OM Client RPC failover retries happening more than configured. (#853)
---
 .../OzoneManagerProtocolClientSideTranslatorPB.java       |  2 +-
 .../org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java   |  1 +
 .../org/apache/hadoop/ozone/om/TestOzoneManagerHA.java    | 15 +++------------
 3 files changed, 5 insertions(+), 13 deletions(-)

diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java
index 674f672..ec355e9 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java
@@ -284,7 +284,7 @@ public final class OzoneManagerProtocolClientSideTranslatorPB
 
       private RetryAction getRetryAction(RetryDecision fallbackAction,
           int failovers) {
-        if (failovers <= maxFailovers) {
+        if (failovers < maxFailovers) {
           return new RetryAction(fallbackAction,
               omFailoverProxyProvider.getWaitTime());
         } else {
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
index 939e669..ac565f6 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
@@ -244,6 +244,7 @@ public class MiniOzoneHAClusterImpl extends MiniOzoneClusterImpl {
 
   public void stopOzoneManager(int index) {
     ozoneManagers.get(index).stop();
+    ozoneManagers.get(index).join();
   }
 
   public void stopOzoneManager(String omNodeId) {
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
index 43971cc..fe25841 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
@@ -282,6 +282,7 @@ public class TestOzoneManagerHA {
   /**
    * Test client request fails when 2 OMs are down.
    */
+  @Ignore("This test is failing randomly. It will be enabled after fixing it.")
   @Test
   public void testTwoOMNodesDown() throws Exception {
     cluster.stopOzoneManager(1);
@@ -738,7 +739,6 @@ public class TestOzoneManagerHA {
     Assert.assertEquals(leaderOMNodeId, newLeaderOMNodeId);
   }
 
-  @Ignore("This test randomly failing. Let's enable once its fixed.")
   @Test
   public void testOMRetryProxy() throws Exception {
     // Stop all the OMs.
@@ -756,19 +756,10 @@ public class TestOzoneManagerHA {
       // the RpcClient should give up.
       fail("TestOMRetryProxy should fail when there are no OMs running");
     } catch (ConnectException e) {
-      // Each retry attempt tries IPC_CLIENT_CONNECT_MAX_RETRIES times.
-      // So there should be at least
-      // OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS * IPC_CLIENT_CONNECT_MAX_RETRIES
-      // "Retrying connect to server" messages.
-      // Also, the first call will result in EOFException.
-      // That will result in another IPC_CLIENT_CONNECT_MAX_RETRIES attempts.
-      Assert.assertEquals(
-          (OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS + 1) *
-              IPC_CLIENT_CONNECT_MAX_RETRIES,
-          appender.countLinesWithMessage("Retrying connect to server:"));
-
       Assert.assertEquals(1,
           appender.countLinesWithMessage("Failed to connect to OMs:"));
+      Assert.assertEquals(OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS,
+          appender.countLinesWithMessage("Trying to failover"));
       Assert.assertEquals(1,
           appender.countLinesWithMessage("Attempted " +
               OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS + " failovers."));


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to