This is an automated email from the ASF dual-hosted git repository.
bharat pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hadoop-ozone.git
The following commit(s) were added to refs/heads/master by this push:
new 36cdf5a HDDS-3467. OM Client RPC failover retries happening more than configured. (#853)
36cdf5a is described below
commit 36cdf5ac2271011b2b3208975d4c55cf34691bde
Author: Uma Maheswara Rao G <[email protected]>
AuthorDate: Tue Apr 21 19:16:43 2020 -0700
HDDS-3467. OM Client RPC failover retries happening more than configured. (#853)
---
.../OzoneManagerProtocolClientSideTranslatorPB.java | 2 +-
.../org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java | 1 +
.../org/apache/hadoop/ozone/om/TestOzoneManagerHA.java | 15 +++------------
3 files changed, 5 insertions(+), 13 deletions(-)
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java
index 674f672..ec355e9 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/protocolPB/OzoneManagerProtocolClientSideTranslatorPB.java
@@ -284,7 +284,7 @@ public final class OzoneManagerProtocolClientSideTranslatorPB
private RetryAction getRetryAction(RetryDecision fallbackAction,
int failovers) {
- if (failovers <= maxFailovers) {
+ if (failovers < maxFailovers) {
return new RetryAction(fallbackAction,
omFailoverProxyProvider.getWaitTime());
} else {
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
index 939e669..ac565f6 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneHAClusterImpl.java
@@ -244,6 +244,7 @@ public class MiniOzoneHAClusterImpl extends MiniOzoneClusterImpl {
public void stopOzoneManager(int index) {
ozoneManagers.get(index).stop();
+ ozoneManagers.get(index).join();
}
public void stopOzoneManager(String omNodeId) {
diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
index 43971cc..fe25841 100644
--- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
+++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/om/TestOzoneManagerHA.java
@@ -282,6 +282,7 @@ public class TestOzoneManagerHA {
/**
* Test client request fails when 2 OMs are down.
*/
+ @Ignore("This test is failing randomly. It will be enabled after fixing it.")
@Test
public void testTwoOMNodesDown() throws Exception {
cluster.stopOzoneManager(1);
@@ -738,7 +739,6 @@ public class TestOzoneManagerHA {
Assert.assertEquals(leaderOMNodeId, newLeaderOMNodeId);
}
- @Ignore("This test randomly failing. Let's enable once its fixed.")
@Test
public void testOMRetryProxy() throws Exception {
// Stop all the OMs.
@@ -756,19 +756,10 @@ public class TestOzoneManagerHA {
// the RpcClient should give up.
fail("TestOMRetryProxy should fail when there are no OMs running");
} catch (ConnectException e) {
- // Each retry attempt tries IPC_CLIENT_CONNECT_MAX_RETRIES times.
- // So there should be at least
- // OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS * IPC_CLIENT_CONNECT_MAX_RETRIES
- // "Retrying connect to server" messages.
- // Also, the first call will result in EOFException.
- // That will result in another IPC_CLIENT_CONNECT_MAX_RETRIES attempts.
- Assert.assertEquals(
- (OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS + 1) *
- IPC_CLIENT_CONNECT_MAX_RETRIES,
- appender.countLinesWithMessage("Retrying connect to server:"));
-
Assert.assertEquals(1,
appender.countLinesWithMessage("Failed to connect to OMs:"));
+ Assert.assertEquals(OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS,
+ appender.countLinesWithMessage("Trying to failover"));
Assert.assertEquals(1,
appender.countLinesWithMessage("Attempted " +
OZONE_CLIENT_FAILOVER_MAX_ATTEMPTS + " failovers."));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]