This is an automated email from the ASF dual-hosted git repository.
hanishakoneru pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/ozone.git
The following commit(s) were added to refs/heads/master by this push:
new c2db377 HDDS-5872. Do not failover on some RpcExceptions (#2772)
c2db377 is described below
commit c2db3776773c9a80e44a4c1dbd58d38ff6a41f9f
Author: Hanisha Koneru <[email protected]>
AuthorDate: Fri Nov 12 11:29:23 2021 -0800
HDDS-5872. Do not failover on some RpcExceptions (#2772)
---
.../java/org/apache/hadoop/hdds/HddsUtils.java | 51 ++++++++++++++++++++++
.../org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java | 31 ++++---------
.../main/java/org/apache/hadoop/ozone/OmUtils.java | 25 -----------
.../ozone/om/ha/OMFailoverProxyProvider.java | 11 ++++-
4 files changed, 68 insertions(+), 50 deletions(-)
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java
index af5d27a..cce8170 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/HddsUtils.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hdds;
+import com.google.protobuf.ServiceException;
import javax.management.ObjectName;
import java.io.File;
import java.io.IOException;
@@ -45,6 +46,11 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerC
import org.apache.hadoop.hdds.scm.ScmConfigKeys;
import org.apache.hadoop.hdds.scm.ha.SCMHAUtils;
import org.apache.hadoop.hdds.scm.ha.SCMNodeInfo;
+import org.apache.hadoop.ipc.RPC;
+import org.apache.hadoop.ipc.RemoteException;
+import org.apache.hadoop.ipc.RpcException;
+import org.apache.hadoop.ipc.RpcNoSuchMethodException;
+import org.apache.hadoop.ipc.RpcNoSuchProtocolException;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.net.DNS;
import org.apache.hadoop.net.NetUtils;
@@ -65,6 +71,8 @@ import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DATANODE_PORT_D
import static
org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_DATANODE_PORT_KEY;
import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_SCM_NAMES;
+import org.apache.hadoop.security.AccessControlException;
+import org.apache.hadoop.security.token.SecretManager;
import org.apache.ratis.util.SizeInBytes;
import org.apache.hadoop.ozone.conf.OzoneServiceConfig;
import org.slf4j.Logger;
@@ -622,4 +630,47 @@ public final class HddsUtils {
public static int roundupMb(long bytes) {
return (int)Math.ceil((double) bytes/(double) ONE_MB);
}
+
+ /**
+ * Unwrap exception to check if it is some kind of access control problem
+ * ({@link AccessControlException} or {@link SecretManager.InvalidToken})
+ * or a RpcException.
+ */
+ public static Throwable getUnwrappedException(Exception ex) {
+ if (ex instanceof ServiceException) {
+ Throwable t = ex.getCause();
+ if (t instanceof RemoteException) {
+ t = ((RemoteException) t).unwrapRemoteException();
+ }
+ while (t != null) {
+ if (t instanceof RpcException ||
+ t instanceof AccessControlException ||
+ t instanceof SecretManager.InvalidToken) {
+ return t;
+ }
+ t = t.getCause();
+ }
+ }
+ return null;
+ }
+
+ /**
+ * For some Rpc Exceptions, client should not failover.
+ */
+ public static boolean shouldNotFailoverOnRpcException(Throwable exception) {
+ if (exception instanceof RpcException) {
+ // Should not failover for following exceptions
+ if (exception instanceof RpcNoSuchMethodException ||
+ exception instanceof RpcNoSuchProtocolException ||
+ exception instanceof RPC.VersionMismatch) {
+ return true;
+ }
+ if (exception.getMessage().contains(
+ "RPC response exceeds maximum data length") ||
+ exception.getMessage().contains("RPC response has invalid length")) {
+ return true;
+ }
+ }
+ return false;
+ }
}
diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java
index 8674217..60f5aa2 100644
--- a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java
+++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/scm/ha/SCMHAUtils.java
@@ -22,7 +22,7 @@ package org.apache.hadoop.hdds.scm.ha;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
-import com.google.protobuf.ServiceException;
+import org.apache.hadoop.hdds.HddsUtils;
import org.apache.hadoop.hdds.conf.ConfigurationException;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
@@ -311,8 +311,13 @@ public final class SCMHAUtils {
public static RetryPolicy.RetryAction getRetryAction(int failovers, int
retry,
Exception e, int maxRetryCount, long retryInterval) {
- // For AccessControl Exception where Client is not authenticated.
- if (isAccessControlException(e)) {
+ Throwable unwrappedException = HddsUtils.getUnwrappedException(e);
+ if (unwrappedException instanceof AccessControlException) {
+ // For AccessControl Exception where Client is not authenticated.
+ return RetryPolicy.RetryAction.FAIL;
+ } else if (HddsUtils.shouldNotFailoverOnRpcException(unwrappedException)) {
+ // For some types of Rpc Exceptions, retrying on different server would
+ // not help.
return RetryPolicy.RetryAction.FAIL;
} else if (SCMHAUtils.checkRetriableWithNoFailoverException(e)) {
if (retry < maxRetryCount) {
@@ -335,24 +340,4 @@ public final class SCMHAUtils {
}
}
}
-
- /**
- * Unwrap exception to check if it is some kind of access control problem.
- * {@link AccessControlException}
- */
- public static boolean isAccessControlException(Exception ex) {
- if (ex instanceof ServiceException) {
- Throwable t = ex.getCause();
- if (t instanceof RemoteException) {
- t = ((RemoteException) t).unwrapRemoteException();
- }
- while (t != null) {
- if (t instanceof AccessControlException) {
- return true;
- }
- t = t.getCause();
- }
- }
- return false;
- }
}
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java
index eb36b76..4892f1b 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/OmUtils.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.ozone;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
-import com.google.protobuf.ServiceException;
import java.io.File;
import java.io.IOException;
import java.net.InetSocketAddress;
@@ -43,7 +42,6 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.scm.client.HddsClientUtils;
-import org.apache.hadoop.ipc.RemoteException;
import org.apache.hadoop.net.NetUtils;
import org.apache.hadoop.ozone.conf.OMClientConfig;
import org.apache.hadoop.ozone.ha.ConfUtils;
@@ -51,8 +49,6 @@ import org.apache.hadoop.ozone.om.exceptions.OMException;
import org.apache.hadoop.ozone.om.helpers.OmKeyInfo;
import org.apache.hadoop.ozone.om.helpers.RepeatedOmKeyInfo;
import org.apache.hadoop.ozone.protocol.proto.OzoneManagerProtocolProtos;
-import org.apache.hadoop.security.AccessControlException;
-import org.apache.hadoop.security.token.SecretManager;
import org.apache.commons.lang3.StringUtils;
@@ -620,27 +616,6 @@ public final class OmUtils {
}
/**
- * Unwrap exception to check if it is some kind of access control problem
- * ({@link AccessControlException} or {@link SecretManager.InvalidToken}).
- */
- public static boolean isAccessControlException(Exception ex) {
- if (ex instanceof ServiceException) {
- Throwable t = ex.getCause();
- if (t instanceof RemoteException) {
- t = ((RemoteException) t).unwrapRemoteException();
- }
- while (t != null) {
- if (t instanceof AccessControlException ||
- t instanceof SecretManager.InvalidToken) {
- return true;
- }
- t = t.getCause();
- }
- }
- return false;
- }
-
- /**
* Normalize the key name. This method used {@link Path} to
* normalize the key name.
* @param keyName
diff --git a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java
index 219edc2..7c45593 100644
--- a/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java
+++ b/hadoop-ozone/common/src/main/java/org/apache/hadoop/ozone/om/ha/OMFailoverProxyProvider.java
@@ -34,6 +34,7 @@ import java.util.Map;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdds.HddsUtils;
import org.apache.hadoop.hdds.conf.ConfigurationSource;
import org.apache.hadoop.hdds.utils.LegacyHadoopConfigurationSource;
import org.apache.hadoop.io.Text;
@@ -54,7 +55,9 @@ import org.apache.hadoop.ozone.ha.ConfUtils;
import org.apache.hadoop.ozone.om.exceptions.OMLeaderNotReadyException;
import org.apache.hadoop.ozone.om.exceptions.OMNotLeaderException;
import
org.apache.hadoop.ozone.om.protocolPB.OzoneManagerProtocolClientSideTranslatorPB;
+import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
+import org.apache.hadoop.security.token.SecretManager;
import org.apache.ratis.protocol.exceptions.StateMachineException;
import org.slf4j.Logger;
@@ -451,8 +454,10 @@ public class OMFailoverProxyProvider<T> implements
return waitBetweenRetries;
}
- public synchronized boolean shouldFailover(Exception ex) {
- if (OmUtils.isAccessControlException(ex)) {
+ private synchronized boolean shouldFailover(Exception ex) {
+ Throwable unwrappedException = HddsUtils.getUnwrappedException(ex);
+ if (unwrappedException instanceof AccessControlException ||
+ unwrappedException instanceof SecretManager.InvalidToken) {
// Retry all available OMs once before failing with
// AccessControlException.
if (accessControlExceptionOMs.contains(currentProxyOMNodeId)) {
@@ -464,6 +469,8 @@ public class OMFailoverProxyProvider<T> implements
return false;
}
}
+ } else if (HddsUtils.shouldNotFailoverOnRpcException(unwrappedException)) {
+ return false;
} else if (ex instanceof StateMachineException) {
StateMachineException smEx = (StateMachineException) ex;
Throwable cause = smEx.getCause();
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]