Author: todd Date: Wed Feb 5 04:18:11 2014 New Revision: 1564627 URL: http://svn.apache.org/r1564627 Log: HDFS-5399. Revisit SafeModeException and corresponding retry policies. Contributed by Haohui Mai.
Modified: hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java Modified: hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt?rev=1564627&r1=1564626&r2=1564627&view=diff ============================================================================== --- hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt (original) +++ hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt Wed Feb 5 04:18:11 2014 @@ -529,6 +529,9 @@ Release 2.3.0 - UNRELEASED HDFS-5842. Cannot create hftp filesystem when using a proxy user ugi and a doAs on a secure cluster. (jing9) + HDFS-5399. Revisit SafeModeException and corresponding retry policies. + (Haohui Mai via todd) + BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS HDFS-4985. 
Add storage type to the protocol and expose it in block report Modified: hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java?rev=1564627&r1=1564626&r2=1564627&view=diff ============================================================================== --- hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java (original) +++ hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java Wed Feb 5 04:18:11 2014 @@ -36,6 +36,8 @@ import static org.apache.hadoop.hdfs.DFS import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_MAX_BLOCK_ACQUIRE_FAILURES_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_READ_PREFETCH_SIZE_KEY; @@ -260,6 +262,7 @@ public class DFSClient implements java.i public static class Conf { final int hdfsTimeout; // timeout value for a DFS operation. 
final int maxFailoverAttempts; + final int maxRetryAttempts; final int failoverSleepBaseMillis; final int failoverSleepMaxMillis; final int maxBlockAcquireFailures; @@ -305,6 +308,9 @@ public class DFSClient implements java.i maxFailoverAttempts = conf.getInt( DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT); + maxRetryAttempts = conf.getInt( + DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY, + DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT); failoverSleepBaseMillis = conf.getInt( DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY, DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT); Modified: hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java?rev=1564627&r1=1564626&r2=1564627&view=diff ============================================================================== --- hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java (original) +++ hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java Wed Feb 5 04:18:11 2014 @@ -82,6 +82,8 @@ public class DFSConfigKeys extends Commo public static final int DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_DEFAULT = 0; public static final String DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_KEY = "dfs.client.failover.connection.retries.on.timeouts"; public static final int DFS_CLIENT_FAILOVER_CONNECTION_RETRIES_ON_SOCKET_TIMEOUTS_DEFAULT = 0; + public static final String DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY = "dfs.client.retry.max.attempts"; + public static final int DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT = 10; public static final String DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_KEY = "dfs.client.socketcache.expiryMsec"; public static final long DFS_CLIENT_SOCKET_CACHE_EXPIRY_MSEC_DEFAULT = 2 
* 60 * 1000; @@ -572,6 +574,8 @@ public class DFSConfigKeys extends Commo public static final String DFS_HTTP_CLIENT_RETRY_POLICY_SPEC_DEFAULT = "10000,6,60000,10"; //t1,n1,t2,n2,... public static final String DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY = "dfs.http.client.failover.max.attempts"; public static final int DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT = 15; + public static final String DFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_KEY = "dfs.http.client.retry.max.attempts"; + public static final int DFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT = 10; public static final String DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY = "dfs.http.client.failover.sleep.base.millis"; public static final int DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT = 500; public static final String DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY = "dfs.http.client.failover.sleep.max.millis"; Modified: hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java?rev=1564627&r1=1564626&r2=1564627&view=diff ============================================================================== --- hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java (original) +++ hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/NameNodeProxies.java Wed Feb 5 04:18:11 2014 @@ -24,6 +24,8 @@ import static org.apache.hadoop.hdfs.DFS import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_FAILOVER_SLEEPTIME_MAX_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY; 
+import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT; import java.io.IOException; import java.lang.reflect.Constructor; @@ -144,9 +146,10 @@ public class NameNodeProxies { .createFailoverProxyProvider(conf, failoverProxyProviderClass, xface, nameNodeUri); Conf config = new Conf(conf); - T proxy = (T) RetryProxy.create(xface, failoverProxyProvider, RetryPolicies - .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL, - config.maxFailoverAttempts, config.failoverSleepBaseMillis, + T proxy = (T) RetryProxy.create(xface, failoverProxyProvider, + RetryPolicies.failoverOnNetworkException( + RetryPolicies.TRY_ONCE_THEN_FAIL, config.maxFailoverAttempts, + config.maxRetryAttempts, config.failoverSleepBaseMillis, config.failoverSleepMaxMillis)); Text dtService = HAUtil.buildTokenServiceForLogicalUri(nameNodeUri); @@ -192,11 +195,14 @@ public class NameNodeProxies { int maxFailoverAttempts = config.getInt( DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, DFS_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT); + int maxRetryAttempts = config.getInt( + DFS_CLIENT_RETRY_MAX_ATTEMPTS_KEY, + DFS_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT); InvocationHandler dummyHandler = new LossyRetryInvocationHandler<T>( numResponseToDrop, failoverProxyProvider, RetryPolicies.failoverOnNetworkException( - RetryPolicies.TRY_ONCE_THEN_FAIL, - Math.max(numResponseToDrop + 1, maxFailoverAttempts), delay, + RetryPolicies.TRY_ONCE_THEN_FAIL, maxFailoverAttempts, + Math.max(numResponseToDrop + 1, maxRetryAttempts), delay, maxCap)); T proxy = (T) Proxy.newProxyInstance( Modified: hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java?rev=1564627&r1=1564626&r2=1564627&view=diff 
============================================================================== --- hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java (original) +++ hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java Wed Feb 5 04:18:11 2014 @@ -1132,7 +1132,8 @@ public class FSNamesystem implements Nam if (isInSafeMode()) { SafeModeException se = new SafeModeException(errorMsg, safeMode); if (haEnabled && haContext != null - && haContext.getState().getServiceState() == HAServiceState.ACTIVE) { + && haContext.getState().getServiceState() == HAServiceState.ACTIVE + && shouldRetrySafeMode(this.safeMode)) { throw new RetriableException(se); } else { throw se; @@ -1140,6 +1141,18 @@ public class FSNamesystem implements Nam } } + /** + * We already know that the safemode is on. We will throw a RetriableException + * if the safemode is neither manual nor caused by low resources. 
+ */ + private boolean shouldRetrySafeMode(SafeModeInfo safeMode) { + if (safeMode == null) { + return false; + } else { + return !safeMode.isManual() && !safeMode.areResourcesLow(); + } + } + public static Collection<URI> getNamespaceDirs(Configuration conf) { return getStorageDirs(conf, DFS_NAMENODE_NAME_DIR_KEY); } Modified: hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java?rev=1564627&r1=1564626&r2=1564627&view=diff ============================================================================== --- hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java (original) +++ hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/WebHdfsFileSystem.java Wed Feb 5 04:18:11 2014 @@ -188,6 +188,9 @@ public class WebHdfsFileSystem extends F int maxFailoverAttempts = conf.getInt( DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_KEY, DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_MAX_ATTEMPTS_DEFAULT); + int maxRetryAttempts = conf.getInt( + DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_KEY, + DFSConfigKeys.DFS_HTTP_CLIENT_RETRY_MAX_ATTEMPTS_DEFAULT); int failoverSleepBaseMillis = conf.getInt( DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_KEY, DFSConfigKeys.DFS_HTTP_CLIENT_FAILOVER_SLEEPTIME_BASE_DEFAULT); @@ -197,7 +200,7 @@ public class WebHdfsFileSystem extends F this.retryPolicy = RetryPolicies .failoverOnNetworkException(RetryPolicies.TRY_ONCE_THEN_FAIL, - maxFailoverAttempts, failoverSleepBaseMillis, + maxFailoverAttempts, maxRetryAttempts, failoverSleepBaseMillis, failoverSleepMaxMillis); } Modified: 
hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java?rev=1564627&r1=1564626&r2=1564627&view=diff ============================================================================== --- hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java (original) +++ hadoop/common/branches/branch-2.3/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestHASafeMode.java Wed Feb 5 04:18:11 2014 @@ -55,6 +55,7 @@ import org.apache.hadoop.hdfs.protocol.H import org.apache.hadoop.hdfs.server.blockmanagement.BlockManagerTestUtil; import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.FSNamesystem.SafeModeInfo; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; import org.apache.hadoop.io.IOUtils; @@ -65,6 +66,7 @@ import org.apache.log4j.Level; import org.junit.After; import org.junit.Before; import org.junit.Test; +import org.mockito.internal.util.reflection.Whitebox; import com.google.common.base.Supplier; import com.google.common.collect.Lists; @@ -124,6 +126,9 @@ public class TestHASafeMode { final Path test = new Path("/test"); // let nn0 enter safemode NameNodeAdapter.enterSafeMode(nn0, false); + SafeModeInfo safeMode = (SafeModeInfo) Whitebox.getInternalState( + nn0.getNamesystem(), "safeMode"); + Whitebox.setInternalState(safeMode, "extension", Integer.valueOf(30000)); LOG.info("enter safemode"); new Thread() { @Override