Author: jing9
Date: Thu Apr 24 23:05:25 2014
New Revision: 1589906
URL: http://svn.apache.org/r1589906
Log:
HADOOP-10535. Merge r1589905 from trunk.
Modified:
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverControllerStress.java
Modified:
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt?rev=1589906&r1=1589905&r2=1589906&view=diff
==============================================================================
---
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt
(original)
+++
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/CHANGES.txt
Thu Apr 24 23:05:25 2014
@@ -27,6 +27,9 @@ Release 2.5.0 - UNRELEASED
HADOOP-10503. Move junit up to v 4.11. (cnauroth)
+ HADOOP-10535. Make the retry numbers in ActiveStandbyElector configurable.
+ (jing9)
+
OPTIMIZATIONS
BUG FIXES
Modified:
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java?rev=1589906&r1=1589905&r2=1589906&view=diff
==============================================================================
---
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
(original)
+++
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
Thu Apr 24 23:05:25 2014
@@ -199,6 +199,11 @@ public class CommonConfigurationKeys ext
"ha.failover-controller.graceful-fence.connection.retries";
public static final int HA_FC_GRACEFUL_FENCE_CONNECTION_RETRIES_DEFAULT = 1;
+ /** Number of times a ZooKeeper operation is retried in ActiveStandbyElector. */
+ public static final String HA_FC_ELECTOR_ZK_OP_RETRIES_KEY =
+ "ha.failover-controller.active-standby-elector.zk.op.retries";
+ public static final int HA_FC_ELECTOR_ZK_OP_RETRIES_DEFAULT = 3;
+
/* Timeout that the CLI (manual) FC waits for monitorHealth, getServiceState
*/
public static final String HA_FC_CLI_CHECK_TIMEOUT_KEY =
"ha.failover-controller.cli-check.rpc-timeout.ms";
Modified:
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java?rev=1589906&r1=1589905&r2=1589906&view=diff
==============================================================================
---
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java
(original)
+++
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java
Thu Apr 24 23:05:25 2014
@@ -143,7 +143,6 @@ public class ActiveStandbyElector implem
public static final Log LOG = LogFactory.getLog(ActiveStandbyElector.class);
- static int NUM_RETRIES = 3;
private static final int SLEEP_AFTER_FAILURE_TO_BECOME_ACTIVE = 1000;
private static enum ConnectionState {
@@ -170,6 +169,7 @@ public class ActiveStandbyElector implem
private final String zkLockFilePath;
private final String zkBreadCrumbPath;
private final String znodeWorkingDir;
+ private final int maxRetryNum;
private Lock sessionReestablishLockForTests = new ReentrantLock();
private boolean wantToBeInElection;
@@ -207,7 +207,7 @@ public class ActiveStandbyElector implem
public ActiveStandbyElector(String zookeeperHostPorts,
int zookeeperSessionTimeout, String parentZnodeName, List<ACL> acl,
List<ZKAuthInfo> authInfo,
- ActiveStandbyElectorCallback app) throws IOException,
+ ActiveStandbyElectorCallback app, int maxRetryNum) throws IOException,
HadoopIllegalArgumentException, KeeperException {
if (app == null || acl == null || parentZnodeName == null
|| zookeeperHostPorts == null || zookeeperSessionTimeout <= 0) {
@@ -220,7 +220,8 @@ public class ActiveStandbyElector implem
appClient = app;
znodeWorkingDir = parentZnodeName;
zkLockFilePath = znodeWorkingDir + "/" + LOCK_FILENAME;
- zkBreadCrumbPath = znodeWorkingDir + "/" + BREADCRUMB_FILENAME;
+ zkBreadCrumbPath = znodeWorkingDir + "/" + BREADCRUMB_FILENAME;
+ this.maxRetryNum = maxRetryNum;
// createConnection for future API calls
createConnection();
@@ -439,7 +440,7 @@ public class ActiveStandbyElector implem
LOG.debug(errorMessage);
if (shouldRetry(code)) {
- if (createRetryCount < NUM_RETRIES) {
+ if (createRetryCount < maxRetryNum) {
LOG.debug("Retrying createNode createRetryCount: " + createRetryCount);
++createRetryCount;
createLockNodeAsync();
@@ -500,7 +501,7 @@ public class ActiveStandbyElector implem
LOG.debug(errorMessage);
if (shouldRetry(code)) {
- if (statRetryCount < NUM_RETRIES) {
+ if (statRetryCount < maxRetryNum) {
++statRetryCount;
monitorLockNodeAsync();
return;
@@ -735,7 +736,7 @@ public class ActiveStandbyElector implem
private boolean reEstablishSession() {
int connectionRetryCount = 0;
boolean success = false;
- while(!success && connectionRetryCount < NUM_RETRIES) {
+ while(!success && connectionRetryCount < maxRetryNum) {
LOG.debug("Establishing zookeeper connection for " + this);
try {
createConnection();
@@ -972,14 +973,14 @@ public class ActiveStandbyElector implem
});
}
- private static <T> T zkDoWithRetries(ZKAction<T> action)
- throws KeeperException, InterruptedException {
+ private <T> T zkDoWithRetries(ZKAction<T> action) throws KeeperException,
+ InterruptedException {
int retry = 0;
while (true) {
try {
return action.run();
} catch (KeeperException ke) {
- if (shouldRetry(ke.code()) && ++retry < NUM_RETRIES) {
+ if (shouldRetry(ke.code()) && ++retry < maxRetryNum) {
continue;
}
throw ke;
Modified:
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java?rev=1589906&r1=1589905&r2=1589906&view=diff
==============================================================================
---
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java
(original)
+++
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java
Thu Apr 24 23:05:25 2014
@@ -32,6 +32,7 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.ha.ActiveStandbyElector.ActiveNotFoundException;
import org.apache.hadoop.ha.ActiveStandbyElector.ActiveStandbyElectorCallback;
import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;
@@ -341,10 +342,12 @@ public abstract class ZKFailoverControll
Preconditions.checkArgument(zkTimeout > 0,
"Invalid ZK session timeout %s", zkTimeout);
-
+ int maxRetryNum = conf.getInt(
+ CommonConfigurationKeys.HA_FC_ELECTOR_ZK_OP_RETRIES_KEY,
+ CommonConfigurationKeys.HA_FC_ELECTOR_ZK_OP_RETRIES_DEFAULT);
elector = new ActiveStandbyElector(zkQuorum,
zkTimeout, getParentZnode(), zkAcls, zkAuths,
- new ElectorCallbacks());
+ new ElectorCallbacks(), maxRetryNum);
}
private String getParentZnode() {
Modified:
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java?rev=1589906&r1=1589905&r2=1589906&view=diff
==============================================================================
---
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java
(original)
+++
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElector.java
Thu Apr 24 23:05:25 2014
@@ -39,6 +39,7 @@ import org.junit.Assert;
import org.mockito.Mockito;
import org.apache.hadoop.HadoopIllegalArgumentException;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.ha.ActiveStandbyElector.ActiveStandbyElectorCallback;
import org.apache.hadoop.ha.ActiveStandbyElector.ActiveNotFoundException;
import org.apache.hadoop.util.ZKUtil.ZKAuthInfo;
@@ -59,8 +60,9 @@ public class TestActiveStandbyElector {
ActiveStandbyElectorTester(String hostPort, int timeout, String parent,
List<ACL> acl, ActiveStandbyElectorCallback app) throws IOException,
KeeperException {
- super(hostPort, timeout, parent, acl,
- Collections.<ZKAuthInfo>emptyList(), app);
+ super(hostPort, timeout, parent, acl, Collections
+ .<ZKAuthInfo> emptyList(), app,
+ CommonConfigurationKeys.HA_FC_ELECTOR_ZK_OP_RETRIES_DEFAULT);
}
@Override
@@ -715,7 +717,8 @@ public class TestActiveStandbyElector {
public void testWithoutZKServer() throws Exception {
try {
new ActiveStandbyElector("127.0.0.1", 2000, ZK_PARENT_NAME,
- Ids.OPEN_ACL_UNSAFE, Collections.<ZKAuthInfo> emptyList(), mockApp);
+ Ids.OPEN_ACL_UNSAFE, Collections.<ZKAuthInfo> emptyList(), mockApp,
+ CommonConfigurationKeys.HA_FC_ELECTOR_ZK_OP_RETRIES_DEFAULT);
Assert.fail("Did not throw zookeeper connection loss exceptions!");
} catch (KeeperException ke) {
GenericTestUtils.assertExceptionContains( "ConnectionLoss", ke);
Modified:
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java?rev=1589906&r1=1589905&r2=1589906&view=diff
==============================================================================
---
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java
(original)
+++
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestActiveStandbyElectorRealZK.java
Thu Apr 24 23:05:25 2014
@@ -26,6 +26,7 @@ import java.util.Collections;
import java.util.UUID;
import org.apache.commons.logging.impl.Log4JLogger;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.ha.ActiveStandbyElector.ActiveStandbyElectorCallback;
import org.apache.hadoop.ha.ActiveStandbyElector.State;
import org.apache.hadoop.util.ZKUtil.ZKAuthInfo;
@@ -70,9 +71,9 @@ public class TestActiveStandbyElectorRea
for (int i = 0; i < NUM_ELECTORS; i++) {
cbs[i] = Mockito.mock(ActiveStandbyElectorCallback.class);
appDatas[i] = Ints.toByteArray(i);
- electors[i] = new ActiveStandbyElector(
- hostPort, 5000, PARENT_DIR, Ids.OPEN_ACL_UNSAFE,
- Collections.<ZKAuthInfo>emptyList(), cbs[i]);
+ electors[i] = new ActiveStandbyElector(hostPort, 5000, PARENT_DIR,
+ Ids.OPEN_ACL_UNSAFE, Collections.<ZKAuthInfo> emptyList(), cbs[i],
+ CommonConfigurationKeys.HA_FC_ELECTOR_ZK_OP_RETRIES_DEFAULT);
}
}
Modified:
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverControllerStress.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverControllerStress.java?rev=1589906&r1=1589905&r2=1589906&view=diff
==============================================================================
---
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverControllerStress.java
(original)
+++
hadoop/common/branches/branch-2/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverControllerStress.java
Thu Apr 24 23:05:25 2014
@@ -23,6 +23,7 @@ import java.util.Random;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.util.Shell;
import org.apache.hadoop.util.Time;
import org.junit.After;
@@ -126,8 +127,7 @@ public class TestZKFailoverControllerStr
.when(cluster.getService(0).proxy).monitorHealth();
Mockito.doAnswer(new RandomlyThrow(1))
.when(cluster.getService(1).proxy).monitorHealth();
- ActiveStandbyElector.NUM_RETRIES = 100;
-
+ conf.setInt(CommonConfigurationKeys.HA_FC_ELECTOR_ZK_OP_RETRIES_KEY, 100);
// Don't start until after the above mocking. Otherwise we can get
// Mockito errors if the HM calls the proxy in the middle of
// setting up the mock.