Author: todd
Date: Tue Apr 24 19:34:51 2012
New Revision: 1329960
URL: http://svn.apache.org/viewvc?rev=1329960&view=rev
Log:
HADOOP-8306. ZKFC: improve error message when ZK is not running. Contributed by
Todd Lipcon.
Modified:
hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt
hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java
hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java
Modified:
hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt?rev=1329960&r1=1329959&r2=1329960&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt
(original)
+++
hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/CHANGES.HDFS-3042.txt
Tue Apr 24 19:34:51 2012
@@ -19,3 +19,5 @@ HADOOP-8260. Replace ClientBaseWithFixes
HADOOP-8246. Auto-HA: automatically scope znode by nameservice ID (todd)
HADOOP-8247. Add a config to enable auto-HA, which disables manual
FailoverController (todd)
+
+HADOOP-8306. ZKFC: improve error message when ZK is not running. (todd)
Modified:
hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java?rev=1329960&r1=1329959&r2=1329960&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java
(original)
+++
hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ZKFailoverController.java
Tue Apr 24 19:34:51 2012
@@ -34,6 +34,7 @@ import org.apache.hadoop.ha.HAZKUtil.ZKA
import org.apache.hadoop.ha.HealthMonitor.State;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.util.Tool;
+import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.ZooDefs.Ids;
import org.apache.hadoop.util.ToolRunner;
import org.apache.zookeeper.data.ACL;
@@ -76,8 +77,11 @@ public abstract class ZKFailoverControll
static final int ERR_CODE_NO_FENCER = 4;
/** Automatic failover is not enabled */
static final int ERR_CODE_AUTO_FAILOVER_NOT_ENABLED = 5;
+ /** Cannot connect to ZooKeeper */
+ static final int ERR_CODE_NO_ZK = 6;
private Configuration conf;
+ private String zkQuorum;
private HealthMonitor healthMonitor;
private ActiveStandbyElector elector;
@@ -162,11 +166,23 @@ public abstract class ZKFailoverControll
}
}
- if (!elector.parentZNodeExists()) {
- LOG.fatal("Unable to start failover controller. " +
- "Parent znode does not exist.\n" +
- "Run with -formatZK flag to initialize ZooKeeper.");
- return ERR_CODE_NO_PARENT_ZNODE;
+ try {
+ if (!elector.parentZNodeExists()) {
+ LOG.fatal("Unable to start failover controller. " +
+ "Parent znode does not exist.\n" +
+ "Run with -formatZK flag to initialize ZooKeeper.");
+ return ERR_CODE_NO_PARENT_ZNODE;
+ }
+ } catch (IOException ioe) {
+ if (ioe.getCause() instanceof KeeperException.ConnectionLossException) {
+ LOG.fatal("Unable to start failover controller. Unable to connect " +
+ "to ZooKeeper quorum at " + zkQuorum + ". Please check the " +
+ "configured value for " + ZK_QUORUM_KEY + " and ensure that " +
+ "ZooKeeper is running.");
+ return ERR_CODE_NO_ZK;
+ } else {
+ throw ioe;
+ }
}
try {
@@ -248,7 +264,7 @@ public abstract class ZKFailoverControll
}
private void initZK() throws HadoopIllegalArgumentException, IOException {
- String zkQuorum = conf.get(ZK_QUORUM_KEY);
+ zkQuorum = conf.get(ZK_QUORUM_KEY);
int zkTimeout = conf.getInt(ZK_SESSION_TIMEOUT_KEY,
ZK_SESSION_TIMEOUT_DEFAULT);
// Parse ACLs from configuration.
Modified:
hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java
URL:
http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java?rev=1329960&r1=1329959&r2=1329960&view=diff
==============================================================================
---
hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java
(original)
+++
hadoop/common/branches/HDFS-3042/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ha/TestZKFailoverController.java
Tue Apr 24 19:34:51 2012
@@ -95,6 +95,18 @@ public class TestZKFailoverController ex
assertEquals(0, runFC(svc, "-formatZK", "-force"));
}
+ /**
+ * Test that if ZooKeeper is not running, the correct error
+ * code is returned.
+ */
+ @Test(timeout=15000)
+ public void testNoZK() throws Exception {
+ stopServer();
+ DummyHAService svc = cluster.getService(1);
+ assertEquals(ZKFailoverController.ERR_CODE_NO_ZK,
+ runFC(svc));
+ }
+
@Test
public void testFormatOneClusterLeavesOtherClustersAlone() throws Exception {
DummyHAService svc = cluster.getService(1);