Repository: hadoop Updated Branches: refs/heads/branch-2.7 c2ed7e4a0 -> 2d1ff2e1c
YARN-4101. RM should print alert messages if Zookeeper and Resourcemanager gets connection issue. Contributed by Xuan Gong (cherry picked from commit 09c64ba1ba8be7a2ac31f4e42efb8c99b682399f) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2d1ff2e1 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2d1ff2e1 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2d1ff2e1 Branch: refs/heads/branch-2.7 Commit: 2d1ff2e1ca8f30375f3c0884feb6835ec58cab29 Parents: c2ed7e4 Author: Jian He <[email protected]> Authored: Wed Sep 2 17:45:23 2015 -0700 Committer: Jian He <[email protected]> Committed: Wed Sep 2 17:46:24 2015 -0700 ---------------------------------------------------------------------- .../java/org/apache/hadoop/ha/ActiveStandbyElector.java | 4 ++++ hadoop-yarn-project/CHANGES.txt | 3 +++ .../hadoop/yarn/server/resourcemanager/AdminService.java | 9 +++++++++ .../yarn/server/resourcemanager/EmbeddedElectorService.java | 4 ++++ .../yarn/server/resourcemanager/webapp/AboutBlock.java | 2 ++ .../hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java | 5 +++++ .../yarn/server/resourcemanager/webapp/RMWebAppFilter.java | 5 ++++- .../yarn/server/resourcemanager/webapp/dao/ClusterInfo.java | 6 ++++++ .../server/resourcemanager/webapp/TestRMWebServices.java | 8 ++++++-- 9 files changed, 43 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1ff2e1/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java ---------------------------------------------------------------------- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java index 69bd82f..5264278 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java @@ -1118,4 +1118,8 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { ((appData == null) ? "null" : StringUtils.byteToHexString(appData)) + " cb=" + appClient; } + + public String getHAZookeeperConnectionState() { + return this.zkConnectionState.name(); + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1ff2e1/hadoop-yarn-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 2f43b3f..8efcc81 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -64,6 +64,9 @@ Release 2.7.2 - UNRELEASED YARN-3893. Both RM in active state when Admin#transitionToActive failure from refeshAll() (Bibin A Chundatt via rohithsharmaks) + YARN-4101. RM should print alert messages if Zookeeper and Resourcemanager + gets connection issue. (Xuan Gong via jianhe) + Release 2.7.1 - 2015-07-06 INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1ff2e1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java index 11ddf81..de393f2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java @@ -701,4 +701,13 @@ public class AdminService extends CompositeService implements "AdminService", "Exception " + msg); return RPCUtil.getRemoteException(ioe); } + + public String getHAZookeeperConnectionState() { + if (!rmContext.isHAEnabled()) { + return "ResourceManager HA is not enabled."; + } else if (!autoFailoverEnabled) { + return "Auto Failover is not enabled."; + } + return this.embeddedElector.getHAZookeeperConnectionState(); + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1ff2e1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java index c7b7768..73bdca0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/EmbeddedElectorService.java @@ -205,4 +205,8 @@ public class EmbeddedElectorService extends AbstractService elector.quitElection(false); elector.joinElection(localActiveNodeInfo); } + + public String getHAZookeeperConnectionState() { + return elector.getHAZookeeperConnectionState(); + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1ff2e1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutBlock.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutBlock.java index ea5c48a..4225afd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AboutBlock.java @@ -44,6 +44,8 @@ public class AboutBlock extends HtmlBlock { _("Cluster ID:", cinfo.getClusterId()). _("ResourceManager state:", cinfo.getState()). _("ResourceManager HA state:", cinfo.getHAState()). + _("ResourceManager HA zookeeper connection state:", + cinfo.getHAZookeeperConnectionState()). _("ResourceManager RMStateStore:", cinfo.getRMStateStore()). _("ResourceManager started on:", Times.format(cinfo.getStartedOn())). _("ResourceManager version:", cinfo.getRMBuildVersion() + http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1ff2e1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java index 4189053..1377e26 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebApp.java @@ -114,4 +114,9 @@ public class RMWebApp extends WebApp implements YarnWebParams { } return path; } + + public String getHAZookeeperConnectionState() { + return rm.getRMContext().getRMAdminService() + .getHAZookeeperConnectionState(); + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1ff2e1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebAppFilter.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebAppFilter.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebAppFilter.java index a8f793a..9f35b6c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebAppFilter.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RMWebAppFilter.java @@ -58,6 +58,7 @@ public class RMWebAppFilter extends GuiceContainer { private String path; private static final int BASIC_SLEEP_TIME = 5; private static final int MAX_SLEEP_TIME = 5 * 60; + private static final Random randnum = new Random(); @Inject public RMWebAppFilter(Injector injector, Configuration conf) { @@ -126,6 +127,8 @@ public class RMWebAppFilter extends GuiceContainer { String redirectMsg = doRetry ? "Can not find any active RM. Will retry in next " + next + " seconds." : "There is no active RM right now."; + redirectMsg += "\nHA Zookeeper Connection State: " + + rmWebApp.getHAZookeeperConnectionState(); PrintWriter out = response.getWriter(); out.println(redirectMsg); if (doRetry) { @@ -172,6 +175,6 @@ public class RMWebAppFilter extends GuiceContainer { private static int calculateExponentialTime(int retries) { long baseTime = BASIC_SLEEP_TIME * (1L << retries); - return (int) (baseTime * ((new Random()).nextDouble() + 0.5)); + return (int) (baseTime * (randnum.nextDouble() + 0.5)); } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1ff2e1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java index b529f21..512a5c4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/dao/ClusterInfo.java @@ -43,6 +43,7 @@ public class ClusterInfo { protected String hadoopVersion; protected String hadoopBuildVersion; protected String hadoopVersionBuiltOn; + protected String haZooKeeperConnectionState; public ClusterInfo() { } // JAXB needs this @@ -62,6 +63,8 @@ public class ClusterInfo { this.hadoopVersion = VersionInfo.getVersion(); this.hadoopBuildVersion = VersionInfo.getBuildVersion(); this.hadoopVersionBuiltOn = VersionInfo.getDate(); + this.haZooKeeperConnectionState = + rm.getRMContext().getRMAdminService().getHAZookeeperConnectionState(); } public String getState() { @@ -108,4 +111,7 @@ public class ClusterInfo { return this.startedOn; } + public String getHAZookeeperConnectionState() { + return this.haZooKeeperConnectionState; + } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/2d1ff2e1/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java ---------------------------------------------------------------------- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java index 298246c..08d0c41 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/TestRMWebServices.java @@ -277,6 +277,8 @@ public class TestRMWebServices extends JerseyTestBase { WebServicesTestUtils.getXmlLong(element, "startedOn"), WebServicesTestUtils.getXmlString(element, "state"), WebServicesTestUtils.getXmlString(element, "haState"), + WebServicesTestUtils.getXmlString( + element, "haZooKeeperConnectionState"), WebServicesTestUtils.getXmlString(element, "hadoopVersionBuiltOn"), WebServicesTestUtils.getXmlString(element, "hadoopBuildVersion"), WebServicesTestUtils.getXmlString(element, "hadoopVersion"), @@ -292,9 +294,10 @@ public class TestRMWebServices extends JerseyTestBase { Exception { assertEquals("incorrect number of elements", 1, json.length()); JSONObject info = json.getJSONObject("clusterInfo"); - assertEquals("incorrect number of elements", 11, info.length()); + assertEquals("incorrect number of elements", 12, info.length()); verifyClusterGeneric(info.getLong("id"), info.getLong("startedOn"), info.getString("state"), info.getString("haState"), + info.getString("haZooKeeperConnectionState"), info.getString("hadoopVersionBuiltOn"), info.getString("hadoopBuildVersion"), info.getString("hadoopVersion"), info.getString("resourceManagerVersionBuiltOn"), @@ -304,7 +307,8 @@ public class TestRMWebServices extends JerseyTestBase { } public void verifyClusterGeneric(long clusterid, long startedon, - String state, String haState, String hadoopVersionBuiltOn, + String state, String haState, String haZooKeeperConnectionState, + String hadoopVersionBuiltOn, String hadoopBuildVersion, String hadoopVersion, String resourceManagerVersionBuiltOn, String resourceManagerBuildVersion, String resourceManagerVersion) {
