Repository: ambari
Updated Branches:
  refs/heads/branch-2.4 5fc7dbf42 -> ab8a118b7
AMBARI-17928 - NameNode High Availability Health Alert Issue (jonathanhurley)

Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/ab8a118b
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/ab8a118b
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/ab8a118b

Branch: refs/heads/branch-2.4
Commit: ab8a118b7f009980d75f11fdd00398694c1033de
Parents: 5fc7dbf
Author: Jonathan Hurley <[email protected]>
Authored: Thu Jul 28 08:54:48 2016 -0400
Committer: Jonathan Hurley <[email protected]>
Committed: Thu Jul 28 08:56:03 2016 -0400

----------------------------------------------------------------------
 .../package/alerts/alert_ha_namenode_health.py | 39 ++------------------
 1 file changed, 4 insertions(+), 35 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/ambari/blob/ab8a118b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
----------------------------------------------------------------------
diff --git a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
index 00d1421..28b3f22 100644
--- a/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
+++ b/ambari-server/src/main/resources/common-services/HDFS/2.1.0.2.0/package/alerts/alert_ha_namenode_health.py
@@ -185,49 +185,18 @@ def execute(configurations={}, parameters={}, host_name=None):
         logger.exception(LOGGER_EXCEPTION_MESSAGE.format(host_name))
         unknown_namenodes.append(value)
 
-  # now that the request is done, determine if this host is the host that
-  # should report the status of the HA topology
-  is_active_namenode = False
-  for active_namenode in active_namenodes:
-    if active_namenode.startswith(host_name):
-      is_active_namenode = True
-
   # there's only one scenario here; there is exactly 1 active and 1 standby
   is_topology_healthy = len(active_namenodes) == 1 and len(standby_namenodes) == 1
 
   result_label = 'Active{0}, Standby{1}, Unknown{2}'.format(str(active_namenodes),
     str(standby_namenodes), str(unknown_namenodes))
 
-  # Healthy Topology:
-  #   - Active NN reports the alert, standby does not
-  #
-  # Unhealthy Topology:
-  #   - Report the alert if this is the first named host
-  #   - Report the alert if not the first named host, but the other host
-  #     could not report its status
   if is_topology_healthy:
-    if is_active_namenode is True:
-      return (RESULT_STATE_OK, [result_label])
-    else:
-      return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
+    # if there is exactly 1 active and 1 standby NN
+    return (RESULT_STATE_OK, [result_label])
   else:
-    # dfs.namenode.rpc-address.service.alias is guaranteed in HA mode
-    first_listed_host_key = 'dfs.namenode.rpc-address.{0}.{1}'.format(
-      name_service, nn_unique_ids[0])
-
-    first_listed_host = ''
-    if first_listed_host_key in hdfs_site:
-      first_listed_host = hdfs_site[first_listed_host_key]
-
-    is_first_listed_host = False
-    if first_listed_host.startswith(host_name):
-      is_first_listed_host = True
-
-    if is_first_listed_host:
-      return (RESULT_STATE_CRITICAL, [result_label])
-    else:
-      # not the first listed host, but the first host might be in the unknown
-      return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
+    # other scenario
+    return (RESULT_STATE_CRITICAL, [result_label])
 
 
 def get_jmx(query, connection_timeout):
