Repository: ambari Updated Branches: refs/heads/trunk bc55f2d4e -> 6727c1dc8
http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py index 2066d46..19e3170 100644 --- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py @@ -35,7 +35,8 @@ NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}' NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}' DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}' -CONNECTION_TIMEOUT = 5.0 +CONNECTION_TIMEOUT_KEY = 'connection.timeout' +CONNECTION_TIMEOUT_DEFAULT = 5.0 def get_tokens(): """ @@ -44,36 +45,43 @@ def get_tokens(): """ return (HDFS_SITE_KEY, NAMESERVICE_KEY, NN_HTTP_ADDRESS_KEY, NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY) - -def execute(parameters=None, host_name=None): + +def execute(configurations={}, parameters={}, host_name=None): """ Returns a tuple containing the result code and a pre-formatted result label Keyword arguments: - parameters (dictionary): a mapping of parameter key to value + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value host_name (string): the name of this host where the alert is running """ - if parameters is None: - return (RESULT_STATE_UNKNOWN, ['There were no parameters supplied to the script.']) + if configurations is None: + return (RESULT_STATE_UNKNOWN, ['There were no configurations supplied to the script.']) # if not in HA mode, then SKIP - if not NAMESERVICE_KEY in parameters: + if not 
NAMESERVICE_KEY in configurations: return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled']) # hdfs-site is required - if not HDFS_SITE_KEY in parameters: + if not HDFS_SITE_KEY in configurations: return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the script'.format(HDFS_SITE_KEY)]) + # parse script arguments + connection_timeout = CONNECTION_TIMEOUT_DEFAULT + if CONNECTION_TIMEOUT_KEY in parameters: + connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY]) + + # determine whether or not SSL is enabled is_ssl_enabled = False - if DFS_POLICY_KEY in parameters: - dfs_policy = parameters[DFS_POLICY_KEY] + if DFS_POLICY_KEY in configurations: + dfs_policy = configurations[DFS_POLICY_KEY] if dfs_policy == "HTTPS_ONLY": is_ssl_enabled = True - name_service = parameters[NAMESERVICE_KEY] - hdfs_site = parameters[HDFS_SITE_KEY] + name_service = configurations[NAMESERVICE_KEY] + hdfs_site = configurations[HDFS_SITE_KEY] # look for dfs.ha.namenodes.foo nn_unique_ids_key = 'dfs.ha.namenodes.' 
+ name_service @@ -105,7 +113,7 @@ def execute(parameters=None, host_name=None): try: jmx_uri = jmx_uri_fragment.format(value) - state = get_value_from_jmx(jmx_uri,'State') + state = get_value_from_jmx(jmx_uri, 'State', connection_timeout) if state == HDFS_NN_STATE_ACTIVE: active_namenodes.append(value) @@ -161,11 +169,11 @@ def execute(parameters=None, host_name=None): return (RESULT_STATE_SKIPPED, ['Another host will report this alert']) -def get_value_from_jmx(query, jmx_property): +def get_value_from_jmx(query, jmx_property, connection_timeout): response = None - + try: - response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT) + response = urllib2.urlopen(query, timeout=connection_timeout) data = response.read() data_dict = json.loads(data) http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py index 2837226..35217fc 100644 --- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py @@ -40,15 +40,21 @@ SMOKEUSER_KEY = '{{cluster-env/smokeuser}}' # The configured Kerberos executable search paths, if any KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}}' -PERCENT_WARNING = 200 -PERCENT_CRITICAL = 200 - THRIFT_PORT_DEFAULT = 10000 HIVE_SERVER_TRANSPORT_MODE_DEFAULT = 'binary' HIVE_SERVER_PRINCIPAL_DEFAULT = 'hive/_HOST@EXAMPLE.COM' HIVE_SERVER2_AUTHENTICATION_DEFAULT = 'NOSASL' + +# default keytab location +SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY = 'default.smoke.keytab' 
SMOKEUSER_KEYTAB_DEFAULT = '/etc/security/keytabs/smokeuser.headless.keytab' + +# default smoke principal +SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY = 'default.smoke.principal' SMOKEUSER_PRINCIPAL_DEFAULT = 'ambari-qa@EXAMPLE.COM' + +# default smoke user +SMOKEUSER_SCRIPT_PARAM_KEY = 'default.smoke.user' SMOKEUSER_DEFAULT = 'ambari-qa' def get_tokens(): @@ -59,62 +65,76 @@ def get_tokens(): return (HIVE_SERVER_THRIFT_PORT_KEY,SECURITY_ENABLED_KEY, SMOKEUSER_KEY, HIVE_SERVER2_AUTHENTICATION_KEY,HIVE_SERVER_PRINCIPAL_KEY, SMOKEUSER_KEYTAB_KEY,SMOKEUSER_PRINCIPAL_KEY,HIVE_SERVER_THRIFT_HTTP_PORT_KEY, - HIVE_SERVER_TRANSPORT_MODE_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY) + HIVE_SERVER_TRANSPORT_MODE_KEY,KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY) -def execute(parameters=None, host_name=None): +def execute(configurations={}, parameters={}, host_name=None): """ Returns a tuple containing the result code and a pre-formatted result label Keyword arguments: - parameters (dictionary): a mapping of parameter key to value + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value host_name (string): the name of this host where the alert is running """ - if parameters is None: - return ('UNKNOWN', ['There were no parameters supplied to the script.']) + if configurations is None: + return ('UNKNOWN', ['There were no configurations supplied to the script.']) transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT - if HIVE_SERVER_TRANSPORT_MODE_KEY in parameters: - transport_mode = parameters[HIVE_SERVER_TRANSPORT_MODE_KEY] + if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations: + transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY] port = THRIFT_PORT_DEFAULT - if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in parameters: - port = int(parameters[HIVE_SERVER_THRIFT_PORT_KEY]) - elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in parameters: - port = 
int(parameters[HIVE_SERVER_THRIFT_HTTP_PORT_KEY]) + if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations: + port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY]) + elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations: + port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY]) security_enabled = False - if SECURITY_ENABLED_KEY in parameters: - security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE' + if SECURITY_ENABLED_KEY in configurations: + security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE' hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT - if HIVE_SERVER2_AUTHENTICATION_KEY in parameters: - hive_server2_authentication = parameters[HIVE_SERVER2_AUTHENTICATION_KEY] + if HIVE_SERVER2_AUTHENTICATION_KEY in configurations: + hive_server2_authentication = configurations[HIVE_SERVER2_AUTHENTICATION_KEY] + # defaults + smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT - if SMOKEUSER_PRINCIPAL_KEY in parameters: - smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_KEY] - smokeuser = SMOKEUSER_DEFAULT - if SMOKEUSER_KEY in parameters: - smokeuser = parameters[SMOKEUSER_KEY] + + # check script params + if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters: + smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY] + + if SMOKEUSER_SCRIPT_PARAM_KEY in parameters: + smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY] + + if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters: + smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY] + + + # check configurations last as they should always take precedence + if SMOKEUSER_PRINCIPAL_KEY in configurations: + smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY] + + if SMOKEUSER_KEY in configurations: + smokeuser = configurations[SMOKEUSER_KEY] result_code = None if security_enabled: hive_server_principal = 
HIVE_SERVER_PRINCIPAL_DEFAULT - if HIVE_SERVER_PRINCIPAL_KEY in parameters: - hive_server_principal = parameters[HIVE_SERVER_PRINCIPAL_KEY] - - smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT + if HIVE_SERVER_PRINCIPAL_KEY in configurations: + hive_server_principal = configurations[HIVE_SERVER_PRINCIPAL_KEY] - if SMOKEUSER_KEYTAB_KEY in parameters: - smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY] + if SMOKEUSER_KEYTAB_KEY in configurations: + smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY] # Get the configured Kerberos executable search paths, if any - if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters: - kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY] + if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations: + kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY] else: kerberos_executable_search_paths = None http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py index eaad265..c0dc18a 100644 --- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py @@ -24,7 +24,8 @@ from resource_management.libraries.functions import format from resource_management.libraries.functions import get_kinit_path from resource_management.libraries.functions import get_klist_path from ambari_commons.os_check import OSConst, OSCheck -from os import getpid, sep +from ambari_commons.os_family_impl import OsFamilyFuncImpl, OsFamilyImpl 
+import os from urlparse import urlparse RESULT_CODE_OK = 'OK' @@ -39,6 +40,17 @@ SECURITY_ENABLED = '{{cluster-env/security_enabled}}' OOZIE_PRINCIPAL = '{{oozie-site/oozie.authentication.kerberos.principal}}' OOZIE_KEYTAB = '{{oozie-site/oozie.authentication.kerberos.keytab}}' +class KerberosPropertiesNotFound(Exception): pass + +@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY) +def get_tokens(): + """ + Returns a tuple of tokens in the format {{site/property}} that will be used + to build the dictionary passed into execute + """ + return (OOZIE_URL_KEY,) + +@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT) def get_tokens(): """ Returns a tuple of tokens in the format {{site/property}} that will be used @@ -46,78 +58,88 @@ def get_tokens(): """ return (OOZIE_URL_KEY, OOZIE_PRINCIPAL, SECURITY_ENABLED, OOZIE_KEYTAB, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY) -def execute(parameters=None, host_name=None): +@OsFamilyFuncImpl(os_family=OSConst.WINSRV_FAMILY) +def get_check_command(oozie_url, host_name, configurations): + from resource_management.libraries.functions import reload_windows_env + reload_windows_env() + oozie_home = os.environ['OOZIE_HOME'] + command = format("{oozie_home}\\bin\\oozie.cmd admin -oozie {oozie_url} -status") + return (command, None) + +@OsFamilyFuncImpl(os_family=OsFamilyImpl.DEFAULT) +def get_check_command(oozie_url, host_name, configurations): + security_enabled = False + if SECURITY_ENABLED in configurations: + security_enabled = str(configurations[SECURITY_ENABLED]).upper() == 'TRUE' + kerberos_env = None + if security_enabled: + if OOZIE_KEYTAB in configurations and OOZIE_PRINCIPAL in configurations: + oozie_keytab = configurations[OOZIE_KEYTAB] + oozie_principal = configurations[OOZIE_PRINCIPAL] + + # substitute _HOST in kerberos principal with actual fqdn + oozie_principal = oozie_principal.replace('_HOST', host_name) + else: + raise KerberosPropertiesNotFound('The Oozie keytab and principal are required configurations when security is 
enabled.') + + # Create the kerberos credentials cache (ccache) file and set it in the environment to use + # when executing curl + env = Environment.get_instance() + ccache_file = "{0}{1}oozie_alert_cc_{2}".format(env.tmp_dir, os.sep, os.getpid()) + kerberos_env = {'KRB5CCNAME': ccache_file} + + # Get the configured Kerberos executable search paths, if any + if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations: + kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY] + else: + kerberos_executable_search_paths = None + + klist_path_local = get_klist_path(kerberos_executable_search_paths) + klist_command = format("{klist_path_local} -s {ccache_file}") + + # Determine if we need to kinit by testing to see if the relevant cache exists and has + # non-expired tickets. Tickets are marked to expire after 5 minutes to help reduce the number + # it kinits we do but recover quickly when keytabs are regenerated + return_code, _ = call(klist_command) + if return_code != 0: + kinit_path_local = get_kinit_path(kerberos_executable_search_paths) + kinit_command = format("{kinit_path_local} -l 5m -kt {oozie_keytab} {oozie_principal}; ") + + # kinit + Execute(kinit_command, environment=kerberos_env) + command = format("source /etc/oozie/conf/oozie-env.sh ; oozie admin -oozie {oozie_url} -status") + return (command, kerberos_env) + +def execute(configurations={}, parameters={}, host_name=None): """ Returns a tuple containing the result code and a pre-formatted result label Keyword arguments: - parameters (dictionary): a mapping of parameter key to value + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value host_name (string): the name of this host where the alert is running """ - if parameters is None: - return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.']) + if configurations is None: + return (RESULT_CODE_UNKNOWN, ['There were no 
configurations supplied to the script.']) - if not OOZIE_URL_KEY in parameters: + if not OOZIE_URL_KEY in configurations: return (RESULT_CODE_UNKNOWN, ['The Oozie URL is a required parameter.']) # use localhost on Windows, 0.0.0.0 on others; 0.0.0.0 means bind to all # interfaces, which doesn't work on Windows localhost_address = 'localhost' if OSCheck.get_os_family() == OSConst.WINSRV_FAMILY else '0.0.0.0' - oozie_url = parameters[OOZIE_URL_KEY] + oozie_url = configurations[OOZIE_URL_KEY] oozie_url = oozie_url.replace(urlparse(oozie_url).hostname,localhost_address) - security_enabled = False - if SECURITY_ENABLED in parameters: - security_enabled = str(parameters[SECURITY_ENABLED]).upper() == 'TRUE' - - command = format("source /etc/oozie/conf/oozie-env.sh ; oozie admin -oozie {oozie_url} -status") - try: - # kinit if security is enabled so that oozie-env.sh can make the web request - kerberos_env = None - - if security_enabled: - if OOZIE_KEYTAB in parameters and OOZIE_PRINCIPAL in parameters: - oozie_keytab = parameters[OOZIE_KEYTAB] - oozie_principal = parameters[OOZIE_PRINCIPAL] - - # substitute _HOST in kerberos principal with actual fqdn - oozie_principal = oozie_principal.replace('_HOST', host_name) - else: - return (RESULT_CODE_UNKNOWN, ['The Oozie keytab and principal are required parameters when security is enabled.']) - - # Create the kerberos credentials cache (ccache) file and set it in the environment to use - # when executing curl - env = Environment.get_instance() - ccache_file = "{0}{1}oozie_alert_cc_{2}".format(env.tmp_dir, sep, getpid()) - kerberos_env = {'KRB5CCNAME': ccache_file} - - # Get the configured Kerberos executable search paths, if any - if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters: - kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY] - else: - kerberos_executable_search_paths = None - - klist_path_local = get_klist_path(kerberos_executable_search_paths) - klist_command = 
format("{klist_path_local} -s {ccache_file}") - - # Determine if we need to kinit by testing to see if the relevant cache exists and has - # non-expired tickets. Tickets are marked to expire after 5 minutes to help reduce the number - # it kinits we do but recover quickly when keytabs are regenerated - return_code, _ = call(klist_command) - if return_code != 0: - kinit_path_local = get_kinit_path(kerberos_executable_search_paths) - kinit_command = format("{kinit_path_local} -l 5m -kt {oozie_keytab} {oozie_principal}; ") - - # kinit - Execute(kinit_command, environment=kerberos_env) - + command, env = get_check_command(oozie_url, host_name, configurations) # execute the command - Execute(command, environment=kerberos_env) + Execute(command, environment=env) return (RESULT_CODE_OK, ["Successful connection to {0}".format(oozie_url)]) - + except KerberosPropertiesNotFound, ex: + return (RESULT_CODE_UNKNOWN, [str(ex)]) except Exception, ex: return (RESULT_CODE_CRITICAL, [str(ex)]) http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py index dd20be4..7ee375e 100644 --- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py @@ -53,8 +53,10 @@ KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY = '{{kerberos-env/executable_search_paths}} WEBHCAT_OK_RESPONSE = 'ok' WEBHCAT_PORT_DEFAULT = 50111 -CURL_CONNECTION_TIMEOUT = '5' -CONNECTION_TIMEOUT = 5.0 +CONNECTION_TIMEOUT_KEY = 'connection.timeout' +CONNECTION_TIMEOUT_DEFAULT = 
5.0 +CURL_CONNECTION_TIMEOUT_DEFAULT = str(int(CONNECTION_TIMEOUT_DEFAULT)) + def get_tokens(): """ @@ -64,27 +66,36 @@ def get_tokens(): return (TEMPLETON_PORT_KEY, SECURITY_ENABLED_KEY, WEBHCAT_KEYTAB_KEY, WEBHCAT_PRINCIPAL_KEY, KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY) -def execute(parameters=None, host_name=None): +def execute(configurations={}, parameters={}, host_name=None): """ Returns a tuple containing the result code and a pre-formatted result label Keyword arguments: - parameters (dictionary): a mapping of parameter key to value + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value host_name (string): the name of this host where the alert is running """ result_code = RESULT_CODE_UNKNOWN - if parameters is None: - return (result_code, ['There were no parameters supplied to the script.']) + if configurations is None: + return (result_code, ['There were no configurations supplied to the script.']) webhcat_port = WEBHCAT_PORT_DEFAULT - if TEMPLETON_PORT_KEY in parameters: - webhcat_port = int(parameters[TEMPLETON_PORT_KEY]) + if TEMPLETON_PORT_KEY in configurations: + webhcat_port = int(configurations[TEMPLETON_PORT_KEY]) security_enabled = False - if SECURITY_ENABLED_KEY in parameters: - security_enabled = parameters[SECURITY_ENABLED_KEY].lower() == 'true' + if SECURITY_ENABLED_KEY in configurations: + security_enabled = configurations[SECURITY_ENABLED_KEY].lower() == 'true' + + # parse script arguments + connection_timeout = CONNECTION_TIMEOUT_DEFAULT + curl_connection_timeout = CURL_CONNECTION_TIMEOUT_DEFAULT + if CONNECTION_TIMEOUT_KEY in parameters: + connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY]) + curl_connection_timeout = str(int(connection_timeout)) + # the alert will always run on the webhcat host if host_name is None: @@ -98,12 +109,12 @@ def execute(parameters=None, host_name=None): json_response = {} if security_enabled: - if WEBHCAT_KEYTAB_KEY not 
in parameters or WEBHCAT_PRINCIPAL_KEY not in parameters: - return (RESULT_CODE_UNKNOWN, [str(parameters)]) + if WEBHCAT_KEYTAB_KEY not in configurations or WEBHCAT_PRINCIPAL_KEY not in configurations: + return (RESULT_CODE_UNKNOWN, [str(configurations)]) try: - webhcat_keytab = parameters[WEBHCAT_KEYTAB_KEY] - webhcat_principal = parameters[WEBHCAT_PRINCIPAL_KEY] + webhcat_keytab = configurations[WEBHCAT_KEYTAB_KEY] + webhcat_principal = configurations[WEBHCAT_PRINCIPAL_KEY] # substitute _HOST in kerberos principal with actual fqdn webhcat_principal = webhcat_principal.replace('_HOST', host_name) @@ -115,8 +126,8 @@ def execute(parameters=None, host_name=None): kerberos_env = {'KRB5CCNAME': ccache_file} # Get the configured Kerberos executable search paths, if any - if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters: - kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY] + if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations: + kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY] else: kerberos_executable_search_paths = None @@ -136,7 +147,7 @@ def execute(parameters=None, host_name=None): # make a single curl call to get just the http code curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL', '-w', - '%{http_code}', '--connect-timeout', CURL_CONNECTION_TIMEOUT, + '%{http_code}', '--connect-timeout', curl_connection_timeout, '-o', '/dev/null', query_url], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env) stdout, stderr = curl.communicate() @@ -160,7 +171,7 @@ def execute(parameters=None, host_name=None): # now that we have the http status and it was 200, get the content start_time = time.time() curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL', - '--connect-timeout', CURL_CONNECTION_TIMEOUT, query_url, ], + '--connect-timeout', curl_connection_timeout, query_url, ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env) stdout, stderr = 
curl.communicate() @@ -178,7 +189,7 @@ def execute(parameters=None, host_name=None): try: # execute the query for the JSON that includes WebHCat status start_time = time.time() - url_response = urllib2.urlopen(query_url, timeout=CONNECTION_TIMEOUT) + url_response = urllib2.urlopen(query_url, timeout=connection_timeout) total_time = time.time() - start_time json_response = json.loads(url_response.read()) http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json index 94f0926..fa1e20a 100644 --- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json @@ -203,7 +203,18 @@ "enabled": true, "source": { "type": "SCRIPT", - "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py" + "path": "YARN/2.1.0.2.0/package/alerts/alert_nodemanager_health.py", + "parameters": [ + { + "name": "connection.timeout", + "display_name": "Connection Timeout", + "value": 5.0, + "type": "NUMERIC", + "description": "The maximum time before this alert is considered to be CRITICAL", + "units": "seconds", + "threshold": "CRITICAL" + } + ] } } ], http://git-wip-us.apache.org/repos/asf/ambari/blob/6727c1dc/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py index 516d858..1cdeb97 100644 --- 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py @@ -40,7 +40,8 @@ CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager hea NODEMANAGER_DEFAULT_PORT = 8042 -CONNECTION_TIMEOUT = 5.0 +CONNECTION_TIMEOUT_KEY = 'connection.timeout' +CONNECTION_TIMEOUT_DEFAULT = 5.0 def get_tokens(): """ @@ -51,32 +52,40 @@ def get_tokens(): YARN_HTTP_POLICY_KEY) -def execute(parameters=None, host_name=None): +def execute(configurations={}, parameters={}, host_name=None): """ Returns a tuple containing the result code and a pre-formatted result label Keyword arguments: - parameters (dictionary): a mapping of parameter key to value + configurations (dictionary): a mapping of configuration key to value + parameters (dictionary): a mapping of script parameter key to value host_name (string): the name of this host where the alert is running """ result_code = RESULT_CODE_UNKNOWN - if parameters is None: - return (result_code, ['There were no parameters supplied to the script.']) + if configurations is None: + return (result_code, ['There were no configurations supplied to the script.']) scheme = 'http' http_uri = None https_uri = None http_policy = 'HTTP_ONLY' - if NODEMANAGER_HTTP_ADDRESS_KEY in parameters: - http_uri = parameters[NODEMANAGER_HTTP_ADDRESS_KEY] + if NODEMANAGER_HTTP_ADDRESS_KEY in configurations: + http_uri = configurations[NODEMANAGER_HTTP_ADDRESS_KEY] - if NODEMANAGER_HTTPS_ADDRESS_KEY in parameters: - https_uri = parameters[NODEMANAGER_HTTPS_ADDRESS_KEY] + if NODEMANAGER_HTTPS_ADDRESS_KEY in configurations: + https_uri = configurations[NODEMANAGER_HTTPS_ADDRESS_KEY] + + if YARN_HTTP_POLICY_KEY in configurations: + http_policy = configurations[YARN_HTTP_POLICY_KEY] + + + # parse script arguments + connection_timeout = CONNECTION_TIMEOUT_DEFAULT + if CONNECTION_TIMEOUT_KEY in 
parameters: + connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY]) - if YARN_HTTP_POLICY_KEY in parameters: - http_policy = parameters[YARN_HTTP_POLICY_KEY] # determine the right URI and whether to use SSL uri = http_uri @@ -108,7 +117,7 @@ def execute(parameters=None, host_name=None): try: # execute the query for the JSON that includes templeton status - url_response = urllib2.urlopen(query, timeout=CONNECTION_TIMEOUT) + url_response = urllib2.urlopen(query, timeout=connection_timeout) except urllib2.HTTPError, httpError: label = CRITICAL_HTTP_STATUS_MESSAGE.format(str(httpError.code), query, str(httpError)) @@ -122,6 +131,7 @@ def execute(parameters=None, host_name=None): try: json_response = json.loads(url_response.read()) node_healthy = json_response['nodeInfo']['nodeHealthy'] + node_healthy_report = json_response['nodeInfo']['healthReport'] # convert boolean to string node_healthy = str(node_healthy) @@ -138,6 +148,9 @@ def execute(parameters=None, host_name=None): if node_healthy.lower() == 'true': result_code = RESULT_CODE_OK label = OK_MESSAGE + elif node_healthy.lower() == 'false': + result_code = RESULT_CODE_CRITICAL + label = node_healthy_report else: result_code = RESULT_CODE_CRITICAL label = CRITICAL_NODEMANAGER_STATUS_MESSAGE.format(node_healthy)
