Repository: ambari Updated Branches: refs/heads/branch-1.7.0 b7cca6265 -> d22559192
AMBARI-7772 - Nagios Alerts Fail When SSL Is Enabled For Some Services (jonathanhurley) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/d2255919 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/d2255919 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/d2255919 Branch: refs/heads/branch-1.7.0 Commit: d225591926afede4e34fd316b63e0c0357470aa9 Parents: b7cca62 Author: Jonathan Hurley <[email protected]> Authored: Tue Oct 14 07:31:57 2014 -0400 Committer: Jonathan Hurley <[email protected]> Committed: Tue Oct 14 09:52:26 2014 -0400 ---------------------------------------------------------------------- .../NAGIOS/package/files/check_webui.sh | 2 +- .../NAGIOS/package/files/check_webui_ha.sh | 2 +- .../NAGIOS/package/files/check_webui.sh | 2 +- .../package/files/check_checkpoint_time.py | 35 ++++--- .../NAGIOS/package/files/check_webui.sh | 2 +- .../NAGIOS/package/files/check_webui_ha.sh | 2 +- .../services/NAGIOS/package/scripts/params.py | 97 ++++++++++++++++---- .../package/templates/hadoop-commands.cfg.j2 | 2 +- .../package/templates/hadoop-services.cfg.j2 | 26 +++--- 9 files changed, 123 insertions(+), 47 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/d2255919/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui.sh b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui.sh index e47a74c..7044878 100644 --- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui.sh +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui.sh @@ -27,7 +27,7 @@ port=$3 checkurl () { url=$1 export no_proxy=$host - curl $url -o /dev/null + curl $url -k -o /dev/null echo $? } http://git-wip-us.apache.org/repos/asf/ambari/blob/d2255919/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui_ha.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui_ha.sh b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui_ha.sh index 1d0f5f3..d9a814d 100644 --- a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui_ha.sh +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui_ha.sh @@ -28,7 +28,7 @@ checkurl () { url=$1 host=$2 export no_proxy=$host - curl $url -o /dev/null + curl $url -k -o /dev/null echo $? } http://git-wip-us.apache.org/repos/asf/ambari/blob/d2255919/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_webui.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_webui.sh b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_webui.sh index 011dcc7..f1f6641 100644 --- a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_webui.sh +++ b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_webui.sh @@ -27,7 +27,7 @@ port=$3 checkurl () { url=$1 export no_proxy=$host - curl $url -o /dev/null + curl $url -k -o /dev/null echo $? } http://git-wip-us.apache.org/repos/asf/ambari/blob/d2255919/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_checkpoint_time.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_checkpoint_time.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_checkpoint_time.py index ab889d1..6848e17 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_checkpoint_time.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_checkpoint_time.py @@ -32,7 +32,6 @@ OK_MESSAGE = "OK: Last checkpoint time" WARNING_JMX_MESSAGE = "WARNING: NameNode JMX not accessible" def main(): - current_time = int(round(time.time() * 1000)) parser = optparse.OptionParser() @@ -41,6 +40,8 @@ def main(): default="localhost", help="NameNode host") parser.add_option("-p", "--port", dest="port", default="50070", help="NameNode jmx port") + parser.add_option("-s", "--ssl-enabled", dest="is_ssl_enabled", + default=False, help="SSL Enabled") parser.add_option("-w", "--warning", dest="warning", default="200", help="Percent for warning alert") parser.add_option("-c", "--critical", dest="crit", @@ -50,16 +51,25 @@ def main(): parser.add_option("-x", "--txns", dest="txns", default="1000000", help="CheckpointNode will create a checkpoint of the namespace every 'dfs.namenode.checkpoint.txns'") + (options, args) = parser.parse_args() - host = get_available_nn_host(options) + scheme = "http" + if options.is_ssl_enabled == "true": + scheme = "https" + + host = get_available_nn_host(options,scheme) + + last_checkpoint_time_qry = "{scheme}://{host}:{port}/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem".format( + scheme=scheme, host=host, port=options.port) - last_checkpoint_time_qry = "http://{host}:{port}/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem".\ - format(host=host, port=options.port) + print last_checkpoint_time_qry + last_checkpoint_time = int(get_value_from_jmx(last_checkpoint_time_qry,"LastCheckpointTime")) - journal_transaction_info_qry = "http://{host}:{port}/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo".\ - format(host=host, port=options.port) + journal_transaction_info_qry = "{scheme}://{host}:{port}/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo".format( + scheme=scheme, host=host, port=options.port) + journal_transaction_info = get_value_from_jmx(journal_transaction_info_qry,"JournalTransactionInfo") journal_transaction_info_dict = json.loads(journal_transaction_info) @@ -78,11 +88,13 @@ def main(): print OK_MESSAGE exit(0) + def get_time(delta): h = int(delta/3600) m = int((delta % 3600)/60) return {'h':h, 'm':m} + def get_value_from_jmx(qry, property): try: response = urllib2.urlopen(qry) @@ -94,19 +106,18 @@ def get_value_from_jmx(qry, property): data_dict = json.loads(data) return (data_dict["beans"][0][property]) -def get_available_nn_host(options): + +def get_available_nn_host(options, scheme): nn_hosts = options.host.split(" ") for nn_host in nn_hosts: try: - urllib2.urlopen("http://{host}:{port}/jmx".format(host=nn_host, port=options.port)) + urllib2.urlopen("{scheme}://{host}:{port}/jmx".format(scheme=scheme, host=nn_host, port=options.port)) return nn_host except Exception: pass print WARNING_JMX_MESSAGE exit(1) -if __name__ == "__main__": - main() - - +if __name__ == "__main__": + main() \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/d2255919/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui.sh b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui.sh index e47a74c..7044878 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui.sh +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui.sh @@ -27,7 +27,7 @@ port=$3 checkurl () { url=$1 export no_proxy=$host - curl $url -o /dev/null + curl $url -k -o /dev/null echo $? } http://git-wip-us.apache.org/repos/asf/ambari/blob/d2255919/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui_ha.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui_ha.sh b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui_ha.sh index 1d0f5f3..d9a814d 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui_ha.sh +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui_ha.sh @@ -28,7 +28,7 @@ checkurl () { url=$1 host=$2 export no_proxy=$host - curl $url -o /dev/null + curl $url -k -o /dev/null echo $? } http://git-wip-us.apache.org/repos/asf/ambari/blob/d2255919/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py index aadc0e6..e1100c4 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py @@ -24,6 +24,9 @@ from functions import is_jdk_greater_6 from resource_management import * import status_params +HADOOP_HTTP_POLICY = "HTTP_ONLY" +HADOOP_HTTPS_POLICY = "HTTPS_ONLY" + # server configurations config = Script.get_config() @@ -85,40 +88,102 @@ nagios_service_cfg = format("{nagios_obj_dir}/hadoop-services.cfg") nagios_command_cfg = format("{nagios_obj_dir}/hadoop-commands.cfg") eventhandlers_dir = "/usr/lib/nagios/eventhandlers" nagios_principal_name = default("/configurations/nagios-env/nagios_principal_name", "nagios") -hadoop_ssl_enabled = False oozie_server_port = get_port_from_url(config['configurations']['oozie-site']['oozie.base.url']) namenode_host = default("/clusterHostInfo/namenode_host", None) +has_namenode = not namenode_host == None + # - test for HDFS or HCFS (glusterfs) if 'namenode_host' in config['clusterHostInfo']: ishdfs_value = "HDFS" else: ishdfs_value = None -has_namenode = not namenode_host == None - -# different to HDP1 +# HDFS, YARN, and MR use different settings to enable SSL +hdfs_ssl_enabled = False +yarn_ssl_enabled = False +mapreduce_ssl_enabled = False + +# initialize all http policies to HTTP_ONLY +dfs_http_policy = HADOOP_HTTP_POLICY +yarn_http_policy = HADOOP_HTTP_POLICY +mapreduce_http_policy = HADOOP_HTTP_POLICY + +# +if 'dfs.http.policy' in config['configurations']['hdfs-site']: + dfs_http_policy = config['configurations']['hdfs-site']['dfs.http.policy'] + +if 'yarn.http.policy' in config['configurations']['yarn-site']: + yarn_http_policy = config['configurations']['yarn-site']['yarn.http.policy'] + +if 'mapreduce.jobhistory.http.policy' in config['configurations']['mapred-site']: + mapreduce_http_policy = config['configurations']['mapred-site']['mapreduce.jobhistory.http.policy'] + +if dfs_http_policy == HADOOP_HTTPS_POLICY: + hdfs_ssl_enabled = True + +if yarn_http_policy == HADOOP_HTTPS_POLICY: + yarn_ssl_enabled = True + +if mapreduce_http_policy == HADOOP_HTTPS_POLICY: + mapreduce_ssl_enabled = True + +# set default ports and webui lookup properties +dfs_namenode_webui_default_port = '50070' +dfs_snamenode_webui_default_port = '50090' +yarn_nodemanager_default_port = '8042' +dfs_namenode_webui_property = 'dfs.namenode.http-address' +dfs_snamenode_webui_property = 'dfs.namenode.secondary.http-address' +dfs_datanode_webui_property = 'dfs.datanode.http.address' +yarn_rm_webui_property = 'yarn.resourcemanager.webapp.address' +yarn_timeline_service_webui_property = 'yarn.timeline-service.webapp.address' +yarn_nodemanager_webui_property = 'yarn.nodemanager.webapp.address' +mapreduce_jobhistory_webui_property = 'mapreduce.jobhistory.webapp.address' + +# if HDFS is protected by SSL, adjust the ports and lookup properties +if hdfs_ssl_enabled == True: + dfs_namenode_webui_default_port = '50470' + dfs_snamenode_webui_default_port = '50091' + dfs_namenode_webui_property = 'dfs.namenode.https-address' + dfs_snamenode_webui_property = 'dfs.namenode.secondary.https-address' + dfs_datanode_webui_property = 'dfs.datanode.https.address' + +# if YARN is protected by SSL, adjust the ports and lookup properties +if yarn_ssl_enabled == True: + yarn_rm_webui_property = 'yarn.resourcemanager.webapp.https.address' + yarn_nodemanager_webui_property = 'yarn.nodemanager.webapp.https.address' + yarn_timeline_service_webui_property = 'yarn.timeline-service.webapp.https.address' + +# if MR is protected by SSL, adjust the ports and lookup properties +if mapreduce_ssl_enabled == True: + mapreduce_jobhistory_webui_property = 'mapreduce.jobhistory.webapp.https.address' + if has_namenode: - if 'dfs.namenode.http-address' in config['configurations']['hdfs-site']: - namenode_port = get_port_from_url(config['configurations']['hdfs-site']['dfs.namenode.http-address']) + # extract NameNode + if dfs_namenode_webui_property in config['configurations']['hdfs-site']: + namenode_port = get_port_from_url(config['configurations']['hdfs-site'][dfs_namenode_webui_property]) else: - namenode_port = "50070" + namenode_port = dfs_namenode_webui_default_port - if 'dfs.namenode.secondary.http-address' in config['configurations']['hdfs-site']: - snamenode_port = get_port_from_url(config['configurations']['hdfs-site']['dfs.namenode.secondary.http-address']) + # extract Secondary NameNode + if dfs_snamenode_webui_property in config['configurations']['hdfs-site']: + snamenode_port = get_port_from_url(config['configurations']['hdfs-site'][dfs_snamenode_webui_property]) else: - snamenode_port = "50071" + snamenode_port = dfs_snamenode_webui_default_port if 'dfs.journalnode.http-address' in config['configurations']['hdfs-site']: journalnode_port = get_port_from_url(config['configurations']['hdfs-site']['dfs.journalnode.http-address']) - datanode_port = get_port_from_url(config['configurations']['hdfs-site']['dfs.datanode.http.address']) + datanode_port = get_port_from_url(config['configurations']['hdfs-site'][dfs_datanode_webui_property]) -hbase_master_rpc_port = default('/configurations/hbase-site/hbase.master.port', "60000") -rm_port = get_port_from_url(config['configurations']['yarn-site']['yarn.resourcemanager.webapp.address']) -nm_port = "8042" -hs_port = get_port_from_url(config['configurations']['mapred-site']['mapreduce.jobhistory.webapp.address']) +nm_port = yarn_nodemanager_default_port +if yarn_nodemanager_webui_property in config['configurations']['yarn-site']: + nm_port = get_port_from_url(config['configurations']['yarn-site'][yarn_nodemanager_webui_property]) + flume_port = "4159" +hbase_master_rpc_port = default('/configurations/hbase-site/hbase.master.port', "60000") +rm_port = get_port_from_url(config['configurations']['yarn-site'][yarn_rm_webui_property]) +hs_port = get_port_from_url(config['configurations']['mapred-site'][mapreduce_jobhistory_webui_property]) hive_metastore_port = get_port_from_url(config['configurations']['hive-site']['hive.metastore.uris']) #"9083" hive_server_port = default('/configurations/hive-site/hive.server2.thrift.port',"10000") templeton_port = config['configurations']['webhcat-site']['templeton.port'] #"50111" @@ -130,7 +195,7 @@ nimbus_port = config['configurations']['storm-site']['nimbus.thrift.port'] supervisor_port = "56431" storm_rest_api_port = "8745" falcon_port = config['configurations']['falcon-env']['falcon_port'] -ahs_port = get_port_from_url(config['configurations']['yarn-site']['yarn.timeline-service.webapp.address']) +ahs_port = get_port_from_url(config['configurations']['yarn-site'][yarn_timeline_service_webui_property]) knox_gateway_port = config['configurations']['gateway-site']['gateway.port'] # use sensible defaults for checkpoint as they are required by Nagios and http://git-wip-us.apache.org/repos/asf/ambari/blob/d2255919/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 index 39bb636..9a6f7a4 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 @@ -152,7 +152,7 @@ define command{ define command{ command_name check_checkpoint_time - command_line $USER1$/check_wrapper.sh /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_checkpoint_time.py -H "$ARG1$" -p $ARG2$ -w $ARG3$ -c $ARG4$ -t $ARG5$ -x $ARG6$ + command_line $USER1$/check_wrapper.sh /var/lib/ambari-agent/ambari-python-wrap $USER1$/check_checkpoint_time.py -H "$ARG1$" -p $ARG2$ -w $ARG3$ -c $ARG4$ -t $ARG5$ -x $ARG6$ -s $ARG7$ } define command{ http://git-wip-us.apache.org/repos/asf/ambari/blob/d2255919/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 index ec375be..1ed55d1 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 @@ -332,7 +332,7 @@ define service { use hadoop-service service_description NAMENODE::NameNode edit logs directory status on {{ namenode_hostname }} servicegroups HDFS - check_command check_name_dir_status!{{ namenode_port }}!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_name_dir_status!{{ namenode_port }}!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 0.5 retry_check_interval 0.5 max_check_attempts 3 @@ -345,7 +345,7 @@ define service { service_description NAMENODE::NameNode host CPU utilization on {{ namenode_hostname }} servicegroups HDFS # check_command check_cpu!200%!250% - check_command check_cpu!{{ namenode_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_cpu!{{ namenode_port }}!200%!250%!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 5 retry_check_interval 2 max_check_attempts 5 @@ -379,7 +379,7 @@ define service { use hadoop-service service_description HDFS::NameNode RPC latency on {{ namenode_hostname }} servicegroups HDFS - check_command check_rpcq_latency!NameNode!{{ namenode_port }}!3000!5000!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_rpcq_latency!NameNode!{{ namenode_port }}!3000!5000!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 5 retry_check_interval 1 max_check_attempts 5 @@ -392,7 +392,7 @@ define service { use hadoop-service service_description NAMENODE::Last checkpoint time servicegroups HDFS - check_command check_checkpoint_time!{{ nn_hosts_string }}!{{ namenode_port }}!200!200!{{ dfs_namenode_checkpoint_period }}!{{dfs_namenode_checkpoint_txns}} + check_command check_checkpoint_time!{{ nn_hosts_string }}!{{ namenode_port }}!200!200!{{ dfs_namenode_checkpoint_period }}!{{dfs_namenode_checkpoint_txns}}!{{str(hdfs_ssl_enabled).lower()}} normal_check_interval 0.5 retry_check_interval 0.25 max_check_attempts 3 @@ -403,7 +403,7 @@ define service { use hadoop-service service_description HDFS::Blocks health servicegroups HDFS - check_command check_hdfs_blocks!$HOSTGROUPMEMBERS:namenode$!{{ namenode_port }}!{{ nn_metrics_property }}!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_hdfs_blocks!$HOSTGROUPMEMBERS:namenode$!{{ namenode_port }}!{{ nn_metrics_property }}!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 2 retry_check_interval 1 max_check_attempts 1 @@ -414,7 +414,7 @@ define service { use hadoop-service service_description HDFS::HDFS capacity utilization servicegroups HDFS - check_command check_hdfs_capacity!$HOSTGROUPMEMBERS:namenode$!{{ namenode_port }}!80%!90%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_hdfs_capacity!$HOSTGROUPMEMBERS:namenode$!{{ namenode_port }}!80%!90%!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 2 retry_check_interval 1 max_check_attempts 1 @@ -442,7 +442,7 @@ define service { service_description RESOURCEMANAGER::ResourceManager CPU utilization servicegroups YARN # check_command check_cpu!200%!250% - check_command check_cpu_ha!{{ rm_hosts_in_str }}!{{ rm_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_cpu_ha!{{ rm_hosts_in_str }}!{{ rm_port }}!200%!250%!{{ str(yarn_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 5 retry_check_interval 2 max_check_attempts 5 @@ -454,7 +454,7 @@ define service { use hadoop-service service_description RESOURCEMANAGER::ResourceManager RPC latency servicegroups YARN - check_command check_rpcq_latency_ha!{{ rm_hosts_in_str }}!ResourceManager!{{ rm_port }}!3000!5000!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_rpcq_latency_ha!{{ rm_hosts_in_str }}!ResourceManager!{{ rm_port }}!3000!5000!{{ str(yarn_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 5 retry_check_interval 1 max_check_attempts 5 @@ -492,7 +492,7 @@ define service { use hadoop-service service_description NODEMANAGER::NodeManager health servicegroups YARN - check_command check_nodemanager_health!{{ nm_port }}!{{ str(security_enabled).lower() }}!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }} + check_command check_nodemanager_health!{{ nm_port }}!{{ str(security_enabled).lower() }}!{{ str(yarn_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }} normal_check_interval 1 retry_check_interval 1 max_check_attempts 3 @@ -529,7 +529,7 @@ define service { service_description JOBHISTORY::HistoryServer CPU utilization servicegroups MAPREDUCE # check_command check_cpu!200%!250% - check_command check_cpu!{{ hs_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_cpu!{{ hs_port }}!200%!250%!{{ str(mapreduce_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 5 retry_check_interval 2 max_check_attempts 5 @@ -541,7 +541,7 @@ define service { use hadoop-service service_description JOBHISTORY::HistoryServer RPC latency servicegroups MAPREDUCE - check_command check_rpcq_latency!JobHistoryServer!{{ hs_port }}!3000!5000!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_rpcq_latency!JobHistoryServer!{{ hs_port }}!3000!5000!{{ str(mapreduce_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 5 retry_check_interval 1 max_check_attempts 5 @@ -605,7 +605,7 @@ define service { use hadoop-service service_description DATANODE::DataNode space servicegroups HDFS - check_command check_datanode_storage!{{ datanode_port }}!90%!90%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_datanode_storage!{{ datanode_port }}!90%!90%!{{ str(hdfs_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 2 retry_check_interval 1 max_check_attempts 2 @@ -659,7 +659,7 @@ define service { service_description HBASEMASTER::HBase Master CPU utilization servicegroups HBASE # check_command check_cpu!200%!250% - check_command check_cpu_ha!{{ hbase_master_hosts_in_str }}!{{ hbase_master_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_cpu_ha!{{ hbase_master_hosts_in_str }}!{{ hbase_master_port }}!200%!250%!false!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 5 retry_check_interval 2 max_check_attempts 5
