Repository: ambari Updated Branches: refs/heads/trunk 9f1318585 -> 09ffe3195
AMBARI-6697. Fix nagios alerts for ResourceManager HA mode.(vbrodetskyi) Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/09ffe319 Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/09ffe319 Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/09ffe319 Branch: refs/heads/trunk Commit: 09ffe3195afe35bc432a938d559f9033fd9d687a Parents: 9f13185 Author: Vitaly Brodetskyi <vbrodets...@hortonworks.com> Authored: Thu Jul 31 17:22:06 2014 +0300 Committer: Vitaly Brodetskyi <vbrodets...@hortonworks.com> Committed: Thu Jul 31 17:22:06 2014 +0300 ---------------------------------------------------------------------- .../NAGIOS/package/files/check_cpu_ha.php | 116 +++++++++++++++++++ .../NAGIOS/package/files/check_hbase_cpu.php | 116 ------------------- .../package/scripts/nagios_server_config.py | 2 +- .../package/templates/hadoop-commands.cfg.j2 | 4 +- .../package/templates/hadoop-services.cfg.j2 | 2 +- .../NAGIOS/package/files/check_cpu_ha.php | 116 +++++++++++++++++++ .../NAGIOS/package/files/check_hbase_cpu.php | 116 ------------------- .../package/files/check_rpcq_latency_ha.php | 115 ++++++++++++++++++ .../NAGIOS/package/files/check_webui_ha.sh | 64 ++++++++++ .../package/scripts/nagios_server_config.py | 4 +- .../services/NAGIOS/package/scripts/params.py | 2 + .../package/templates/hadoop-commands.cfg.j2 | 14 ++- .../package/templates/hadoop-services.cfg.j2 | 22 ++-- .../stacks/1.3.2/NAGIOS/test_nagios_server.py | 4 +- .../stacks/2.0.6/NAGIOS/test_nagios_server.py | 14 ++- 15 files changed, 458 insertions(+), 253 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_cpu_ha.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_cpu_ha.php b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_cpu_ha.php new file mode 100644 index 0000000..91a7c64 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_cpu_ha.php @@ -0,0 +1,116 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + include "hdp_nagios_init.php"; + + $options = getopt ("h:p:w:c:k:r:t:u:e"); + if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options) + || !array_key_exists('c', $options)) { + usage(); + exit(3); + } + + $hosts=$options['h']; + $port=$options['p']; + $warn=$options['w']; $warn = preg_replace('/%$/', '', $warn); + $crit=$options['c']; $crit = preg_replace('/%$/', '', $crit); + $keytab_path=$options['k']; + $principal_name=$options['r']; + $kinit_path_local=$options['t']; + $security_enabled=$options['u']; + $ssl_enabled=$options['e']; + + /* Kinit if security enabled */ + $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name); + $retcode = $status[0]; + $output = $status[1]; + + if ($output != 0) { + echo "CRITICAL: Error doing kinit for nagios. $output"; + exit (2); + } + + $protocol = ($ssl_enabled == "true" ? "https" : "http"); + + $jmx_response_available = false; + $jmx_response; + + foreach (preg_split('/,/', $hosts) as $host) { + /* Get the json document */ + + $ch = curl_init(); + $username = rtrim(`id -un`, "\n"); + curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=java.lang:type=OperatingSystem", + CURLOPT_RETURNTRANSFER => true, + CURLOPT_HTTPAUTH => CURLAUTH_ANY, + CURLOPT_USERPWD => "$username:", + CURLOPT_SSL_VERIFYPEER => FALSE )); + $json_string = curl_exec($ch); + $info = curl_getinfo($ch); + if (intval($info['http_code']) == 401){ + logout(); + $json_string = curl_exec($ch); + } + $info = curl_getinfo($ch); + curl_close($ch); + $json_array = json_decode($json_string, true); + + $object = $json_array['beans'][0]; + + if (count($object) > 0) { + $jmx_response_available = true; + $jmx_response = $object; + } + } + + if ($jmx_response_available === false) { + echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n"; + exit(2); + } + + $cpu_load = $jmx_response['SystemCpuLoad']; + + if (!isset($jmx_response['SystemCpuLoad']) || $cpu_load < 0.0) { + echo "WARNING: Data unavailable, SystemCpuLoad is not set\n"; + exit(1); + } + + $cpu_count = $jmx_response['AvailableProcessors']; + + $cpu_percent = $cpu_load*100; + + $out_msg = $cpu_count . " CPU, load " . number_format($cpu_percent, 1, '.', '') . '%'; + + if ($cpu_percent > $crit) { + echo $out_msg . ' > ' . $crit . "% : CRITICAL\n"; + exit(2); + } + if ($cpu_percent > $warn) { + echo $out_msg . ' > ' . $warn . "% : WARNING\n"; + exit(1); + } + + echo $out_msg . ' < ' . $warn . "% : OK\n"; + exit(0); + + /* print usage */ + function usage () { + echo "Usage: $0 -h <host> -p port -w <warn%> -c <crit%> -k keytab_path -r principal_name -t kinit_path -u security_enabled -e ssl_enabled\n"; + } +?> http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_hbase_cpu.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_hbase_cpu.php b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_hbase_cpu.php deleted file mode 100644 index 91a7c64..0000000 --- a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/files/check_hbase_cpu.php +++ /dev/null @@ -1,116 +0,0 @@ -<?php -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - include "hdp_nagios_init.php"; - - $options = getopt ("h:p:w:c:k:r:t:u:e"); - if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options) - || !array_key_exists('c', $options)) { - usage(); - exit(3); - } - - $hosts=$options['h']; - $port=$options['p']; - $warn=$options['w']; $warn = preg_replace('/%$/', '', $warn); - $crit=$options['c']; $crit = preg_replace('/%$/', '', $crit); - $keytab_path=$options['k']; - $principal_name=$options['r']; - $kinit_path_local=$options['t']; - $security_enabled=$options['u']; - $ssl_enabled=$options['e']; - - /* Kinit if security enabled */ - $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name); - $retcode = $status[0]; - $output = $status[1]; - - if ($output != 0) { - echo "CRITICAL: Error doing kinit for nagios. $output"; - exit (2); - } - - $protocol = ($ssl_enabled == "true" ? "https" : "http"); - - $jmx_response_available = false; - $jmx_response; - - foreach (preg_split('/,/', $hosts) as $host) { - /* Get the json document */ - - $ch = curl_init(); - $username = rtrim(`id -un`, "\n"); - curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=java.lang:type=OperatingSystem", - CURLOPT_RETURNTRANSFER => true, - CURLOPT_HTTPAUTH => CURLAUTH_ANY, - CURLOPT_USERPWD => "$username:", - CURLOPT_SSL_VERIFYPEER => FALSE )); - $json_string = curl_exec($ch); - $info = curl_getinfo($ch); - if (intval($info['http_code']) == 401){ - logout(); - $json_string = curl_exec($ch); - } - $info = curl_getinfo($ch); - curl_close($ch); - $json_array = json_decode($json_string, true); - - $object = $json_array['beans'][0]; - - if (count($object) > 0) { - $jmx_response_available = true; - $jmx_response = $object; - } - } - - if ($jmx_response_available === false) { - echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n"; - exit(2); - } - - $cpu_load = $jmx_response['SystemCpuLoad']; - - if (!isset($jmx_response['SystemCpuLoad']) || $cpu_load < 0.0) { - echo "WARNING: Data unavailable, SystemCpuLoad is not set\n"; - exit(1); - } - - $cpu_count = $jmx_response['AvailableProcessors']; - - $cpu_percent = $cpu_load*100; - - $out_msg = $cpu_count . " CPU, load " . number_format($cpu_percent, 1, '.', '') . '%'; - - if ($cpu_percent > $crit) { - echo $out_msg . ' > ' . $crit . "% : CRITICAL\n"; - exit(2); - } - if ($cpu_percent > $warn) { - echo $out_msg . ' > ' . $warn . "% : WARNING\n"; - exit(1); - } - - echo $out_msg . ' < ' . $warn . "% : OK\n"; - exit(0); - - /* print usage */ - function usage () { - echo "Usage: $0 -h <host> -p port -w <warn%> -c <crit%> -k keytab_path -r principal_name -t kinit_path -u security_enabled -e ssl_enabled\n"; - } -?> http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server_config.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server_config.py b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server_config.py index b6c7144..ee7f149 100644 --- a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server_config.py +++ b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/scripts/nagios_server_config.py @@ -50,7 +50,7 @@ def nagios_server_config(): nagios_server_check( 'check_cpu.pl') nagios_server_check( 'check_cpu.php') - nagios_server_check( 'check_hbase_cpu.php') + nagios_server_check( 'check_cpu_ha.php') nagios_server_check( 'check_datanode_storage.php') nagios_server_check( 'check_aggregate.php') nagios_server_check( 'check_hdfs_blocks.php') http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 index fa2010b..3035f30 100644 --- a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 +++ b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 @@ -46,8 +46,8 @@ define command { } define command { - command_name check_hbase_cpu - command_line $USER1$/check_wrapper.sh php $USER1$/check_hbase_cpu.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -u $ARG9$ + command_name check_cpu_ha + command_line $USER1$/check_wrapper.sh php $USER1$/check_cpu_ha.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -u $ARG9$ } {% endif %} http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-services.cfg.j2 ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-services.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-services.cfg.j2 index 8c37bc9..1a68bfd 100644 --- a/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-services.cfg.j2 +++ b/ambari-server/src/main/resources/stacks/HDP/1.3.2/services/NAGIOS/package/templates/hadoop-services.cfg.j2 @@ -496,7 +496,7 @@ define service { service_description HBASEMASTER::HBase Master CPU utilization servicegroups HBASE # check_command check_cpu!200%!250% - check_command check_hbase_cpu!{{ hbase_master_hosts_in_str }}!{{ hbase_master_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_cpu_ha!{{ hbase_master_hosts_in_str }}!{{ hbase_master_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 5 retry_check_interval 2 max_check_attempts 5 http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_cpu_ha.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_cpu_ha.php b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_cpu_ha.php new file mode 100644 index 0000000..91a7c64 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_cpu_ha.php @@ -0,0 +1,116 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + include "hdp_nagios_init.php"; + + $options = getopt ("h:p:w:c:k:r:t:u:e"); + if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options) + || !array_key_exists('c', $options)) { + usage(); + exit(3); + } + + $hosts=$options['h']; + $port=$options['p']; + $warn=$options['w']; $warn = preg_replace('/%$/', '', $warn); + $crit=$options['c']; $crit = preg_replace('/%$/', '', $crit); + $keytab_path=$options['k']; + $principal_name=$options['r']; + $kinit_path_local=$options['t']; + $security_enabled=$options['u']; + $ssl_enabled=$options['e']; + + /* Kinit if security enabled */ + $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name); + $retcode = $status[0]; + $output = $status[1]; + + if ($output != 0) { + echo "CRITICAL: Error doing kinit for nagios. $output"; + exit (2); + } + + $protocol = ($ssl_enabled == "true" ? "https" : "http"); + + $jmx_response_available = false; + $jmx_response; + + foreach (preg_split('/,/', $hosts) as $host) { + /* Get the json document */ + + $ch = curl_init(); + $username = rtrim(`id -un`, "\n"); + curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=java.lang:type=OperatingSystem", + CURLOPT_RETURNTRANSFER => true, + CURLOPT_HTTPAUTH => CURLAUTH_ANY, + CURLOPT_USERPWD => "$username:", + CURLOPT_SSL_VERIFYPEER => FALSE )); + $json_string = curl_exec($ch); + $info = curl_getinfo($ch); + if (intval($info['http_code']) == 401){ + logout(); + $json_string = curl_exec($ch); + } + $info = curl_getinfo($ch); + curl_close($ch); + $json_array = json_decode($json_string, true); + + $object = $json_array['beans'][0]; + + if (count($object) > 0) { + $jmx_response_available = true; + $jmx_response = $object; + } + } + + if ($jmx_response_available === false) { + echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n"; + exit(2); + } + + $cpu_load = $jmx_response['SystemCpuLoad']; + + if (!isset($jmx_response['SystemCpuLoad']) || $cpu_load < 0.0) { + echo "WARNING: Data unavailable, SystemCpuLoad is not set\n"; + exit(1); + } + + $cpu_count = $jmx_response['AvailableProcessors']; + + $cpu_percent = $cpu_load*100; + + $out_msg = $cpu_count . " CPU, load " . number_format($cpu_percent, 1, '.', '') . '%'; + + if ($cpu_percent > $crit) { + echo $out_msg . ' > ' . $crit . "% : CRITICAL\n"; + exit(2); + } + if ($cpu_percent > $warn) { + echo $out_msg . ' > ' . $warn . "% : WARNING\n"; + exit(1); + } + + echo $out_msg . ' < ' . $warn . "% : OK\n"; + exit(0); + + /* print usage */ + function usage () { + echo "Usage: $0 -h <host> -p port -w <warn%> -c <crit%> -k keytab_path -r principal_name -t kinit_path -u security_enabled -e ssl_enabled\n"; + } +?> http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_hbase_cpu.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_hbase_cpu.php b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_hbase_cpu.php deleted file mode 100644 index 91a7c64..0000000 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_hbase_cpu.php +++ /dev/null @@ -1,116 +0,0 @@ -<?php -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - - include "hdp_nagios_init.php"; - - $options = getopt ("h:p:w:c:k:r:t:u:e"); - if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options) - || !array_key_exists('c', $options)) { - usage(); - exit(3); - } - - $hosts=$options['h']; - $port=$options['p']; - $warn=$options['w']; $warn = preg_replace('/%$/', '', $warn); - $crit=$options['c']; $crit = preg_replace('/%$/', '', $crit); - $keytab_path=$options['k']; - $principal_name=$options['r']; - $kinit_path_local=$options['t']; - $security_enabled=$options['u']; - $ssl_enabled=$options['e']; - - /* Kinit if security enabled */ - $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name); - $retcode = $status[0]; - $output = $status[1]; - - if ($output != 0) { - echo "CRITICAL: Error doing kinit for nagios. $output"; - exit (2); - } - - $protocol = ($ssl_enabled == "true" ? "https" : "http"); - - $jmx_response_available = false; - $jmx_response; - - foreach (preg_split('/,/', $hosts) as $host) { - /* Get the json document */ - - $ch = curl_init(); - $username = rtrim(`id -un`, "\n"); - curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=java.lang:type=OperatingSystem", - CURLOPT_RETURNTRANSFER => true, - CURLOPT_HTTPAUTH => CURLAUTH_ANY, - CURLOPT_USERPWD => "$username:", - CURLOPT_SSL_VERIFYPEER => FALSE )); - $json_string = curl_exec($ch); - $info = curl_getinfo($ch); - if (intval($info['http_code']) == 401){ - logout(); - $json_string = curl_exec($ch); - } - $info = curl_getinfo($ch); - curl_close($ch); - $json_array = json_decode($json_string, true); - - $object = $json_array['beans'][0]; - - if (count($object) > 0) { - $jmx_response_available = true; - $jmx_response = $object; - } - } - - if ($jmx_response_available === false) { - echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n"; - exit(2); - } - - $cpu_load = $jmx_response['SystemCpuLoad']; - - if (!isset($jmx_response['SystemCpuLoad']) || $cpu_load < 0.0) { - echo "WARNING: Data unavailable, SystemCpuLoad is not set\n"; - exit(1); - } - - $cpu_count = $jmx_response['AvailableProcessors']; - - $cpu_percent = $cpu_load*100; - - $out_msg = $cpu_count . " CPU, load " . number_format($cpu_percent, 1, '.', '') . '%'; - - if ($cpu_percent > $crit) { - echo $out_msg . ' > ' . $crit . "% : CRITICAL\n"; - exit(2); - } - if ($cpu_percent > $warn) { - echo $out_msg . ' > ' . $warn . "% : WARNING\n"; - exit(1); - } - - echo $out_msg . ' < ' . $warn . "% : OK\n"; - exit(0); - - /* print usage */ - function usage () { - echo "Usage: $0 -h <host> -p port -w <warn%> -c <crit%> -k keytab_path -r principal_name -t kinit_path -u security_enabled -e ssl_enabled\n"; - } -?> http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_rpcq_latency_ha.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_rpcq_latency_ha.php b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_rpcq_latency_ha.php new file mode 100644 index 0000000..3e7616c --- /dev/null +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_rpcq_latency_ha.php @@ -0,0 +1,115 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This plugin makes call to master node, get the jmx-json document + * It checks the rpc wait time in the queue, RpcQueueTime_avg_time + * check_rpcq_latency -h hostaddress -p port -t ServiceName -w 1 -c 1 + * Warning and Critical values are in seconds + * Service Name = JobTracker, NameNode, JobHistoryServer + */ + + include "hdp_nagios_init.php"; + + $options = getopt ("h:p:w:c:n:e:k:r:t:s:"); + if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options) + || !array_key_exists('c', $options) || !array_key_exists('n', $options)) { + usage(); + exit(3); + } + + $hosts=$options['h']; + $port=$options['p']; + $master=$options['n']; + $warn=$options['w']; + $crit=$options['c']; + $keytab_path=$options['k']; + $principal_name=$options['r']; + $kinit_path_local=$options['t']; + $security_enabled=$options['s']; + $ssl_enabled=$options['e']; + + /* Kinit if security enabled */ + $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name); + $retcode = $status[0]; + $output = $status[1]; + + if ($output != 0) { + echo "CRITICAL: Error doing kinit for nagios. $output"; + exit (2); + } + + $protocol = ($ssl_enabled == "true" ? "https" : "http"); + + $jmx_response_available = false; + $jmx_response; + + foreach (preg_split('/,/', $hosts) as $host) { + /* Get the json document */ + $ch = curl_init(); + $username = rtrim(`id -un`, "\n"); + curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=Hadoop:service=".$master.",name=RpcActivityForPort*", + CURLOPT_RETURNTRANSFER => true, + CURLOPT_HTTPAUTH => CURLAUTH_ANY, + CURLOPT_USERPWD => "$username:", + CURLOPT_SSL_VERIFYPEER => FALSE )); + $json_string = curl_exec($ch); + $info = curl_getinfo($ch); + if (intval($info['http_code']) == 401){ + logout(); + $json_string = curl_exec($ch); + } + $info = curl_getinfo($ch); + curl_close($ch); + $json_array = json_decode($json_string, true); + $object = $json_array['beans'][0]; + + if (count($object) > 0) { + $jmx_response_available = true; + $jmx_response = $object; + } + } + + if ($jmx_response_available === false) { + echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n"; + exit(2); + } + + $RpcQueueTime_avg_time = round($jmx_response['RpcQueueTime_avg_time'], 2); + $RpcProcessingTime_avg_time = round($jmx_response['RpcProcessingTime_avg_time'], 2); + + $out_msg = "RpcQueueTime_avg_time:<" . $RpcQueueTime_avg_time . + "> Secs, RpcProcessingTime_avg_time:<" . $RpcProcessingTime_avg_time . + "> Secs"; + + if ($RpcQueueTime_avg_time >= $crit) { + echo "CRITICAL: " . $out_msg . "\n"; + exit (2); + } + if ($RpcQueueTime_avg_time >= $warn) { + echo "WARNING: " . $out_msg . "\n"; + exit (1); + } + echo "OK: " . $out_msg . "\n"; + exit(0); + + /* print usage */ + function usage () { + echo "Usage: $0 -h <host> -p port -n <JobTracker/NameNode/JobHistoryServer> -w <warn_in_sec> -c <crit_in_sec> -k keytab path -r principal name -t kinit path -s security enabled -e ssl enabled\n"; + } +?> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui_ha.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui_ha.sh b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui_ha.sh new file mode 100644 index 0000000..1d0f5f3 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/files/check_webui_ha.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# + +service=$1 +hosts=$2 +port=$3 + +checkurl () { + url=$1 + host=$2 + export no_proxy=$host + curl $url -o /dev/null + echo $? +} + +if [[ -z "$service" || -z "$hosts" ]]; then + echo "UNKNOWN: Invalid arguments; Usage: check_webui_ha.sh service_name, host_name"; + exit 3; +fi + +case "$service" in +resourcemanager) + url_end_part="/cluster" + ;; +*) echo "UNKNOWN: Invalid service name [$service], valid options [resourcemanager]" + exit 3 + ;; +esac + +OIFS="$IFS" +IFS=',' +read -a hosts_array <<< "${hosts}" +IFS="$OIFS" + +for host in "${hosts_array[@]}" +do + weburl="http://${host}:${port}${url_end_part}" + if [[ `checkurl "$weburl" "$host"` -eq 0 ]]; then + echo "OK: Successfully accessed $service Web UI" + exit 0; + fi +done + +echo "WARNING: $service Web UI not accessible : $weburl"; +exit 1; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py index 40a1bb6..86d5a8a 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/nagios_server_config.py @@ -50,13 +50,15 @@ def nagios_server_config(): nagios_server_check( 'check_cpu.pl') nagios_server_check( 'check_cpu.php') - nagios_server_check( 'check_hbase_cpu.php') + nagios_server_check( 'check_cpu_ha.php') nagios_server_check( 'check_datanode_storage.php') nagios_server_check( 'check_aggregate.php') nagios_server_check( 'check_hdfs_blocks.php') nagios_server_check( 'check_hdfs_capacity.php') nagios_server_check( 'check_rpcq_latency.php') + nagios_server_check( 'check_rpcq_latency_ha.php') nagios_server_check( 'check_webui.sh') + nagios_server_check( 'check_webui_ha.sh') nagios_server_check( 'check_name_dir_status.php') nagios_server_check( 'check_oozie_status.sh') nagios_server_check( 'check_templeton_status.sh') http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py index 9365922..504fd4e 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/scripts/params.py @@ -216,6 +216,8 @@ _slave_hosts = default("/clusterHostInfo/slave_hosts", None) _journalnode_hosts = default("/clusterHostInfo/journalnode_hosts", None) _zkfc_hosts = default("/clusterHostInfo/zkfc_hosts", None) _rm_host = default("/clusterHostInfo/rm_host", None) +if type(_rm_host) is list: + rm_hosts_in_str = ','.join(_rm_host) _nm_hosts = default("/clusterHostInfo/nm_hosts", None) _hs_host = default("/clusterHostInfo/hs_host", None) _zookeeper_hosts = default("/clusterHostInfo/zookeeper_hosts", None) http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 index bc5b2df..a8a616c 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-commands.cfg.j2 @@ -45,8 +45,8 @@ define command { command_line $USER1$/check_wrapper.sh php $USER1$/check_cpu.php -h $HOSTADDRESS$ -p $ARG1$ -w $ARG2$ -c $ARG3$ -e $ARG4$ -k $ARG5$ -r $ARG6$ -t $ARG7$ -u $ARG8$ } define command { - command_name check_hbase_cpu - command_line $USER1$/check_wrapper.sh php $USER1$/check_hbase_cpu.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -u $ARG9$ + command_name check_cpu_ha + command_line $USER1$/check_wrapper.sh php $USER1$/check_cpu_ha.php -h $ARG1$ -p $ARG2$ -w $ARG3$ -c $ARG4$ -e $ARG5$ -k $ARG6$ -r $ARG7$ -t $ARG8$ -u $ARG9$ } {% endif %} @@ -77,6 +77,11 @@ define command{ } define command{ + command_name check_rpcq_latency_ha + command_line $USER1$/check_wrapper.sh php $USER1$/check_rpcq_latency_ha.php -h $ARG1$ -p $ARG3$ -n $ARG2$ -w $ARG4$ -c $ARG5$ -e $ARG6$ -k $ARG7$ -r $ARG8$ -t $ARG9$ -s $ARG10$ + } + +define command{ command_name check_nagios command_line $USER1$/check_wrapper.sh $USER1$/check_nagios -e $ARG1$ -F $ARG2$ -C $ARG3$ } @@ -87,6 +92,11 @@ define command{ } define command{ + command_name check_webui_ha + command_line $USER1$/check_wrapper.sh $USER1$/check_webui_ha.sh $ARG1$ $ARG2$ $ARG3$ + } + +define command{ command_name check_name_dir_status command_line $USER1$/check_wrapper.sh php $USER1$/check_name_dir_status.php -h $HOSTADDRESS$ -p $ARG1$ -e $ARG2$ -k $ARG3$ -r $ARG4$ -t $ARG5$ -s $ARG6$ } http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 index 0f4f2ca..045e9ad 100644 --- a/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 +++ b/ambari-server/src/main/resources/stacks/HDP/2.0.6/services/NAGIOS/package/templates/hadoop-services.cfg.j2 @@ -425,11 +425,11 @@ define service { {% if hostgroup_defs['resourcemanager'] %} # YARN::RESOURCEMANAGER Checks define service { - hostgroup_name resourcemanager + hostgroup_name nagios-server use hadoop-service service_description RESOURCEMANAGER::ResourceManager Web UI servicegroups YARN - check_command check_webui!resourcemanager!{{ rm_port }} + check_command check_webui_ha!resourcemanager!{{ rm_hosts_in_str }}!{{ rm_port }} normal_check_interval 1 retry_check_interval 1 max_check_attempts 3 @@ -437,12 +437,12 @@ define service { {% if check_cpu_on %} define service { - hostgroup_name resourcemanager + hostgroup_name nagios-server use hadoop-service service_description RESOURCEMANAGER::ResourceManager CPU utilization servicegroups YARN -# check_command check_cpu!200%!250% - check_command check_cpu!{{ rm_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} +# check_command check_cpu!200%!250% + check_command check_cpu_ha!{{ rm_hosts_in_str }}!{{ rm_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 5 retry_check_interval 2 max_check_attempts 5 @@ -450,26 +450,28 @@ define service { {% endif %} define service { - hostgroup_name resourcemanager + hostgroup_name nagios-server use hadoop-service service_description RESOURCEMANAGER::ResourceManager RPC latency servicegroups YARN - check_command check_rpcq_latency!ResourceManager!{{ rm_port }}!3000!5000!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_rpcq_latency_ha!{{ rm_hosts_in_str }}!ResourceManager!{{ rm_port }}!3000!5000!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 5 retry_check_interval 1 max_check_attempts 5 } +{% for rm_host in _rm_host %} define service { - hostgroup_name resourcemanager + host_name {{ rm_host }} use hadoop-service - service_description RESOURCEMANAGER::ResourceManager process + service_description RESOURCEMANAGER::ResourceManager process on {{ rm_host }} servicegroups YARN check_command check_tcp_wrapper!{{ rm_port }}!-w 1 -c 1 normal_check_interval 1 retry_check_interval 0.5 max_check_attempts 3 } +{% endfor %} {% endif %} {% if hostgroup_defs['nodemanagers'] %} @@ -657,7 +659,7 @@ define service { service_description HBASEMASTER::HBase Master CPU utilization servicegroups HBASE # check_command check_cpu!200%!250% - check_command check_hbase_cpu!{{ hbase_master_hosts_in_str }}!{{ hbase_master_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} + check_command check_cpu_ha!{{ hbase_master_hosts_in_str }}!{{ hbase_master_port }}!200%!250%!{{ str(hadoop_ssl_enabled).lower() }}!{{ nagios_keytab_path }}!{{ nagios_principal_name }}!{{ kinit_path_local }}!{{ str(security_enabled).lower() }} normal_check_interval 5 retry_check_interval 2 max_check_attempts 5 http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_nagios_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_nagios_server.py b/ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_nagios_server.py index f3b0f7a..e48a5ca 100644 --- a/ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_nagios_server.py +++ b/ambari-server/src/test/python/stacks/1.3.2/NAGIOS/test_nagios_server.py @@ -168,8 +168,8 @@ class TestNagiosServer(RMFTestCase): content=StaticFile('check_cpu.php'), mode=0755 ) - self.assertResourceCalled('File', '/usr/lib64/nagios/plugins/check_hbase_cpu.php', - content=StaticFile('check_hbase_cpu.php'), + self.assertResourceCalled('File', '/usr/lib64/nagios/plugins/check_cpu_ha.php', + content=StaticFile('check_cpu_ha.php'), mode=0755 ) self.assertResourceCalled('File', http://git-wip-us.apache.org/repos/asf/ambari/blob/09ffe319/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py b/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py index f51169c..6e980b5 100644 --- a/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py +++ b/ambari-server/src/test/python/stacks/2.0.6/NAGIOS/test_nagios_server.py @@ -173,8 +173,8 @@ class TestNagiosServer(RMFTestCase): content=StaticFile('check_cpu.php'), mode=0755 ) - self.assertResourceCalled('File', '/usr/lib64/nagios/plugins/check_hbase_cpu.php', - content=StaticFile('check_hbase_cpu.php'), + self.assertResourceCalled('File', '/usr/lib64/nagios/plugins/check_cpu_ha.php', + content=StaticFile('check_cpu_ha.php'), mode=0755 ) self.assertResourceCalled('File', @@ -203,11 +203,21 @@ class TestNagiosServer(RMFTestCase): mode=0755 ) self.assertResourceCalled('File', + '/usr/lib64/nagios/plugins/check_rpcq_latency_ha.php', + content=StaticFile('check_rpcq_latency_ha.php'), + mode=0755 + ) + self.assertResourceCalled('File', '/usr/lib64/nagios/plugins/check_webui.sh', content=StaticFile('check_webui.sh'), mode=0755 ) self.assertResourceCalled('File', + '/usr/lib64/nagios/plugins/check_webui_ha.sh', + content=StaticFile('check_webui_ha.sh'), + mode=0755 + ) + self.assertResourceCalled('File', '/usr/lib64/nagios/plugins/check_name_dir_status.php', content=StaticFile('check_name_dir_status.php'), mode=0755