http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_hive_metastore_status.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_hive_metastore_status.sh b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_hive_metastore_status.sh new file mode 100644 index 0000000..640c077 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_hive_metastore_status.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# +#The uri is of the form thrift://<hostname>:<port> +HOST=$1 +PORT=$2 +JAVA_HOME=$3 +SEC_ENABLED=$4 +if [[ "$SEC_ENABLED" == "true" ]]; then + NAGIOS_KEYTAB=$5 + NAGIOS_USER=$6 + KINIT_PATH=$7 + out1=`${KINIT_PATH} -kt ${NAGIOS_KEYTAB} ${NAGIOS_USER} 2>&1` + if [[ "$?" 
-ne 0 ]]; then + echo "CRITICAL: Error doing kinit for nagios [$out1]"; + exit 2; + fi +fi +HCAT_URL=-Dhive.metastore.uris="thrift://$HOST:$PORT" +export JAVA_HOME=$JAVA_HOME +out=`hcat $HCAT_URL -e "show databases" 2>&1` +if [[ "$?" -ne 0 ]]; then + echo "CRITICAL: Error accessing Hive Metastore status [$out]"; + exit 2; +fi +echo "OK: Hive Metastore status OK"; +exit 0;
http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_hue_status.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_hue_status.sh b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_hue_status.sh new file mode 100644 index 0000000..076d9b3 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_hue_status.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# + +status=`/etc/init.d/hue status 2>&1` + +if [[ "$?" 
-ne 0 ]]; then + echo "WARNING: Hue is stopped"; + exit 1; +fi + +echo "OK: Hue is running"; +exit 0; http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_mapred_local_dir_used.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_mapred_local_dir_used.sh b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_mapred_local_dir_used.sh new file mode 100644 index 0000000..3f9243a --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_mapred_local_dir_used.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# +MAPRED_LOCAL_DIRS=$1 +CRITICAL=`echo $2 | cut -d % -f 1` +IFS="," +for mapred_dir in $MAPRED_LOCAL_DIRS +do + percent=`df -hl $mapred_dir | awk '{percent=$5;} END{print percent}' | cut -d % -f 1` + if [ $percent -ge $CRITICAL ]; then + echo "CRITICAL: MapReduce local dir is full." + exit 2 + fi +done +echo "OK: MapReduce local dir space is available." 
+exit 0 http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_name_dir_status.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_name_dir_status.php b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_name_dir_status.php new file mode 100644 index 0000000..186166d --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_name_dir_status.php @@ -0,0 +1,93 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* This plugin makes call to namenode, get the jmx-json document + * check the NameDirStatuses to find any offline (failed) directories + * check_jmx -H hostaddress -p port -k keytab path -r principal name -t kinit path -s security enabled + */ + + include "hdp_nagios_init.php"; + + $options = getopt("h:p:e:k:r:t:s:"); + //Check only for mandatory options + if (!array_key_exists('h', $options) || !array_key_exists('p', $options)) { + usage(); + exit(3); + } + + $host=$options['h']; + $port=$options['p']; + $keytab_path=$options['k']; + $principal_name=$options['r']; + $kinit_path_local=$options['t']; + $security_enabled=$options['s']; + $ssl_enabled=$options['e']; + + /* Kinit if security enabled */ + $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name); + $retcode = $status[0]; + $output = $status[1]; + + if ($output != 0) { + echo "CRITICAL: Error doing kinit for nagios. $output"; + exit (2); + } + + $protocol = ($ssl_enabled == "true" ? "https" : "http"); + + /* Get the json document */ + $ch = curl_init(); + $username = rtrim(`id -un`, "\n"); + curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo", + CURLOPT_RETURNTRANSFER => true, + CURLOPT_HTTPAUTH => CURLAUTH_ANY, + CURLOPT_USERPWD => "$username:", + CURLOPT_SSL_VERIFYPEER => FALSE )); + $json_string = curl_exec($ch); + $info = curl_getinfo($ch); + if (intval($info['http_code']) == 401){ + logout(); + $json_string = curl_exec($ch); + } + $info = curl_getinfo($ch); + curl_close($ch); + $json_array = json_decode($json_string, true); + $object = $json_array['beans'][0]; + if ($object['NameDirStatuses'] == "") { + echo "WARNING: NameNode directory status not available via ".$protocol."://".$host.":".$port."/jmx url, code " . 
$info['http_code'] ."\n"; + exit(1); + } + $NameDirStatuses = json_decode($object['NameDirStatuses'], true); + $failed_dir_count = count($NameDirStatuses['failed']); + $out_msg = "CRITICAL: Offline NameNode directories: "; + if ($failed_dir_count > 0) { + foreach ($NameDirStatuses['failed'] as $key => $value) { + $out_msg = $out_msg . $key . ":" . $value . ", "; + } + echo $out_msg . "\n"; + exit (2); + } + echo "OK: All NameNode directories are active" . "\n"; + exit(0); + + /* print usage */ + function usage () { + echo "Usage: $0 -h <host> -p port -k keytab path -r principal name -t kinit path -s security enabled -e ssl enabled"; + } +?> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_namenodes_ha.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_namenodes_ha.sh b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_namenodes_ha.sh new file mode 100644 index 0000000..83c1aca --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_namenodes_ha.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# + +IFS=',' read -a namenodes <<< "$1" +port=$2 +totalNN=${#namenodes[@]} +activeNN=() +standbyNN=() +unavailableNN=() + +for nn in "${namenodes[@]}" +do + export no_proxy=$nn + status=$(curl -m 5 -s http://$nn:$port/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem | grep -i "tag.HAState" | grep -o -E "standby|active") + if [ "$status" == "active" ]; then + activeNN[${#activeNN[*]}]="$nn" + elif [ "$status" == "standby" ]; then + standbyNN[${#standbyNN[*]}]="$nn" + elif [ "$status" == "" ]; then + unavailableNN[${#unavailableNN[*]}]="$nn" + fi +done + +message="" +critical=false + +if [ ${#activeNN[@]} -gt 1 ]; then + critical=true + message=$message" Only one NN can have HAState=active;" +elif [ ${#activeNN[@]} == 0 ]; then + critical=true + message=$message" No Active NN available;" +elif [ ${#standbyNN[@]} == 0 ]; then + critical=true + message=$message" No Standby NN available;" +fi + +NNstats=" Active<" +for nn in "${activeNN[@]}" +do + NNstats="$NNstats$nn;" +done +NNstats=${NNstats%\;} +NNstats=$NNstats">, Standby<" +for nn in "${standbyNN[@]}" +do + NNstats="$NNstats$nn;" +done +NNstats=${NNstats%\;} +NNstats=$NNstats">, Unavailable<" +for nn in "${unavailableNN[@]}" +do + NNstats="$NNstats$nn;" +done +NNstats=${NNstats%\;} +NNstats=$NNstats">" + +if [ $critical == false ]; then + echo "OK: NameNode HA healthy;"$NNstats + exit 0 +fi + +echo "CRITICAL:"$message$NNstats +exit 2 http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_nodemanager_health.sh 
---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_nodemanager_health.sh b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_nodemanager_health.sh new file mode 100644 index 0000000..eedcd62 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_nodemanager_health.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# +HOST=$1 +PORT=$2 +NODEMANAGER_URL="http://$HOST:$PORT/ws/v1/node/info" +SEC_ENABLED=$3 +export PATH="/usr/bin:$PATH" +if [[ "$SEC_ENABLED" == "true" ]]; then + NAGIOS_KEYTAB=$4 + NAGIOS_USER=$5 + KINIT_PATH=$6 + out1=`${KINIT_PATH} -kt ${NAGIOS_KEYTAB} ${NAGIOS_USER} 2>&1` + if [[ "$?" 
-ne 0 ]]; then + echo "CRITICAL: Error doing kinit for nagios [$out1]"; + exit 2; + fi +fi + +export no_proxy=$HOST +RESPONSE=`curl --negotiate -u : -s $NODEMANAGER_URL` +if [[ "$RESPONSE" == *'"nodeHealthy":true'* ]]; then + echo "OK: NodeManager healthy"; + exit 0; +fi +echo "CRITICAL: NodeManager unhealthy"; +exit 2; http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_oozie_status.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_oozie_status.sh b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_oozie_status.sh new file mode 100644 index 0000000..820ee99 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_oozie_status.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# +# OOZIE_URL is of the form http://<hostname>:<port>/oozie +HOST=`echo $1 | tr '[:upper:]' '[:lower:]'` +PORT=$2 +JAVA_HOME=$3 +SEC_ENABLED=$4 +if [[ "$SEC_ENABLED" == "true" ]]; then + NAGIOS_KEYTAB=$5 + NAGIOS_USER=$6 + KINIT_PATH=$7 + out1=`${KINIT_PATH} -kt ${NAGIOS_KEYTAB} ${NAGIOS_USER} 2>&1` + if [[ "$?" -ne 0 ]]; then + echo "CRITICAL: Error doing kinit for nagios [$out1]"; + exit 2; + fi +fi +OOZIE_URL="http://$HOST:$PORT/oozie" +export JAVA_HOME=$JAVA_HOME +out=`oozie admin -oozie ${OOZIE_URL} -status 2>&1` +if [[ "$?" -ne 0 ]]; then + echo "CRITICAL: Error accessing Oozie Server status [$out]"; + exit 2; +fi +echo "OK: Oozie Server status [$out]"; +exit 0; http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_rpcq_latency.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_rpcq_latency.php b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_rpcq_latency.php new file mode 100644 index 0000000..463f69b --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_rpcq_latency.php @@ -0,0 +1,104 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This plugin makes call to master node, get the jmx-json document + * It checks the rpc wait time in the queue, RpcQueueTime_avg_time + * check_rpcq_latency -h hostaddress -p port -t ServiceName -w 1 -c 1 + * Warning and Critical values are in seconds + * Service Name = JobTracker, NameNode, JobHistoryServer + */ + + include "hdp_nagios_init.php"; + + $options = getopt ("h:p:w:c:n:e:k:r:t:s:"); + if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options) + || !array_key_exists('c', $options) || !array_key_exists('n', $options)) { + usage(); + exit(3); + } + + $host=$options['h']; + $port=$options['p']; + $master=$options['n']; + $warn=$options['w']; + $crit=$options['c']; + $keytab_path=$options['k']; + $principal_name=$options['r']; + $kinit_path_local=$options['t']; + $security_enabled=$options['s']; + $ssl_enabled=$options['e']; + + /* Kinit if security enabled */ + $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name); + $retcode = $status[0]; + $output = $status[1]; + + if ($output != 0) { + echo "CRITICAL: Error doing kinit for nagios. $output"; + exit (2); + } + + $protocol = ($ssl_enabled == "true" ? 
"https" : "http"); + + + /* Get the json document */ + $ch = curl_init(); + $username = rtrim(`id -un`, "\n"); + curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=Hadoop:service=".$master.",name=RpcActivityForPort*", + CURLOPT_RETURNTRANSFER => true, + CURLOPT_HTTPAUTH => CURLAUTH_ANY, + CURLOPT_USERPWD => "$username:", + CURLOPT_SSL_VERIFYPEER => FALSE )); + $json_string = curl_exec($ch); + $info = curl_getinfo($ch); + if (intval($info['http_code']) == 401){ + logout(); + $json_string = curl_exec($ch); + } + $info = curl_getinfo($ch); + curl_close($ch); + $json_array = json_decode($json_string, true); + $object = $json_array['beans'][0]; + if (count($object) == 0) { + echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n"; + exit(2); + } + $RpcQueueTime_avg_time = round($object['RpcQueueTime_avg_time'], 2); + $RpcProcessingTime_avg_time = round($object['RpcProcessingTime_avg_time'], 2); + + $out_msg = "RpcQueueTime_avg_time:<" . $RpcQueueTime_avg_time . + "> Secs, RpcProcessingTime_avg_time:<" . $RpcProcessingTime_avg_time . + "> Secs"; + + if ($RpcQueueTime_avg_time >= $crit) { + echo "CRITICAL: " . $out_msg . "\n"; + exit (2); + } + if ($RpcQueueTime_avg_time >= $warn) { + echo "WARNING: " . $out_msg . "\n"; + exit (1); + } + echo "OK: " . $out_msg . 
"\n"; + exit(0); + + /* print usage */ + function usage () { + echo "Usage: $0 -h <host> -p port -n <JobTracker/NameNode/JobHistoryServer> -w <warn_in_sec> -c <crit_in_sec> -k keytab path -r principal name -t kinit path -s security enabled -e ssl enabled\n"; + } +?> http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_rpcq_latency_ha.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_rpcq_latency_ha.php b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_rpcq_latency_ha.php new file mode 100644 index 0000000..3e7616c --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_rpcq_latency_ha.php @@ -0,0 +1,115 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* This plugin makes call to master node, get the jmx-json document + * It checks the rpc wait time in the queue, RpcQueueTime_avg_time + * check_rpcq_latency -h hostaddress -p port -t ServiceName -w 1 -c 1 + * Warning and Critical values are in seconds + * Service Name = JobTracker, NameNode, JobHistoryServer + */ + + include "hdp_nagios_init.php"; + + $options = getopt ("h:p:w:c:n:e:k:r:t:s:"); + if (!array_key_exists('h', $options) || !array_key_exists('p', $options) || !array_key_exists('w', $options) + || !array_key_exists('c', $options) || !array_key_exists('n', $options)) { + usage(); + exit(3); + } + + $hosts=$options['h']; + $port=$options['p']; + $master=$options['n']; + $warn=$options['w']; + $crit=$options['c']; + $keytab_path=$options['k']; + $principal_name=$options['r']; + $kinit_path_local=$options['t']; + $security_enabled=$options['s']; + $ssl_enabled=$options['e']; + + /* Kinit if security enabled */ + $status = kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name); + $retcode = $status[0]; + $output = $status[1]; + + if ($output != 0) { + echo "CRITICAL: Error doing kinit for nagios. $output"; + exit (2); + } + + $protocol = ($ssl_enabled == "true" ? 
"https" : "http"); + + $jmx_response_available = false; + $jmx_response; + + foreach (preg_split('/,/', $hosts) as $host) { + /* Get the json document */ + $ch = curl_init(); + $username = rtrim(`id -un`, "\n"); + curl_setopt_array($ch, array( CURLOPT_URL => $protocol."://".$host.":".$port."/jmx?qry=Hadoop:service=".$master.",name=RpcActivityForPort*", + CURLOPT_RETURNTRANSFER => true, + CURLOPT_HTTPAUTH => CURLAUTH_ANY, + CURLOPT_USERPWD => "$username:", + CURLOPT_SSL_VERIFYPEER => FALSE )); + $json_string = curl_exec($ch); + $info = curl_getinfo($ch); + if (intval($info['http_code']) == 401){ + logout(); + $json_string = curl_exec($ch); + } + $info = curl_getinfo($ch); + curl_close($ch); + $json_array = json_decode($json_string, true); + $object = $json_array['beans'][0]; + + if (count($object) > 0) { + $jmx_response_available = true; + $jmx_response = $object; + } + } + + if ($jmx_response_available === false) { + echo "CRITICAL: Data inaccessible, Status code = ". $info['http_code'] ."\n"; + exit(2); + } + + $RpcQueueTime_avg_time = round($jmx_response['RpcQueueTime_avg_time'], 2); + $RpcProcessingTime_avg_time = round($jmx_response['RpcProcessingTime_avg_time'], 2); + + $out_msg = "RpcQueueTime_avg_time:<" . $RpcQueueTime_avg_time . + "> Secs, RpcProcessingTime_avg_time:<" . $RpcProcessingTime_avg_time . + "> Secs"; + + if ($RpcQueueTime_avg_time >= $crit) { + echo "CRITICAL: " . $out_msg . "\n"; + exit (2); + } + if ($RpcQueueTime_avg_time >= $warn) { + echo "WARNING: " . $out_msg . "\n"; + exit (1); + } + echo "OK: " . $out_msg . 
"\n"; + exit(0); + + /* print usage */ + function usage () { + echo "Usage: $0 -h <host> -p port -n <JobTracker/NameNode/JobHistoryServer> -w <warn_in_sec> -c <crit_in_sec> -k keytab path -r principal name -t kinit path -s security enabled -e ssl enabled\n"; + } +?> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_templeton_status.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_templeton_status.sh b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_templeton_status.sh new file mode 100644 index 0000000..3e2ba0f --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_templeton_status.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +# +# out='{"status":"ok","version":"v1"}<status_code:200>' +HOST=$1 +PORT=$2 +VERSION=$3 +SEC_ENABLED=$4 +if [[ "$SEC_ENABLED" == "true" ]]; then + NAGIOS_KEYTAB=$5 + NAGIOS_USER=$6 + KINIT_PATH=$7 + out1=`${KINIT_PATH} -kt ${NAGIOS_KEYTAB} ${NAGIOS_USER} 2>&1` + if [[ "$?" -ne 0 ]]; then + echo "CRITICAL: Error doing kinit for nagios [$out1]"; + exit 2; + fi +fi +regex="^.*\"status\":\"ok\".*<status_code:200>$" +export no_proxy=$HOST +out=`curl --negotiate -u : -s -w '<status_code:%{http_code}>' http://$HOST:$PORT/templeton/$VERSION/status 2>&1` +if [[ $out =~ $regex ]]; then + out=`echo "$out" | sed -e 's/{/[/g' | sed -e 's/}/]/g'` + echo "OK: WebHCat Server status [$out]"; + exit 0; +fi +echo "CRITICAL: Error accessing WebHCat Server, status [$out]"; +exit 2; http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui.sh b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui.sh new file mode 100644 index 0000000..e47a74c --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui.sh @@ -0,0 +1,103 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +# + +service=$1 +host=$2 +port=$3 + +checkurl () { + url=$1 + export no_proxy=$host + curl $url -o /dev/null + echo $? +} + +if [[ -z "$service" || -z "$host" ]]; then + echo "UNKNOWN: Invalid arguments; Usage: check_webui.sh service_name host_name"; + exit 3; +fi + +case "$service" in + +jobtracker) + jtweburl="http://$host:$port" + if [[ `checkurl "$jtweburl"` -ne 0 ]]; then + echo "WARNING: Jobtracker web UI not accessible : $jtweburl"; + exit 1; + fi + ;; +namenode) + nnweburl="http://$host:$port" + if [[ `checkurl "$nnweburl"` -ne 0 ]] ; then + echo "WARNING: NameNode Web UI not accessible : $nnweburl"; + exit 1; + fi + ;; +jobhistory) + jhweburl="http://$host:$port/jobhistoryhome.jsp" + if [[ `checkurl "$jhweburl"` -ne 0 ]]; then + echo "WARNING: HistoryServer Web UI not accessible : $jhweburl"; + exit 1; + fi + ;; +hbase) + hbaseweburl="http://$host:$port/master-status" + if [[ `checkurl "$hbaseweburl"` -ne 0 ]]; then + echo "WARNING: HBase Master Web UI not accessible : $hbaseweburl"; + exit 1; + fi + ;; +resourcemanager) + rmweburl="http://$host:$port/cluster" + if [[ `checkurl "$rmweburl"` -ne 0 ]]; then + echo "WARNING: ResourceManager Web UI not accessible : $rmweburl"; + exit 1; + fi + ;; +historyserver2) + hsweburl="http://$host:$port/jobhistory" + if [[ `checkurl "$hsweburl"` -ne 0 ]]; then + echo "WARNING: HistoryServer Web UI not accessible : $hsweburl"; + exit 1; + fi + ;; +storm_ui) + rmweburl="http://$host:$port" + if [[ `checkurl "$rmweburl"` -ne 0 ]]; then + echo "WARNING: Storm Web UI not accessible : $rmweburl"; + exit 1; + fi + 
;; +falconserver) + hsweburl="http://$host:$port/" + if [[ `checkurl "$hsweburl"` -ne 0 ]]; then + echo "WARNING: FalconServer Web UI not accessible : $hsweburl"; + exit 1; + fi + ;; +*) echo "UNKNOWN: Invalid service name [$service], valid options [jobtracker|jobhistory|hbase|namenode|resourcemanager|historyserver2|falconserver|storm_ui]" + exit 3 + ;; +esac + +echo "OK: Successfully accessed $service Web UI" +exit 0; http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui_ha.sh ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui_ha.sh b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui_ha.sh new file mode 100644 index 0000000..1d0f5f3 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/check_webui_ha.sh @@ -0,0 +1,64 @@ +#!/usr/bin/env bash +# +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
# Nagios plugin: check the web UI of a service deployed in HA mode.
#
# Usage: check_webui_ha.sh service_name host1[,host2,...] port
# The check is OK as soon as one of the comma-separated hosts answers.
# Exit status: 0 = OK, 1 = WARNING (no host answered), 3 = UNKNOWN (missing
# arguments or unsupported service name).

service=$1
hosts=$2
port=$3

# Fetch the URL given as $1, discard the response body and return curl's
# exit status. $2 is the host being probed; it is exported in no_proxy
# before the request is made.
checkurl () {
  local url=$1
  # A local name is used so the caller's loop variable "host" is not clobbered.
  local target_host=$2
  export no_proxy=$target_host
  # --silent drops the progress meter; --show-error keeps failures on stderr.
  curl --silent --show-error "$url" -o /dev/null
}

if [[ -z "$service" || -z "$hosts" ]]; then
  echo "UNKNOWN: Invalid arguments; Usage: check_webui_ha.sh service_name, host_name";
  exit 3;
fi

case "$service" in
resourcemanager)
    url_end_part="/cluster"
    ;;
*) echo "UNKNOWN: Invalid service name [$service], valid options [resourcemanager]"
   exit 3
   ;;
esac

# Split the comma-separated host list. The IFS assignment is scoped to this
# single read, so the global IFS never needs to be saved and restored.
IFS=',' read -r -a hosts_array <<< "${hosts}"

tried_urls=()
for host in "${hosts_array[@]}"
do
  weburl="http://${host}:${port}${url_end_part}"
  if checkurl "$weburl" "$host"; then
    echo "OK: Successfully accessed $service Web UI"
    exit 0;
  fi
  tried_urls+=("$weburl")
done

# Report every URL that was probed, not only the last one. With a single
# host the message is identical to the previous output.
tried_list=$(IFS=','; echo "${tried_urls[*]}")
echo "WARNING: $service Web UI not accessible : $tried_list";
exit 1;
# Wrapper around a Nagios check command (passed as "$@").
#
# The wrapped command's output and exit status are passed through unchanged,
# except when the check failed AND /var/nagios/ignore.dat has a line that
# mentions the current host, service group and component: the status is then
# reported as 0 and "AMBARIPASSIVE=<real status>" is appended to the output.
#
# Reads NAGIOS_HOSTNAME, NAGIOS_SERVICEGROUPNAME and NAGIOS_SERVICEDESC from
# the environment.

# Store $NAGIOS_SERVICEGROUPNAME in the variable whose name is given as $1.
function real_service() {
  # printf -v assigns by name without eval, so quotes or shell syntax in the
  # value can never be executed.
  printf -v "$1" '%s' "$NAGIOS_SERVICEGROUPNAME"
}

# Derive the component name from $NAGIOS_SERVICEDESC and store it in the
# variable whose name is given as $1.
function real_component() {
  local compName realCompName

  # The component prefix is the first whitespace-delimited word once every
  # "::" has been turned into a space. Using read instead of an unquoted
  # array expansion avoids pathname (glob) expansion of the description.
  read -r compName _ <<< "${NAGIOS_SERVICEDESC//::/ }"

  case "$compName" in
    HBASEMASTER)
      realCompName="HBASE_MASTER"
    ;;
    REGIONSERVER)
      realCompName="HBASE_REGIONSERVER"
    ;;
    JOBHISTORY)
      realCompName="MAPREDUCE2"
    ;;
    HIVE-METASTORE)
      realCompName="HIVE_METASTORE"
    ;;
    HIVE-SERVER)
      realCompName="HIVE_SERVER"
    ;;
    FLUME)
      realCompName="FLUME_HANDLER"
    ;;
    HUE)
      realCompName="HUE_SERVER"
    ;;
    WEBHCAT)
      realCompName="WEBHCAT_SERVER"
    ;;
    *)
      realCompName=$compName
    ;;
  esac

  printf -v "$1" '%s' "$realCompName"
}

real_service_var=""
real_service real_service_var

real_comp_var=""
real_component real_comp_var

ignore_file=/var/nagios/ignore.dat

# Run the wrapped check, capturing its stdout and exit status.
wrapper_output=$("$@")
wrapper_result=$?

# Successful checks, and hosts without an ignore file, are passed through.
if [ "$wrapper_result" == "0" ] || [ ! -f "$ignore_file" ]; then
  echo "$wrapper_output"
  exit "$wrapper_result"
fi

# Count ignore-file lines that mention host, service and component. All three
# values must be non-empty: an empty pattern would match every line. The
# patterns are quoted and matched as fixed strings (-F) so that dots in host
# names or spaces in service names cannot change the match or the grep
# argument list.
count=0
if [[ -n "$NAGIOS_HOSTNAME" && -n "$real_service_var" && -n "$real_comp_var" ]]; then
  count=$(grep -F -e "$NAGIOS_HOSTNAME" "$ignore_file" \
            | grep -F -e "$real_service_var" \
            | grep -c -F -e "$real_comp_var")
fi

if [ "$count" -ne "0" ]; then
  # The separator is the two literal characters backslash + n, exactly as the
  # original `echo "...\n..."` (no -e) printed it.
  printf '%s\\nAMBARIPASSIVE=%s\n' "$wrapper_output" "$wrapper_result" | sed 's/^[ \t]*//g'
  exit 0
else
  echo "$wrapper_output"
  exit "$wrapper_result"
fi
+# +Alias /ambarinagios /usr/share/hdp +<Directory /usr/share/hdp> + Options None + AllowOverride None + Order allow,deny + Allow from all +</Directory> http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/hdp_nagios_init.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/hdp_nagios_init.php b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/hdp_nagios_init.php new file mode 100644 index 0000000..487eb43 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/hdp_nagios_init.php @@ -0,0 +1,81 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Common functions called from other alerts + * + */ + + /* + * Function for kinit. Checks if security enabled and klist for this principal doesn't returns nothing, + * make kinit call in this case. 
+ */ + function kinit_if_needed($security_enabled, $kinit_path_local, $keytab_path, $principal_name) { + if($security_enabled === 'true') { + + $is_logined = is_logined($principal_name); + + if (!$is_logined) + $status = kinit($kinit_path_local, $keytab_path, $principal_name); + else + $status = array(0, ''); + } else { + $status = array(0, ''); + } + + return $status; + } + + + /* + * Checks if user is logined on kerberos + */ + function is_logined($principal_name) { + $check_cmd = "klist|grep $principal_name 1> /dev/null 2>/dev/null ; [[ $? != 0 ]] && echo 1"; + $check_output = shell_exec($check_cmd); + + if ($check_output) + return false; + else + return true; + } + + /* + * Runs kinit command. + */ + function kinit($kinit_path_local, $keytab_path, $principal_name) { + $init_cmd = "$kinit_path_local -kt $keytab_path $principal_name 2>&1"; + $kinit_output = shell_exec($init_cmd); + if ($kinit_output) + $status = array(1, $kinit_output); + else + $status = array(0, ''); + + return $status; + } + + function logout() { + if (shell_exec("rm -f /tmp/krb5cc_".trim(shell_exec('id -u'))) == "" ) + $status = true; + else + $status = false; + + return $status; + } + + ?> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/nagios_alerts.php ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/nagios_alerts.php b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/nagios_alerts.php new file mode 100644 index 0000000..d15b023 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/nagios_alerts.php @@ -0,0 +1,513 @@ +<?php +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Constants. */ +define("HDP_MON_RESPONSE_OPTION_KEY__PROPERTIES", "Properties"); +define("HDP_MON_RESPONSE_OPTION_KEY__TYPE", "Type"); + +define("HDP_MON_RESPONSE_OPTION_VALUE__PROPERTIES_UNCACHEABLE", "Uncacheable"); +define("HDP_MON_RESPONSE_OPTION_VALUE__TYPE_JSON", "JSON"); +define("HDP_MON_RESPONSE_OPTION_VALUE__TYPE_JAVASCRIPT", "JAVASCRIPT"); + +define("HDP_MON_QUERY_ARG__JSONP", "jsonp"); + +/** Spits out appropriate response headers, as per the options passed in. */ +function hdp_mon_generate_response_headers( $response_options ) +{ + if( $response_options[HDP_MON_RESPONSE_OPTION_KEY__PROPERTIES] == HDP_MON_RESPONSE_OPTION_VALUE__PROPERTIES_UNCACHEABLE ) + { + // Make the response uncache-able. + header("Expires: Mon, 26 Jul 1997 05:00:00 GMT"); // Date in the past + header("Last-Modified: " . gmdate("D, d M Y H:i:s") . 
/** Given $response_data (which we expect to be a JSON string), generate an
 * HTTP response, which includes emitting the necessary HTTP response headers
 * followed by the response body (that is either plain ol' $response_data,
 * or a JSONP wrapper around it).
 *
 * The JSONP wrapper is used only when the "jsonp" query argument is a
 * syntactically valid JavaScript callback name (identifiers, optionally
 * joined by dots). Anything else - missing, empty or containing other
 * characters - falls back to a plain JSON response, so the request can
 * never inject script into the body.
 */
function hdp_mon_generate_response( $response_data )
{
  $jsonpFunctionName = NULL;
  if (isset($_GET[HDP_MON_QUERY_ARG__JSONP])) {
    $requestedName = $_GET[HDP_MON_QUERY_ARG__JSONP];
    // Accept e.g. "cb", "jQuery123_456", "app.handlers.onData".
    $callbackPattern = '/^[A-Za-z_$][A-Za-z0-9_$]*(\.[A-Za-z_$][A-Za-z0-9_$]*)*$/';
    if (is_string($requestedName) && preg_match($callbackPattern, $requestedName)) {
      $jsonpFunctionName = $requestedName;
    }
  }

  // One flag drives both the Content-type header and the body format, so
  // the two can no longer disagree (previously an empty callback produced
  // "( ... );" served as application/json).
  $useJsonp = ($jsonpFunctionName !== NULL);

  hdp_mon_generate_response_headers( array
  ( HDP_MON_RESPONSE_OPTION_KEY__PROPERTIES => HDP_MON_RESPONSE_OPTION_VALUE__PROPERTIES_UNCACHEABLE,
    HDP_MON_RESPONSE_OPTION_KEY__TYPE =>
      $useJsonp ?
      HDP_MON_RESPONSE_OPTION_VALUE__TYPE_JAVASCRIPT :
      HDP_MON_RESPONSE_OPTION_VALUE__TYPE_JSON ) );

  if( $useJsonp )
  {
    echo "$jsonpFunctionName( $response_data );";
  }
  else
  {
    echo $response_data;
  }
}
$result['hosts'] = query_hosts ($status_file_content, $alert_type, $host); + } + + /* Add host count object to the results */ + $result['hostcounts'] = query_host_count ($status_file_content); + + /* Add services runtime states */ + $result['servicestates'] = query_service_states ($status_file_content); + + /* Return results */ + if ($indent == "true") { + hdp_mon_generate_response(indent(json_encode($result))); + } else { + hdp_mon_generate_response(json_encode($result)); + } + + # Functions + /* Query service states */ + function query_service_states ($status_file_content) { + $num_matches = preg_match_all("/servicestatus \{([\S\s]*?)\}/", $status_file_content, $matches, PREG_PATTERN_ORDER); + $services_object = array (); + $services_object["PUPPET"] = 0; + foreach ($matches[0] as $object) { + + if (getParameter($object, "service_description") == HDFS_SERVICE_CHECK) { + $services_object["HDFS"] = getParameter($object, "last_hard_state"); + if ($services_object["HDFS"] >= 1) { + $services_object["HDFS"] = 1; + } + continue; + } + if (getParameter($object, "service_description") == MAPREDUCE_SERVICE_CHECK) { + $services_object["MAPREDUCE"] = getParameter($object, "last_hard_state"); + if ($services_object["MAPREDUCE"] >= 1) { + $services_object["MAPREDUCE"] = 1; + } + continue; + } + if (getParameter($object, "service_description") == HBASE_SERVICE_CHECK) { + $services_object["HBASE"] = getParameter($object, "last_hard_state"); + if ($services_object["HBASE"] >= 1) { + $services_object["HBASE"] = 1; + } + continue; + } + if (getParameter($object, "service_description") == HIVE_SERVICE_CHECK) { + $services_object["HIVE"] = getParameter($object, "last_hard_state"); + if ($services_object["HIVE"] >= 1) { + $services_object["HIVE"] = 1; + } + continue; + } + if (getParameter($object, "service_description") == OOZIE_SERVICE_CHECK) { + $services_object["OOZIE"] = getParameter($object, "last_hard_state"); + if ($services_object["OOZIE"] >= 1) { + $services_object["OOZIE"] 
= 1; + } + continue; + } + if (getParameter($object, "service_description") == WEBHCAT_SERVICE_CHECK) { + $services_object["WEBHCAT"] = getParameter($object, "last_hard_state"); + if ($services_object["WEBHCAT"] >= 1) { + $services_object["WEBHCAT"] = 1; + } + continue; + } + /* In case of zookeeper, service is treated running if alert is ok or warning (i.e partial + * instances of zookeepers are running + */ + if (getParameter($object, "service_description") == ZOOKEEPER_SERVICE_CHECK) { + $services_object["ZOOKEEPER"] = getParameter($object, "last_hard_state"); + if ($services_object["ZOOKEEPER"] <= 1) { + $services_object["ZOOKEEPER"] = 0; + } + continue; + } + if (getParameter($object, "service_description") == PUPPET_SERVICE_CHECK) { + $state = getParameter($object, "last_hard_state"); + if ($state >= 1) { + $services_object["PUPPET"]++; + } + continue; + } + } + if ($services_object["PUPPET"] >= 1) { + $services_object["PUPPET"] = 1; + } + $services_object = array_map('strval', $services_object); + return $services_object; + } + + /* Query host count */ + function query_host_count ($status_file_content) { + $num_matches = preg_match_all("/hoststatus \{([\S\s]*?)\}/", $status_file_content, $matches, PREG_PATTERN_ORDER); + $hostcounts_object = array (); + $up_hosts = 0; + $down_hosts = 0; + + foreach ($matches[0] as $object) { + if (getParameter($object, "last_hard_state") != ok) { + $down_hosts++; + } else { + $up_hosts++; + } + } + $hostcounts_object['up_hosts'] = $up_hosts; + $hostcounts_object['down_hosts'] = $down_hosts; + $hostcounts_object = array_map('strval', $hostcounts_object); + return $hostcounts_object; + } + + /* Query Hosts */ + function query_hosts ($status_file_content, $alert_type, $host) { + $hoststatus_attributes = array ("host_name", "current_state", "last_hard_state", + "plugin_output", "last_check", "current_attempt", + "last_hard_state_change", "last_time_up", "last_time_down", + "last_time_unreachable", "is_flapping", "last_check"); + 
+ $num_matches = preg_match_all("/hoststatus \{([\S\s]*?)\}/", $status_file_content, $matches, PREG_PATTERN_ORDER); + $hosts_objects = array (); + $i = 0; + foreach ($matches[0] as $object) { + $hoststatus = array (); + $chost = getParameter($object, "host_name"); + if (empty($host) || $chost == $host) { + foreach ($hoststatus_attributes as $attrib) { + $hoststatus[$attrib] = htmlentities(getParameter($object, $attrib), ENT_COMPAT); + } + $hoststatus['alerts'] = query_alerts ($status_file_content, $alert_type, $chost); + if (!empty($host)) { + $hosts_objects[$i] = $hoststatus; + $i++; + break; + } + } + if (!empty($hoststatus)) { + $hosts_objects[$i] = $hoststatus; + $i++; + } + } + /* echo "COUNT : " . count ($services_objects) . "\n"; */ + return $hosts_objects; + } + + /* Query Alerts */ + function query_alerts ($status_file_content, $alert_type, $host) { + + $servicestatus_attributes = array ("service_description", "host_name", "current_attempt", + "current_state", "plugin_output", "last_hard_state_change", "last_hard_state", + "last_time_ok", "last_time_warning", "last_time_unknown", + "last_time_critical", "is_flapping", "last_check", + "long_plugin_output"); + + $num_matches = preg_match_all("/servicestatus \{([\S\s]*?)\}/", $status_file_content, $matches, PREG_PATTERN_ORDER); + #echo $matches[0][0] . ", " . $matches[0][1] . "\n"; + #echo $matches[1][0] . ", " . $matches[1][1] . 
"\n"; + $services_objects = array (); + $i = 0; + foreach ($matches[1] as $object) { + $servicestatus = getParameterMap($object, $servicestatus_attributes); + switch ($alert_type) { + case "all": + if (empty($host) || $servicestatus['host_name'] == $host) { + $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); + $srv_desc = explode ("::",$servicestatus['service_description'],2); + + $servicestatus['service_description'] = $srv_desc[1]; + } + break; + case "nok": + if (getParameterMapValue($map, "last_hard_state") != ok && + (empty($host) || getParameterMapValue($map, "host_name") == $host)) { + foreach ($servicestatus_attributes as $attrib) { + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); + } + $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); + $srv_desc = explode ("::",$servicestatus['service_description'],2); + $servicestatus['service_description'] = $srv_desc[1]; + } + break; + case "ok": + if (getParameterMapValue($map, "last_hard_state") == ok && + (empty($host) || getParameterMapValue($map, "host_name") == $host)) { + foreach ($servicestatus_attributes as $attrib) { + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); + } + $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); + $srv_desc = explode ("::",$servicestatus['service_description'],2); + $servicestatus['service_description'] = $srv_desc[1]; + } + break; + case "warn": + if (getParameterMapValue($map, "last_hard_state") == warn && + (empty($host) || getParameterMapValue($map, "host_name") == $host)) { + foreach ($servicestatus_attributes as $attrib) { + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); + } + $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); + $srv_desc = explode ("::",$servicestatus['service_description'],2); + 
$servicestatus['service_description'] = $srv_desc[1]; + } + break; + case "critical": + if (getParameterMapValue($map, "last_hard_state") == critical && + (empty($host) || getParameterMapValue($map, "host_name") == $host)) { + foreach ($servicestatus_attributes as $attrib) { + $servicestatus[$attrib] = htmlentities(getParameterMapValue($map, $attrib), ENT_COMPAT); + } + $servicestatus['service_type'] = get_service_type($servicestatus['service_description']); + $srv_desc = explode ("::",$servicestatus['service_description'],2); + $servicestatus['service_description'] = $srv_desc[1]; + } + break; + } + + if (!empty($servicestatus)) { + $services_objects[$i] = $servicestatus; + $i++; + } + } + + // echo "COUNT : " . count ($services_objects) . "\n"; + return $services_objects; + } + + function get_service_type($service_description) + { + $pieces = explode("::", $service_description); + switch ($pieces[0]) { + case "DATANODE": + case "NAMENODE": + case "JOURNALNODE": + $pieces[0] = "HDFS"; + break; + case "JOBTRACKER": + case "TASKTRACKER": + $pieces[0] = "MAPREDUCE"; + break; + case "HBASEMASTER": + case "REGIONSERVER": + $pieces[0] = "HBASE"; + break; + case "HIVE-METASTORE": + case "HIVE-SERVER": + $pieces[0] = "HIVE"; + break; + case "ZKSERVERS": + $pieces[0] = "ZOOKEEPER"; + break; + case "AMBARI": + $pieces[0] = "AMBARI"; + break; + case "FLUME": + $pieces[0] = "FLUME"; + break; + case "JOBHISTORY": + $pieces[0] = "MAPREDUCE2"; + break; + case "RESOURCEMANAGER": + case "APP_TIMELINE_SERVER": + case "NODEMANAGER": + $pieces[0] = "YARN"; + break; + case "STORM_UI_SERVER": + case "NIMBUS": + case "DRPC_SERVER": + case "SUPERVISOR": + case "STORM_REST_API": + $pieces[0] = "STORM"; + break; + case "NAGIOS": + case "HDFS": + case "MAPREDUCE": + case "HBASE": + case "ZOOKEEPER": + case "OOZIE": + case "WEBHCAT": + case "GANGLIA": + case "STORM": + case "FALCON": + case "PUPPET": + break; + default: + $pieces[0] = "UNKNOWN"; + } + return $pieces[0]; + } + + function 
getParameter($object, $key) + { + $pattern="/\s" . $key . "[\s= ]*([\S, ]*)\n/"; + $num_mat = preg_match($pattern, $object, $matches); + $value = ""; + if ($num_mat) { + $value = $matches[1]; + } + return $value; + } + + function getParameterMapValue($map, $key) { + $value = $map[$key]; + + if (!is_null($value)) + return "" . $value; + + return ""; + } + + + function getParameterMap($object, $keynames) { + + $cnt = preg_match_all('/\t([\S]*)=[\n]?[\t]?([\S= ]*)/', $object, $matches, PREG_PATTERN_ORDER); + + $tmpmap = array_combine($matches[1], $matches[2]); + + $map = array(); + foreach ($keynames as $key) { + $map[$key] = htmlentities($tmpmap[$key], ENT_COMPAT); + } + + return $map; + } + +function indent($json) { + + $result = ''; + $pos = 0; + $strLen = strlen($json); + $indentStr = ' '; + $newLine = "\n"; + $prevChar = ''; + $outOfQuotes = true; + + for ($i=0; $i<=$strLen; $i++) { + + // Grab the next character in the string. + $char = substr($json, $i, 1); + + // Are we inside a quoted string? + if ($char == '"' && $prevChar != '\\') { + $outOfQuotes = !$outOfQuotes; + + // If this character is the end of an element, + // output a new line and indent the next line. + } else if(($char == '}' || $char == ']') && $outOfQuotes) { + $result .= $newLine; + $pos --; + for ($j=0; $j<$pos; $j++) { + $result .= $indentStr; + } + } + + // Add the character to the result string. + $result .= $char; + + // If the last character was the beginning of an element, + // output a new line and indent the next line. 
+ if (($char == ',' || $char == '{' || $char == '[') && $outOfQuotes) { + $result .= $newLine; + if ($char == '{' || $char == '[') { + $pos ++; + } + + for ($j = 0; $j < $pos; $j++) { + $result .= $indentStr; + } + } + + $prevChar = $char; + } + + return $result; +} +?> http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/sys_logger.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/sys_logger.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/sys_logger.py new file mode 100644 index 0000000..8f0a415 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/files/sys_logger.py @@ -0,0 +1,186 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import sys
import syslog

# dictionary of state->severity mappings
severities = {'UP':'OK', 'DOWN':'Critical', 'UNREACHABLE':'Critical', 'OK':'OK',
              'WARNING':'Warning', 'UNKNOWN':'Warning', 'CRITICAL':'Critical'}

# List of services which can result in events at the Degraded severity
degraded_alert_services = ['HBASEMASTER::HBaseMaster CPU utilization',
                           'HDFS::Namenode RPC Latency',
                           'MAPREDUCE::JobTracker RPC Latency',
                           'JOBTRACKER::Jobtracker CPU utilization']

# List of services which can result in events at the Fatal severity
fatal_alert_services = ['NAMENODE::Namenode Process down',
                        'NAMENODE::NameNode process']

# dictionary of service->msg_id mappings
msg_ids = {'Host::Ping':'host_down',
           'HBASEMASTER::HBaseMaster CPU utilization':'master_cpu_utilization',
           'HDFS::HDFS Capacity utilization':'hdfs_percent_capacity',
           'HDFS::Corrupt/Missing blocks':'hdfs_block',
           'NAMENODE::Namenode Edit logs directory status':'namenode_edit_log_write',
           'HDFS::Percent DataNodes down':'datanode_down',
           'DATANODE::Process down':'datanode_process_down',
           'HDFS::Percent DataNodes storage full':'datanodes_percent_storage_full',
           'NAMENODE::Namenode Process down':'namenode_process_down',
           'HDFS::Namenode RPC Latency':'namenode_rpc_latency',
           'DATANODE::Storage full':'datanodes_storage_full',
           'JOBTRACKER::Jobtracker Process down':'jobtracker_process_down',
           'MAPREDUCE::JobTracker RPC Latency':'jobtracker_rpc_latency',
           'MAPREDUCE::Percent TaskTrackers down':'tasktrackers_down',
           'TASKTRACKER::Process down':'tasktracker_process_down',
           'HBASEMASTER::HBaseMaster Process down':'hbasemaster_process_down',
           'REGIONSERVER::Process down':'regionserver_process_down',
           'HBASE::Percent region servers down':'regionservers_down',
           'HIVE-METASTORE::HIVE-METASTORE status check':'hive_metastore_process_down',
           'ZOOKEEPER::Percent zookeeper servers down':'zookeepers_down',
           'ZKSERVERS::ZKSERVERS Process down':'zookeeper_process_down',
           'OOZIE::Oozie status check':'oozie_down',
           'TEMPLETON::Templeton status check':'templeton_down',
           'PUPPET::Puppet agent down':'puppet_down',
           'NAGIOS::Nagios status log staleness':'nagios_status_log_stale',
           'GANGLIA::Ganglia [gmetad] Process down':'ganglia_process_down',
           'GANGLIA::Ganglia collector [gmond] Process down alert for hbasemaster':'ganglia_collector_process_down',
           'GANGLIA::Ganglia collector [gmond] Process down alert for jobtracker':'ganglia_collector_process_down',
           'GANGLIA::Ganglia collector [gmond] Process down alert for namenode':'ganglia_collector_process_down',
           'GANGLIA::Ganglia collector [gmond] Process down alert for slaves':'ganglia_collector_process_down',
           'NAMENODE::Secondary Namenode Process down':'secondary_namenode_process_down',
           'JOBTRACKER::Jobtracker CPU utilization':'jobtracker_cpu_utilization',
           'HBASEMASTER::HBase Web UI down':'hbase_ui_down',
           'NAMENODE::Namenode Web UI down':'namenode_ui_down',
           'JOBTRACKER::JobHistory Web UI down':'jobhistory_ui_down',
           'JOBTRACKER::JobTracker Web UI down':'jobtracker_ui_down',

           'HBASEMASTER::HBase Master CPU utilization':'master_cpu_utilization',
           'HDFS::HDFS capacity utilization':'hdfs_percent_capacity',
           'NAMENODE::NameNode edit logs directory status':'namenode_edit_log_write',
           'DATANODE::DataNode process down':'datanode_process_down',
           'NAMENODE::NameNode process down':'namenode_process_down',
           'HDFS::NameNode RPC latency':'namenode_rpc_latency',
           'DATANODE::DataNode storage full':'datanodes_storage_full',
           'JOBTRACKER::JobTracker process down':'jobtracker_process_down',
           'MAPREDUCE::JobTracker RPC latency':'jobtracker_rpc_latency',
           'TASKTRACKER::TaskTracker process down':'tasktracker_process_down',
           'HBASEMASTER::HBase Master process down':'hbasemaster_process_down',
           'REGIONSERVER::RegionServer process down':'regionserver_process_down',
           'HBASE::Percent RegionServers down':'regionservers_down',
           'HIVE-METASTORE::Hive Metastore status check':'hive_metastore_process_down',
           'ZOOKEEPER::Percent ZooKeeper Servers down':'zookeepers_down',
           'ZOOKEEPER::ZooKeeper Server process down':'zookeeper_process_down',
           'OOZIE::Oozie Server status check':'oozie_down',
           'WEBHCAT::WebHCat Server status check':'templeton_down',
           'GANGLIA::Ganglia [gmetad] process down':'ganglia_process_down',
           'GANGLIA::Ganglia Collector [gmond] process down alert for HBase Master':'ganglia_collector_process_down',
           'GANGLIA::Ganglia Collector [gmond] process down alert for JobTracker':'ganglia_collector_process_down',
           'GANGLIA::Ganglia Collector [gmond] process down alert for NameNode':'ganglia_collector_process_down',
           'GANGLIA::Ganglia Collector [gmond] process down alert for slaves':'ganglia_collector_process_down',
           'NAMENODE::Secondary NameNode process down':'secondary_namenode_process_down',
           'JOBTRACKER::JobTracker CPU utilization':'jobtracker_cpu_utilization',
           'HBASEMASTER::HBase Master Web UI down':'hbase_ui_down',
           'NAMENODE::NameNode Web UI down':'namenode_ui_down',
           'Oozie status check':'oozie_down',
           'WEBHCAT::WebHcat status check':'templeton_down',

           # Ambari Nagios service check descriptions
           'DATANODE::DataNode process':'datanode_process',
           'NAMENODE::NameNode process':'namenode_process',
           'NAMENODE::Secondary NameNode process':'secondary_namenode_process',
           'JOURNALNODE::JournalNode process':'journalnode_process',
           'ZOOKEEPER::ZooKeeper Server process':'zookeeper_server_process',
           'JOBTRACKER::JobTracker process':'jobtracker_process',
           'TASKTRACKER::TaskTracker process':'tasktracker_process',
           'GANGLIA::Ganglia Server process':'ganglia_server_process',
           'GANGLIA::Ganglia Monitor process for Slaves':'ganglia_monitor_process',
           'GANGLIA::Ganglia Monitor process for NameNode':'ganglia_monitor_process',
           'GANGLIA::Ganglia Monitor process for JobTracker':'ganglia_monitor_process',
           'GANGLIA::Ganglia Monitor process for HBase Master':'ganglia_monitor_process',
           'GANGLIA::Ganglia Monitor process for ResourceManager':'ganglia_monitor_process',
           'GANGLIA::Ganglia Monitor process for HistoryServer':'ganglia_monitor_process',
           'HBASEMASTER::HBase Master process':'hbase_master_process',
           'REGIONSERVER::RegionServer process':'regionserver_process',
           'NAGIOS::Nagios status log freshness':'nagios_process',
           'FLUME::Flume Agent process':'flume_agent_process',
           'OOZIE::Oozie Server status':'oozie_server_process',
           'HIVE-METASTORE::Hive Metastore status':'hive_metastore_process',
           'WEBHCAT::WebHCat Server status':'webhcat_server_process',
           'RESOURCEMANAGER::ResourceManager process':'resourcemanager_process',
           'NODEMANAGER::NodeManager process':'nodemanager_process',
           'JOBHISTORY::HistoryServer process':'historyserver_process'}


# Determine the severity of the TVI alert based on the Nagios alert state.
def determine_severity(state, service):
  """Return the TVI severity for a Nagios ``state`` raised by ``service``.

  States missing from ``severities`` are treated as 'Warning'. A 'Warning'
  from a service in ``degraded_alert_services`` becomes 'Degraded'; any
  non-'OK' severity from a service in ``fatal_alert_services`` becomes
  'Fatal'.
  """
  # dict.get replaces dict.has_key(), which does not exist on Python 3.
  severity = severities.get(state, 'Warning')

  # For some alerts, warning should be converted to Degraded
  if severity == 'Warning' and service in degraded_alert_services:
    severity = 'Degraded'
  elif severity != 'OK' and service in fatal_alert_services:
    severity = 'Fatal'

  return severity


# Determine the msg id for the TVI alert from based on the service which generates the Nagios alert.
# The msg id is used to correlate a log msg to a TVI rule.
def determine_msg_id(service, severity):
  """Return the message id registered for ``service`` in ``msg_ids``.

  '_ok' is appended when ``severity`` is 'OK'. Services that have no entry
  yield 'HADOOP_UNKNOWN_MSG' regardless of severity.
  """
  msg_id = msg_ids.get(service)
  if msg_id is None:
    return 'HADOOP_UNKNOWN_MSG'

  if severity == 'OK':
    msg_id = '{0}_ok'.format(msg_id)

  return msg_id


# Determine the domain.  Currently the domain is always 'Hadoop'.
def determine_domain():
  """Return the domain written into every TVI message."""
  return 'Hadoop'


# log the TVI msg to the syslog
def log_tvi_msg(msg):
  """Write ``msg`` to syslog under the 'nagios' ident, tagged with the PID."""
  syslog.openlog('nagios', syslog.LOG_PID)
  syslog.syslog(msg)


# generate a tvi log msg from a Hadoop alert
def generate_tvi_log_msg(alert_type, attempt, state, service, msg):
  """Log a TVI message for a Nagios alert; only 'HARD' alerts are logged.

  ``attempt`` is accepted for interface compatibility and is not used.
  The logged line has the form '<severity>: <domain>: <msg_id># <msg>'.
  """
  # Only log HARD alerts
  if alert_type != 'HARD':
    return

  # Determine the TVI msg contents
  severity = determine_severity(state, service)  # The TVI alert severity.
  domain   = determine_domain()                  # The domain specified in the TVI alert.
  msg_id   = determine_msg_id(service, severity) # The msg_id used to correlate to a TVI rule.

  # Format and log msg
  log_tvi_msg('{0}: {1}: {2}# {3}'.format(severity, domain, msg_id, msg))


# main method which is called when invoked on the command line
def main():
  """Entry point: expects alert_type, attempt, state, service and msg."""
  # Fail with a readable message instead of an IndexError traceback.
  if len(sys.argv) < 6:
    sys.stderr.write('Usage: {0} alert_type attempt state service msg\n'.format(sys.argv[0]))
    sys.exit(1)

  generate_tvi_log_msg(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5])


# run the main method
if __name__ == '__main__':
  main()
  sys.exit(0)
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Ambari Agent + +""" +from resource_management import * + +# Gets if the java version is greater than 6 +def is_jdk_greater_6(java64_home): + import os + import re + java_bin = os.path.join(java64_home, 'bin', 'java') + ver_check = shell.call([java_bin, '-version']) + + ver = '' + if 0 != ver_check[0]: + # java is not local, try the home name as a fallback + ver = java64_home + else: + ver = ver_check[1] + + regex = re.compile('"1\.([0-9]*)\.0_([0-9]*)"', re.IGNORECASE) + r = regex.search(ver) + if r: + strs = r.groups() + if 2 == len(strs): + minor = int(strs[0]) + if minor > 6: + return True + + return False http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/scripts/nagios.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/scripts/nagios.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/scripts/nagios.py new file mode 100644 index 0000000..ca2d64c --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/scripts/nagios.py @@ -0,0 +1,112 @@ +#!/usr/bin/env python +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. 
The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Ambari Agent + +""" + +from resource_management import * +from nagios_server_config import nagios_server_config + +def nagios(): + import params + + File( params.nagios_httpd_config_file, + owner = params.nagios_user, + group = params.nagios_group, + content = Template("nagios.conf.j2"), + mode = 0644 + ) + + Directory( params.conf_dir, + owner = params.nagios_user, + group = params.nagios_group + ) + + Directory( [params.plugins_dir, params.nagios_obj_dir]) + + Directory( params.nagios_pid_dir, + owner = params.nagios_user, + group = params.nagios_group, + mode = 0755, + recursive = True + ) + + Directory( [params.nagios_var_dir, params.check_result_path, params.nagios_rw_dir, params.ambarinagios_php_dir], + owner = params.nagios_user, + group = params.nagios_group, + recursive = True + ) + + Directory( [params.nagios_log_dir, params.nagios_log_archives_dir], + owner = params.nagios_user, + group = params.nagios_group, + mode = 0755 + ) + + nagios_server_config() + + set_web_permisssions() + + File( format("{conf_dir}/command.cfg"), + owner = params.nagios_user, + group = params.nagios_group + ) + + File( format("{ambarinagios_php_dir}/{ambarinagios_php_filename}"), + content = StaticFile(params.ambarinagios_php_filename), + ) + + File( params.hdp_mon_nagios_addons_path, + content = StaticFile("hdp_mon_nagios_addons.conf"), + ) + + File(format("{nagios_var_dir}/ignore.dat"), + owner = params.nagios_user, + group = 
params.nagios_group, + mode = 0664) + + if System.get_instance().os_family == "ubuntu": + Link(params.ubuntu_stylesheets_desired_location, + to = params.ubuntu_stylesheets_real_location + ) + + +def set_web_permisssions(): + import params + + cmd = format("{htpasswd_cmd} -c -b {conf_dir}/htpasswd.users {nagios_web_login} {nagios_web_password!p}") + test = format("grep {nagios_web_login} {conf_dir}/htpasswd.users") + Execute( cmd, + not_if = test + ) + + File( format("{conf_dir}/htpasswd.users"), + owner = params.nagios_user, + group = params.nagios_group, + mode = 0640 + ) + + if System.get_instance().os_family == "suse": + command = format("usermod -G {nagios_group} wwwrun") + elif System.get_instance().os_family == "ubuntu": + command = format("usermod -G {nagios_group} www-data") # check -a ??? + elif System.get_instance().os_family == "redhat": + command = format("usermod -a -G {nagios_group} apache") + + Execute( command) http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/scripts/nagios_server.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/scripts/nagios_server.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/scripts/nagios_server.py new file mode 100644 index 0000000..da35b34 --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/scripts/nagios_server.py @@ -0,0 +1,111 @@ +#!/usr/bin/env python +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Ambari Agent + +""" + +import sys +from resource_management import * +from nagios import nagios +from nagios_service import nagios_service +from nagios_service import update_active_alerts + + +class NagiosServer(Script): + def install(self, env): + remove_conflicting_packages() + self.install_packages(env) + self.configure(env) + + def configure(self, env): + import params + env.set_params(params) + nagios() + + + def start(self, env): + import params + env.set_params(params) + + update_ignorable(params) + + self.configure(env) # done for updating configs after Security enabled + nagios_service(action='start') + + + def stop(self, env): + import params + env.set_params(params) + + nagios_service(action='stop') + + + def status(self, env): + import status_params + env.set_params(status_params) + check_process_status(status_params.nagios_pid_file) + + # check for alert structures + update_active_alerts() + + +def remove_conflicting_packages(): + Package('hdp_mon_nagios_addons', action = "remove") + + Package('nagios-plugins', action = "remove") + + if System.get_instance().os_family in ["redhat","suse"]: + Execute("rpm -e --allmatches --nopostun nagios", + path = "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + ignore_failures = True) + +def update_ignorable(params): + if not params.config.has_key('passiveInfo'): + return + else: + buf = "" + count = 0 + for define in params.config['passiveInfo']: + try: + host = str(define['host']) + service = str(define['service']) + component = str(define['component']) + buf += host + " " + service + " " + component + 
"\n" + count += 1 + except KeyError: + pass + + f = None + try: + f = open('/var/nagios/ignore.dat', 'w') + f.write(buf) + if 1 == count: + Logger.info("Persisted '/var/nagios/ignore.dat' with 1 entry") + elif count > 1: + Logger.info("Persisted '/var/nagios/ignore.dat' with " + str(count) + " entries") + except: + Logger.info("Could not persist '/var/nagios/ignore.dat'") + pass + finally: + if f is not None: + f.close() + + +if __name__ == "__main__": + NagiosServer().execute() http://git-wip-us.apache.org/repos/asf/ambari/blob/83efcfea/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/scripts/nagios_server_config.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/scripts/nagios_server_config.py b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/scripts/nagios_server_config.py new file mode 100644 index 0000000..86d5a8a --- /dev/null +++ b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/NAGIOS/package/scripts/nagios_server_config.py @@ -0,0 +1,98 @@ +#!/usr/bin/env python +""" +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+ +Ambari Agent + +""" + +from resource_management import * + +def nagios_server_config(): + import params + + nagios_server_configfile( 'nagios.cfg', + config_dir = params.conf_dir, + group = params.nagios_group + ) + nagios_server_configfile( 'resource.cfg', + config_dir = params.conf_dir, + group = params.nagios_group + ) + nagios_server_configfile( 'hadoop-hosts.cfg') + nagios_server_configfile( 'hadoop-hostgroups.cfg') + nagios_server_configfile( 'hadoop-servicegroups.cfg') + nagios_server_configfile( 'hadoop-services.cfg') + nagios_server_configfile( 'hadoop-commands.cfg') + nagios_server_configfile( 'contacts.cfg') + + if System.get_instance().os_family != "suse": + nagios_server_configfile( 'nagios', + config_dir = '/etc/init.d', + mode = 0755, + owner = 'root', + group = 'root' + ) + + nagios_server_check( 'check_cpu.pl') + nagios_server_check( 'check_cpu.php') + nagios_server_check( 'check_cpu_ha.php') + nagios_server_check( 'check_datanode_storage.php') + nagios_server_check( 'check_aggregate.php') + nagios_server_check( 'check_hdfs_blocks.php') + nagios_server_check( 'check_hdfs_capacity.php') + nagios_server_check( 'check_rpcq_latency.php') + nagios_server_check( 'check_rpcq_latency_ha.php') + nagios_server_check( 'check_webui.sh') + nagios_server_check( 'check_webui_ha.sh') + nagios_server_check( 'check_name_dir_status.php') + nagios_server_check( 'check_oozie_status.sh') + nagios_server_check( 'check_templeton_status.sh') + nagios_server_check( 'check_hive_metastore_status.sh') + nagios_server_check( 'check_hue_status.sh') + nagios_server_check( 'check_mapred_local_dir_used.sh') + nagios_server_check( 'check_nodemanager_health.sh') + nagios_server_check( 'check_namenodes_ha.sh') + nagios_server_check( 'check_wrapper.sh') + nagios_server_check( 'hdp_nagios_init.php') + nagios_server_check( 'check_checkpoint_time.py' ) + nagios_server_check( 'sys_logger.py' ) + nagios_server_check( 'check_ambari_alerts.py' ) + +def nagios_server_configfile( + name, + 
owner = None, + group = None, + config_dir = None, + mode = None +): + import params + owner = params.nagios_user if not owner else owner + group = params.user_group if not group else group + config_dir = params.nagios_obj_dir if not config_dir else config_dir + + TemplateConfig( format("{config_dir}/{name}"), + owner = owner, + group = group, + mode = mode + ) + +def nagios_server_check(name): + File( format("{plugins_dir}/{name}"), + content = StaticFile(name), + mode = 0755 + )
