Title: [opsview] [10186] Catch NRPE running on a different PID
- Revision
- 10186
- Author
- dferguson
- Date
- 2012-09-27 10:33:44 +0100 (Thu, 27 Sep 2012)
Log Message
Catch NRPE running on a different PID
The NRPE PID file may not hold the correct PID for the running NRPE daemon, so take that into account
Modified Paths
Modified: trunk/opsview-core/bin/rc.opsview-agent
===================================================================
--- trunk/opsview-core/bin/rc.opsview-agent 2012-09-27 09:33:41 UTC (rev 10185)
+++ trunk/opsview-core/bin/rc.opsview-agent 2012-09-27 09:33:44 UTC (rev 10186)
@@ -60,6 +60,11 @@
CONFIG=$AGENT_BASE/etc/nrpe.cfg
# get the correct PID file out of the config file
+if [ ! -f $CONFIG ]; then
+ echo "$CONFIG doesn't exist - exiting"
+ exit 1
+fi
+
PIDFILE=`grep "^pid_file=" $CONFIG | awk -F= '{print $2}'`
if [ "x$PIDFILE" = "x" ]; then
echo "pid_file not set in $CONFIG - exiting"
@@ -80,8 +85,14 @@
PID=`getpid`
if [ "x$PID" != "x" ]; then
if kill -0 $PID 2>/dev/null; then
- echo "NRPE is running as process $PID"
- return 0
+ # check to see if the given pid is nrpe
+ process="`ps -o comm -p $PID | tail -1`"
+ if [ "$process" = "nrpe" ] ||
+ [ "$process" = "/usr/local/nagios/bin/nrpe" ]
+ then
+ echo "NRPE is running as process $PID"
+ return 0
+ fi
fi
fi
@@ -95,8 +106,56 @@
if [ "x$PID" != "x" ]; then
kill -0 $PID 2>/dev/null && kill $2 $PID
fi
+
+ # BEWARE: if this is solaris global zone we may get more NRPE
+ # processes than expected due to all the zones so don't do any
+ # more on Solaris systems to be on safe side
+ uname -s | grep SunOS 1>/dev/null
+ if [ $? == 0 ]; then
+ return
+ fi
+
+
+ # also check to ensure nrpe isnt running on another pid
+ PID=`ps -eo pid,comm | awk '$2 == "nrpe" {print $1}'`
+ if [ "x$PID" != "x" ]; then
+ kill $2 $PID
+ fi
}
+start()
+{
+ if [ ! -f $CONFIG ]; then
+ echo "No nrpe.cfg - exiting"
+ exit 1
+ fi
+
+ # test to see if the pid file can be created
+ touch -a $PIDFILE 1>/dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ echo "Cannot write to $PIDFILE - exiting"
+ exit 1
+ fi
+ # if touch created an remove empty file, remove it
+ test ! -s $PIDFILE && rm -f $PIDFILE
+
+ status nrpe > /dev/null && die "NRPE is already running"
+
+ $DAEMON -c $CONFIG -d
+
+ echo "NRPE started"
+}
+
+stop()
+{
+ killproc_nrpe
+ if ! status nrpe >/dev/null ; then
+ echo "NRPE stopped"
+ else
+ echo "NRPE is not running"
+ fi
+}
+
test -x $DAEMON || exit 0
case "$1" in
@@ -105,41 +164,17 @@
;;
start)
- if [ ! -f $CONFIG ]; then
- echo "No nrpe.cfg - exiting"
- exit 1
- fi
-
- # test to see if the pid file can be created
- touch -a $PIDFILE 1>/dev/null 2>&1
- if [ $? -ne 0 ]; then
- echo "Cannot write to $PIDFILE - exiting"
- exit 1
- fi
- # if touch created an remove empty file, remove it
- test ! -s $PIDFILE && rm -f $PIDFILE
-
- status nrpe > /dev/null && die "NRPE is already running"
-
- $DAEMON -c $CONFIG -d
-
- echo "NRPE started"
+ start
;;
stop)
- if status nrpe >/dev/null ; then
- killproc_nrpe
- status nrpe >/dev/null || echo "NRPE stopped"
- else
- echo "NRPE is not running"
- fi
-
+ stop
;;
restart)
- $0 stop
+ stop
sleep 1
- $0 start
+ start
;;
*)
_______________________________________________
Opsview-checkins mailing list
[email protected]
http://lists.opsview.org/lists/listinfo/opsview-checkins