Hey all,

I need a CPU check that will use vmstat to monitor the CPU usage.  I found one 
on the Nagios Exchange here.

http://www.nagiosexchange.org/cgi-bin/page.cgi?g=Detailed%2F1707.html;d=1

I have hacked it up some for what I want.  But I am not a programmer so would 
someone be willing to review it to make sure I am not doing bad stuff if I 
deploy this on a bunch of production servers?

What I changed:  vmstat runs every 1 second, 4 times.  The last line is taken and 
idle,sys,user,wa is returned from the check so I can graph it in Zenoss.  I 
also added some "print" commands that can be uncommented to verify what the 
check is doing.  I figure I can run it with a check_by_ssh command datasource.

#!/usr/bin/python

import string, os, sys,  re
#from optik import OptionParser
from optparse import OptionParser



#######################################
###  Define a set of strings to handle 
###  any info output requirements.


# Every message below is assembled from adjacent string literals so that no
# single literal has to span a physical line (a literal broken by a line wrap
# is a syntax error).  The text itself, including "\n" and "\t" escapes, is
# unchanged.

check_cpu_version = "check_cpu (nagios-plugins 1.5.1!?) 0.4.0a\n"

# Licence / warranty notice shown first in the full help.
intro = (
        "The nagios plugins come with ABSOLUTELY NO WARRANTY.  You may "
        "redistribute\ncopies of the plugins under the terms of the GNU "
        "General Public License.\nFor more information about these matters, "
        "see the file named COPYING.\nCopyright (c) 2004 Ethan Galstad/Karl "
        "DeBisschop\n\n"
)

# One-paragraph description of what the plugin checks.
preamble = (
        "This plugin will check the percent of idle CPU usage on the system "
        "it is\nexecuted on and generate an alert if the percentage is "
        "below\none of the threshold values.\n\n"
)

# Short usage summary; also appended to the argument-validation errors.
use = (
        "Usage:\tcheck_cpu -w limit -c limit [-t timeout]\n"
        "\tcheck_cpu (-h|--help)\n"
        "\tcheck_cpu (-V|--version)\n"
)

# Per-option help.  NOTE: the name "options" is rebound to the parsed option
# values once parser.parse_args() runs; fullHelp is built before that.
options = (
        "\n -w, --warning=PERCENT\n\tExit with WARNING status if idle CPU "
        "percentage is less than PERCENT\n"
        " -c, --critical=PERCENT\n\tExit with CRITICAL status if idle CPU "
        "percentage is less than PERCENT\n"
        " -t, --timeout=INTEGER\n\tSeconds before check attempt times out "
        "(default: 30)\n"
        " -s, --sample=INTEGER\n\tSeconds to use as sample time. "
        "(default: 1)\n"
        " -h, --help\n\tPrint detailed help screen\n"
        " -V, --version\n\tPrint version information\n\n"
)

# Known limitations, as written by the plugin's author.
bugs = (
        "Bugs:\tTimeout does not do what has been documented here.  Rather, "
        "it does \nnothing. The plugin seems to ignore characters that are "
        "not attached to \nparameter syntax.  This has been tested on RedHat "
        "8.0+, Fedora Core 1 & 2, \nOpenBSD 3.x and Debian.  I would "
        "appreciate feedback and/or patches if it doesn't\nfunction under "
        "your chosen operating system.\n\n"
)

# Contact information.
# NOTE(review): the "[email protected]" placeholders look like addresses that
# were masked before this copy of the script was made -- confirm and restore.
query = (
        "Send email to [email protected] if you have questions\n"
        "regarding the use of this software.  To submit patches or suggest "
        "improvements,\nsend email to [email protected].\n\n"
        "for questions and suggestions pertaining to the check_cpu plugin,\n"
        "please send email to [email protected].\n"
)

# Complete text printed by the -h/--help callback.
fullHelp = check_cpu_version + intro + preamble + use + options + bugs + query





def helpFunc(option, opt, value, parser):
        """optparse callback for -h/--help.

        Prints the complete help text and ends the process with exit
        status 3.  None of the four callback arguments is used.
        """
        print(fullHelp)
        raise SystemExit(3)

def verFunc(option, opt, value, parser):
        """optparse callback for -V/--version.

        Prints the version banner and ends the process with exit status 3.
        None of the four callback arguments is used.
        """
        print(check_cpu_version)
        raise SystemExit(3)




#######################################
###  Parse all the parameters.  Define
###  variables for later use.



# The automatic -h/--help option is switched off because the script registers
# its own help callback below.
parser = OptionParser(add_help_option=False)

# Flags that print a message and exit straight away.
for _short, _long, _handler in (
        ("-h", "--help", helpFunc),
        ("-V", "--version", verFunc),
):
        parser.add_option(_short, _long, action="callback", callback=_handler)

# Integer settings: (short flag, long flag, destination attribute, default).
for _short, _long, _dest, _default in (
        ("-w", "--warning", "warn", -1),
        ("-c", "--critical", "crit", -2),
        ("-s", "--sample", "sample", 1),
        ("-t", "--timeout", "timeout", 30),
):
        parser.add_option(_short, _long, action="store", type="int",
                          dest=_dest, default=_default)

(options, args) = parser.parse_args()

# Copy the parsed values into the plain names the rest of the script uses.
critical, warning = options.crit, options.warn
sample, timeout = options.sample, options.timeout


# Sanity-check the command-line values before anything is executed.  Every
# failure prints the reason plus the usage summary and exits with status 3.
#
# -2 (critical) and -1 (warning) are the option defaults, i.e. "no threshold
# was given"; any other value has to be a real percentage.

if critical != -2 and not 0 <= critical <= 100:
        print("Critical value is a percentage and must be between 0 and 100.\n"
              + use)
        sys.exit(3)

if warning != -1 and not 0 <= warning <= 100:
        print("Warning value is a percentage and must be between 0 and 100.\n"
              + use)
        sys.exit(3)

if critical >= warning:
        print("Critical value must be less than the warning value.\n" + use)
        sys.exit(3)

# The sample time is passed to vmstat as its delay argument, so zero or a
# negative number is rejected here.
if sample < 1:
        print("Sample time must be at least 1 second.\n" + use)
        sys.exit(3)

if sample >= timeout - 1:
        print("Sample time must be 2 seconds less than timeout.\n" + use)
        sys.exit(3)

# String form of the sample time, used when building the vmstat command line.
strSample = str(sample)


#######################################
### Determine what OS we are being run
### on, to figure syntax required.


# os.uname()[0] is the kernel name, obtained directly instead of through an
# os.popen("uname") pipe that was never closed.
osName = os.uname()[0]

if osName == "Linux":
        vmcmd = "/usr/bin/vmstat " + strSample + " 4"
elif osName in ("OpenBSD", "FreeBSD", "NetBSD"):
        # The three BSDs share one command line.
        vmcmd = "/usr/bin/vmstat -c 2 -w " + strSample
elif osName == "SunOS":
        vmcmd = "/usr/bin/vmstat " + strSample + " 2"
else:
        print("Unknown operating system, unable to continue.\n")
        sys.exit(3)


#######################################
### Grab the CPU sample and convert the
### relevant info to an int.


def _column(header, row, name):
        """Return the cell of row that sits under the header cell called name.

        The position is counted from the right-hand end of both lists and the
        right-most header cell with that name is used.
        NOTE(review): some vmstat variants are believed to print a second
        "sy" column (system calls) further to the left -- confirm.

        Raises ValueError if header has no such cell and IndexError if row is
        too short to have one.
        """
        fromEnd = header[::-1].index(name) + 1
        return row[-fromEnd]


def _cpu_fields(statOut):
        """Extract (idle, sys, user, wa) from raw vmstat output.

        statOut is the complete text printed by vmstat.  The header is the
        last line that has a cell reading exactly "id"; the values come from
        the last non-blank line of the output.  All four results are returned
        as the strings vmstat printed.  When the header has no "wa" cell the
        wa result is "0".

        Raises ValueError or IndexError when no header or no sample row can
        be found.
        """
        # split() without an argument discards leading, trailing and repeated
        # whitespace, so header cells and value cells line up even when one
        # line starts with a blank and the other does not.
        rows = [line.split() for line in statOut.splitlines() if line.strip()]

        header = None
        for row in rows:
                if "id" in row:
                        header = row
        if header is None or rows[-1] is header:
                raise ValueError("no vmstat sample found")

        latest = rows[-1]
        idle = _column(header, latest, "id")
        system = _column(header, latest, "sy")
        user = _column(header, latest, "us")
        if "wa" in header:
                wait = _column(header, latest, "wa")
        else:
                wait = "0"
        return idle, system, user, wait


vmstat = os.popen(vmcmd, "r")
try:
        statOut = vmstat.read()
finally:
        vmstat.close()

try:
        idleCPU, sysCPU, userCPU, waCPU = _cpu_fields(statOut)
        idleValue = int(idleCPU)
except (ValueError, IndexError):
        # Unusable vmstat output: report UNKNOWN instead of dying with a
        # traceback.
        print("CPU STATUS UNKNOWN: Could not complete check.")
        sys.exit(3)


#######################################
### Finally, determine and report CPU
### state,  and exit the plugin.


# Choose the status label and exit code.  A lower idle percentage means a
# busier CPU, so the critical threshold is tested before the warning one.
# idleValue, critical and warning are all integers, so anything that is not
# at or below a threshold is above both of them; no further case exists.
if idleValue <= critical:
        status, exitCode = "CPU CRITICAL", 2
elif idleValue <= warning:
        status, exitCode = "WARNING", 1
else:
        status, exitCode = "OK", 0

#uncomment to see command and output
#print(vmcmd)
#print(statOut)

# Text before the "|" is the status message; the name=value pairs after it
# are the figures returned for graphing.
print(status + ": CPU is " + idleCPU + "% idle|idle=" + idleCPU
      + " sys=" + sysCPU + " user=" + userCPU + " wa=" + waCPU)
sys.exit(exitCode)




-------------------- m2f --------------------

Read this topic online here:
http://forums.zenoss.com/viewtopic.php?p=30665#30665

-------------------- m2f --------------------



_______________________________________________
zenoss-users mailing list
[email protected]
http://lists.zenoss.org/mailman/listinfo/zenoss-users

Reply via email to