I wrote one. I can't find it online anymore, but here it is:

It tries to use top on Linux. On Solaris and AIX I have a newer version of
top installed in my Nagios tree; otherwise it falls back on prstat or ps.

Feel free to use it; it might be buggy, though!


On Fri, 13 Aug 2010 12:11:37 +0200, Sebastian Ries
<[email protected]> wrote:
> Hi
> 
>> According to the documentation, the -n option is used to give a regex
>> which selects which process(es) you are monitoring and the -u option
>> should work on the sum of the CPU for those selected processes only.
>> 
>> The example given is:
>> 
>> ./check_snmp_process.pl -H 127.0.0.1 -C public -n http -w 3,8 -c 0,15
>> -m 9,25 -u 70,99
> 
> This is what I tried but as I described all processes have the same
> name :-(
> 
>> which should alert if the total cpu of all processes having 'http' in
>> the name is > 70.
> 
> And I want to get an alert if ANY of these processes uses more than the
> given value.
> 
>> To get the plugin to do exactly what you describe, you will need to
>> edit the perl code a bit.  Bear in mind that you can never have more
>> than one process using >90% cpu in the same period!
> 
> Not really true ;-)
> As this machine has 8 real CPU-Cores with HT enabled there can be up to
> 16 Processes with more than 90% CPU usage ;-)
> 
> I will have a look in the perl code. Maybe it is only a small
> adaptation...
> 
> Regards
> Sebastian Ries
#!/opt/nagios/bin/perl
#

use strict;
use Getopt::Long;
use POSIX "uname";
use vars qw($PROGNAME);
use lib "/opt/nagios/libexec";
use utils qw (%ERRORS &print_revision &support &usage);

sub print_help ();
sub print_usage ();

# set variables
#
# Everything below is file-scoped state shared by the option handling,
# get_data() and do_math().  $tty used to be listed twice in one my() list,
# which masks the first declaration; it is now declared once.

# command line options, plus the program version and the per-OS default
# collector type
my ($opt_c, $opt_w, $opt_a, $opt_m, $opt_p, $opt_h, $opt_V, $PROGVER, $opt_t,
    $deftype);

# fields split out of one line of top/ps/prstat output
my ($line, $pid, $user, $prior, $nice, $vsz, $rss, $shr, $state, $cpu, $mem,
    $time, $cmd, @args);
my ($chkcmd, $tty, $start, $date1, $date2, $date3, $startm, $startd);

# accumulators and results handed from get_data() to do_math()
my ($avg, $m, $sumavg, %add, $message, @arrcmd, $size, $pri, $lwp, $uid, $args,
    $wcpu);

my $loop = 0;           # number of datasets (header lines) seen so far
my $sum = 0;            # metric total for the dataset being read
my $pidcount = 0;       # number of matching process lines
my $exitcode = 0;       # plugin exit status, 0 = OK
my $opt_i = 2;          # -i: iterations of the collector (default 2)
my $opt_d = 1;          # -d: delay in seconds between iterations (default 1)

$PROGNAME="check_procs_usage";
$PROGVER="1.0";

# Identify the operating system; $sysname decides further down which
# command is used to collect the process data.
my ($sysname, $nodename, $release, $version, $machine ) = uname();

# Parse the command line.  Each option has a one-letter and a long name that
# write to the same variable.  -w, -c, -d and -i take a number, the others
# that take a value take a string.
Getopt::Long::Configure('bundling');
my $options_ok = GetOptions(
        "V|version"     => \$opt_V,
        "h|help"        => \$opt_h,
        "m|metric=s"    => \$opt_m,
        "w|warn=f"      => \$opt_w,
        "c|crit=f"      => \$opt_c,
        "p|pidname=s"   => \$opt_p,
        "a|args=s"      => \$opt_a,
        "d|delay=f"     => \$opt_d,
        "t|type=s"      => \$opt_t,
        "i|iter=f"      => \$opt_i);

# GetOptions returns false after complaining about an unknown switch or a
# malformed value.  Stop here instead of running the check with whatever
# happened to be parsed.
if (!$options_ok) {
        print_usage();
        exit $ERRORS{'UNKNOWN'};
}

# Choose the command that collects the process data ($chkcmd) from the
# operating system and the optional -t type.  Each platform has a default
# type ($deftype) that is used when -t is not given.
#
# The command strings are written on one line each: a line break inside the
# string would reach the shell and split off the trailing "all" argument of
# top into a command of its own.

# an omitted -t selects the platform default
my $reqtype = defined($opt_t) ? $opt_t : "";

# set when no command could be chosen; reported after the platform checks
my $typeerr;

if ($sysname =~ /Linux/) {

        $deftype = "top";

        if (($reqtype =~ /$deftype/) || ($reqtype eq "")) {

                $chkcmd = "/usr/bin/top -b -c -n $opt_i -d $opt_d";
        }
        else {
                $typeerr = "*** $sysname only supports $deftype ***";
        }
}
elsif ($sysname =~ /SunOS/) {

        $deftype = "top";

        if (($reqtype =~ /$deftype/) || ($reqtype eq "")) {
                $chkcmd = "/opt/nagios/bin/top -b -c -C -u -d $opt_i -s $opt_d all";
        }
        elsif ($reqtype =~ /prstat/) {
                $chkcmd = "/usr/bin/prstat -n 10000 $opt_d $opt_i | /usr/bin/tee";
        }
        else {
                $typeerr = "*** $sysname doesn't support $reqtype ***";
        }

}
elsif ($sysname =~ /AIX/) {

        $deftype = "ps";

        if (($reqtype =~ /$deftype/) || ($reqtype eq "")) {
                $chkcmd = "/usr/bin/ps auxww";
        }
        elsif ($reqtype =~ /top/) {
                $chkcmd = "/opt/nagios/bin/top -b -c -C -u -d $opt_i -s $opt_d all";
        }
        else {
                $typeerr = "*** $sysname does not support $reqtype ***";
        }
}
else {
        $typeerr = "$sysname not supported";
}

# Without a collector command nothing further can work, so report the
# problem and leave with UNKNOWN.  -h and -V are let through so that help
# and version remain available everywhere.  print (not printf) is used
# because the text contains user input that must not be read as a format.
if (defined($typeerr) && !$opt_h && !$opt_V) {
        print "$typeerr\n";
        exit $ERRORS{'UNKNOWN'};
}

# -V: print name and version, then leave
if ($opt_V) {
        print "$PROGNAME $PROGVER\n";
        exit $ERRORS{'UNKNOWN'};
}

# -h: print the full help text, then leave
if ($opt_h) {
        print_help();
        exit $ERRORS{'UNKNOWN'};
}

# if required options not set print usage and exit
# (tested with defined() so that a threshold of 0 counts as "given")
if (!defined($opt_w) || !defined($opt_c) || !defined($opt_m) ||
    !defined($opt_p) || ($opt_p eq "")) {
        print_usage();
        exit $ERRORS{'UNKNOWN'};
}

# exit if any of the input does not match requirements:
#   - the metric must be exactly CPU or MEM; an unanchored match would let
#     e.g. "CPUX" through, which no later comparison recognises
#   - the iteration count is used as a divisor and passed to the collector
#     command, so it has to be a positive whole number
#   - the delay must not be negative
if (($opt_w !~ /[0-9]/) || ($opt_c !~ /[0-9]/) ||
    ($opt_m !~ /\A(?:CPU|MEM)\z/) ||
    ($opt_i !~ /\A[1-9][0-9]*\z/) ||
    ($opt_d !~ /[0-9]/) || ($opt_d < 0)) {

        print_help();
        print "\n***unsupported options: warn($opt_w) crit($opt_c) metric($opt_m) delay($opt_d) iterations($opt_i) ***\n\n";
        exit $ERRORS{'UNKNOWN'};
}

# Print the three supported invocation forms to STDOUT.
# Takes no arguments and returns nothing useful.  The synopsis is kept on a
# single output line; a line break inside the string would be printed as is.
sub print_usage () {
        print "\nusage:\n";
        print "  $PROGNAME -w <warn> -c <crit> -m <metric> -p <pidname> [-a pidarg] [-d delay] [-i iterations] [-t type] \n";
        print "  $PROGNAME [-h | --help]\n";
        print "  $PROGNAME [-V | --version]\n\n";
}

# Print the full help text to STDOUT: program name and version, the usage
# synopsis, a description of every option, two examples, and the notes that
# apply to the operating system found in $sysname.
# Takes no arguments and returns nothing useful.
sub print_help () {
        print "\n$PROGNAME $PROGVER\n\n";
        print "Kyle O'Donnell (03-05-2009)\n";
        print_usage();
        print "\n";
        print "  warn\t\tpercent <metric> used resulting in warning state\n";
        print "  crit\t\tpercent <metric> used resulting in critical status\n";
        print "  metric\tCPU or MEM\n";
        print "  pidname\tname of process to search for\n";
        print "  pidarg\targument of a process fot search for\n";
        print "  delay\t\tdelay in seconds between polling of data (default: 1)\n";
        print "  iter\t\tnumber of iterations of the top output (default: 2)\n";
        print "  type\t\tthe command type to use for obtaining metrics (default: see below)\n\n";
        print "example:  $PROGNAME -w 80 -c 90 -m CPU -p nscd -d 2 -i 5\n";
        print "  monitor cpu usage of the nscd process, wait 2 secs between collection of 5 data sets\n\n";
        print "example:  $PROGNAME -w 80 -c 90 -m CPU -p java -a xyz\n";
        # the argument named here has to match the -a value in the example
        print "  monitor cpu usage of a java process with the argument xyz\n\n";

        if ($sysname =~ /SunOS/) {

                print "*** SunOS detected ... ***\n";
                print "*** MEM is not supported! ***\n";
                # only the types the command selection accepts on SunOS
                print "*** types supported top prstat (default: top) ***\n";
                print "*** prstat does not print arguments -a ignored! ***\n\n";
        }

        if ($sysname =~ /AIX/) {

                print "*** AIX detected ... ***\n";
                print "*** types supported: top* ps (default: ps) ***\n";
                print "*** man ps to determine if the way CPU% is calcuated matches your requirements ***\n\n";
                print "*** expirimental* unixtop does not work fully in AIX ***\n";
                print "*** top does not print command names properly ***\n";
                print "*** top does not print MEM% -m MEM ignored! ***\n";
                print "*** top does not print command arguments ***\n\n";
        }

        if ($sysname =~ /Linux/) {

                print "*** Linux detected ... ***\n";
                print "*** types supported: top (default: top) ***\n\n";
        }

}

####################################################
#
# get data by running the command and looping where appropriate
#                                                  
####################################################

# Work out which listing format $chkcmd produces on this platform.
# Returns 'linux_top', 'aix_top', 'aix_ps', 'sunos_top' or 'sunos_prstat',
# or undef when $sysname is none of Linux, AIX or SunOS.
sub _output_format {

        if ($sysname =~ /Linux/) {
                return "linux_top";
        }
        if ($sysname =~ /AIX/) {
                return ($chkcmd =~ /top/) ? "aix_top" : "aix_ps";
        }
        if ($sysname =~ /SunOS/) {
                return ($chkcmd =~ /top/) ? "sunos_top" : "sunos_prstat";
        }
        return;
}

# Split one line of collector output into the values the check needs.
#   $format  one of the names returned by _output_format()
#   $text    the line, already stripped of leading whitespace
# Returns ($cmd, $argstr, $cpu, $mem), or an empty list when the line is not
# a process line (no numeric PID, or too few columns).  $cmd is reduced to
# its last path component when it starts with "/", $argstr is everything
# after the command joined with single blanks, and $mem is 0 for formats
# that have no memory column.
sub _parse_process_line {
        my ($format, $text) = @_;

        my @field = split(/\s+/, $text);

        # zero-based positions of the PID, CPU%, MEM% and command columns
        my ($pid_col, $cpu_col, $mem_col, $cmd_col);

        if ($format eq "linux_top") {
                # pid user prior nice vsz rss shr state cpu mem time cmd args...
                ($pid_col, $cpu_col, $mem_col, $cmd_col) = (0, 8, 9, 11);
        }
        elsif ($format eq "aix_top") {
                # top 3.6.1: pid uid pri nice size rss state time wcpu cpu cmd
                ($pid_col, $cpu_col, $cmd_col) = (0, 9, 10);
        }
        elsif ($format eq "aix_ps") {
                # user pid cpu mem vsz rss tty state start time cmd args...
                ($pid_col, $cpu_col, $mem_col, $cmd_col) = (1, 2, 3, 10);

                # a start column containing letters is split over two
                # fields (startm, startd), which moves the command one
                # column to the right
                if (defined($field[8]) && ($field[8] =~ /[a-z]/)) {
                        $cmd_col = 11;
                }
        }
        elsif ($format eq "sunos_top") {
                # top 3.6.1: pid uid lwp pri nice size rss state time cpu cmd args...
                ($pid_col, $cpu_col, $cmd_col) = (0, 9, 10);
        }
        else {
                # prstat: pid user size rss state pri nice time cpu cmd
                ($pid_col, $cpu_col, $cmd_col) = (0, 8, 9);
        }

        # summary, total and blank lines have no number in the PID column
        return unless (defined($field[$pid_col]) && ($field[$pid_col] =~ /^[0-9]+$/));
        return unless defined($field[$cmd_col]);

        my $cmd = $field[$cmd_col];
        my $argstr = join(" ", @field[($cmd_col + 1) .. $#field]);

        if ($cmd =~ /^\//) {

                # extract the binary name of the command running if path is present
                my @parts = split(/\/+/, $cmd);
                $cmd = $parts[$#parts] if (@parts);
        }

        # the SunOS collectors print the CPU value with a trailing percent sign
        my $cpu = $field[$cpu_col];
        $cpu =~ s/%//;

        my $mem = defined($mem_col) ? $field[$mem_col] : 0;

        return ($cmd, $argstr, $cpu, $mem);
}

####################################################
#
# get data by running the command and looping where appropriate
#
# Runs $chkcmd and adds up the -m metric of every process whose command
# matches -p (and, with -a, whose arguments match -a).  The plugin's own
# process is left out.
#
# Processes with the same name are treated as one metric: if there are 8
# processes with the word httpd and each is consuming 1.0% memory, httpd is
# considered to be consuming 8.0% memory (8 processes * 1.0% memory).
#
# A header line (PID as first or second column) starts a new dataset.  A
# collector that iterates prints one dataset per iteration; all totals are
# divided by the number of datasets actually seen.
#
# Results are left in the file-level variables read by do_math():
#   $avg       metric total per dataset, averaged over all datasets
#   $pidcount  matching processes per dataset, averaged the same way
#   $sum       metric total of the last dataset, undef if nothing matched in it
#   $message   set to the "not running" text when $sum is undef
#
# Prints a message and exits with UNKNOWN when the platform is unsupported,
# no command is available, the metric or a pattern is unusable, -a is
# combined with a collector that does not list arguments, or the command
# returns no dataset at all.
#
####################################################

sub get_data () {

        my $format = _output_format();

        if (!defined($format)) {
                print "what did i tell you about $sysname!\n";
                exit $ERRORS{'UNKNOWN'};
        }

        if (!defined($chkcmd) || ($chkcmd eq "")) {
                print "UNKNOWN - no command available to collect process data on $sysname\n";
                exit $ERRORS{'UNKNOWN'};
        }

        if (!defined($opt_m) || (($opt_m ne "CPU") && ($opt_m ne "MEM"))) {
                print "UNKNOWN - metric must be CPU or MEM\n";
                exit $ERRORS{'UNKNOWN'};
        }

        # -a needs the arguments in the listing; refuse before collecting
        # anything when the chosen collector does not print them
        if (defined($opt_a)) {

                if ($format eq "aix_top") {
                        print "*** -a option cannot be used with top ***\n";
                        exit $ERRORS{'UNKNOWN'};
                }
                if ($format eq "sunos_prstat") {
                        print "*** -a option cannot be used with prstat, please install top ***\n";
                        exit $ERRORS{'UNKNOWN'};
                }
        }

        # -p and -a are used as patterns; compile them once so that a
        # malformed pattern is reported instead of aborting the plugin
        my $name_re = eval { qr/$opt_p/ };
        my $arg_re  = defined($opt_a) ? eval { qr/$opt_a/ } : undef;

        if (!defined($name_re) || (defined($opt_a) && !defined($arg_re))) {
                print "UNKNOWN - invalid pattern given to -p or -a\n";
                exit $ERRORS{'UNKNOWN'};
        }

        my @output = `$chkcmd`;

        # one entry per dataset; an entry stays undef until a process matches
        my @dataset_sum;
        my $matches = 0;

        foreach my $raw (@output) {

                my $text = $raw;
                chomp $text;
                $text =~ s/^\s+//;

                # PID in the first or second column marks the header line
                # that starts the next dataset
                if ($text =~ /^(?:\S+\s+)?PID\b/) {
                        push(@dataset_sum, undef);
                        next;
                }

                # whatever precedes the first header belongs to no dataset
                next unless (@dataset_sum);

                my ($name, $argstr, $pcpu, $pmem) = _parse_process_line($format, $text);

                next unless defined($name);
                next if ($text =~ /$PROGNAME/);
                next unless ($name =~ $name_re);
                next if (defined($arg_re) && ($argstr !~ $arg_re));

                $dataset_sum[$#dataset_sum] = 0 unless defined($dataset_sum[$#dataset_sum]);
                $dataset_sum[$#dataset_sum] += ($opt_m eq "CPU") ? $pcpu : $pmem;

                # this variable is used to determine how many $opt_p's are running
                $matches++;
        }

        my $datasets = scalar(@dataset_sum);

        if ($datasets == 0) {
                print "UNKNOWN - no process listing returned by $chkcmd\n";
                exit $ERRORS{'UNKNOWN'};
        }

        my $total = 0;
        foreach my $part (@dataset_sum) {
                $total += $part if defined($part);
        }

        # publish the results
        $loop     = $datasets;
        $sumavg   = $total;
        $avg      = $total / $datasets;
        $pidcount = $matches / $datasets;
        $sum      = $dataset_sum[$#dataset_sum];

        if (!defined($sum)) {

                if (defined($opt_a)) {
                        $message = "OK - process $opt_p with argument $opt_a is not running";
                }
                else {
                        $message = "OK - process $opt_p is not running";
                }
        }

} #get_data

####################################################
#
# this subroutine just evaluates the output and exits with the
# appropriate message/exitcode
#
####################################################

# Collect the data with get_data() and turn it into the plugin result.
#
# Reads $sum, $avg and $pidcount as left by get_data() and compares $avg
# with the -w and -c thresholds:
#   $avg <= warn          OK
#   warn < $avg <= crit   WARNING
#   $avg >  crit          CRITICAL
# A value exactly on a threshold does not alert.  The comparisons keep the
# original order, so the warning threshold is consulted first.
#
# Sets $message (one line, no embedded line breaks) and $exitcode; the
# caller prints and exits.  Takes no arguments.
sub do_math() {

        get_data();

        if (!defined($sum)) {

                # nothing matched in the last dataset: get_data() has set
                # the "not running" text, supply one only if it is missing
                if (!defined($message)) {
                        $message = "OK - process $opt_p is not running";
                }
        }
        elsif (!defined($avg)) {

                $message = "UNKNOWN - no usage figures could be calculated";
                $exitcode = $ERRORS{'UNKNOWN'};
        }
        else {

                # common part of all three messages
                my $subject = "$pidcount instance(s) of $opt_p";
                if (defined($opt_a)) {
                        $subject = "$subject with arg $opt_a";
                }

                if ($avg <= $opt_w) {

                        $message = "OK - $subject using $avg% $opt_m (<=$opt_w%)";
                }
                elsif ($avg > $opt_c) {

                        $message = "CRITICAL - $subject using $avg% $opt_m (>$opt_c%)";
                        $exitcode = $ERRORS{'CRITICAL'};
                }
                else {

                        $message = "WARNING - $subject using $avg% $opt_m (>$opt_w%)";
                        $exitcode = $ERRORS{'WARNING'};
                }
        }

} #do_math

# Entry point: evaluate the check, print the status line and hand the
# result code back to the caller of the plugin.
do_math();

print $message, "\n";
exit($exitcode);
------------------------------------------------------------------------------
This SF.net email is sponsored by 

Make an app they can't live without
Enter the BlackBerry Developer Challenge
http://p.sf.net/sfu/RIM-dev2dev 
_______________________________________________
Nagios-users mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/nagios-users
::: Please include Nagios version, plugin version (-v) and OS when reporting 
any issue. 
::: Messages without supporting info will risk being sent to /dev/null

Reply via email to