oneohtwo:~# omreport storage vdisk
List of Virtual Disks in the System
[ ... ]
When there is a problem, does "Status : Ok" change to
"Status : Failure"?
Non-Critical, and I think Critical. I would report anything other
than Ok as an error, to be sure.
I agree in general on sending alerts for anything other than OK. One
thing to watch for, in this particular case, is that if you upgrade
Dell OpenManage, you may start getting Non-Critical/Degraded reported
if you didn't recently upgrade the firmware on the PERC controller. While upgrading
the firmware is the best solution to this, that is something that I like to
schedule downtime for (firmware upgrades make me nervous), and I have had
PERCs running with Dell OM reporting out of date firmware for some time,
and have hacked my check script to check for and optionally ignore these.
(In these cases, the controller reports Non-critical/degraded, but no
subcomponents show issues. Also, it reports Minimum required version fields
for FW, etc.)
Attached is a nagios nrpe script I use.
Tom Payerle
OIT-TSS-DCS [email protected]
University of Maryland (301) 405-6135
College Park, MD 20742-4111
#!/usr/local/bin/perl -T
# nrpe script to do a Nagios check on system hw health using Dell OpenManage
#srvadmin omreport tool
use strict;
use warnings;
#use utils qw(%ERRORS print_revision support );
use lib '/cell_root/software/dellopenmanage/scripts';
use tp_utils qw(noshell_backticks %ERRORS);
use Getopt::Long;
# Script revision string; the $Revision$ keyword is maintained by RCS.
my $VERSION='$Revision: 1.1 $'; #Updated by RCS
# NOTE(review): $TIMEOUT is not referenced anywhere in the visible code --
# presumably seconds, intended for an alarm() guard. TODO confirm.
my $TIMEOUT = 15;
# Debug flag; set by the --debug command line option and consulted by the
# check routines before they print diagnostics to STDERR.
my $DEBUG=0;
# Prints the help text for this script to STDERR.
# Takes no arguments.  The flag names listed in the text have to agree with
# the GetOptions specification in the main program; in particular the
# temperature-only mode is spelled --temp there, so it is documented as
# --temp here (it was previously advertised as --temperature, which
# GetOptions rejects).
sub usage()
{
    print STDERR <<EOF;
Checks basic hardware health of system with Dell OpenManage srvadmin omreport
tool. Meant to be run on remote host via NRPE for Nagios.
In chassis mode, checks for status info about fans, memory, processors, power
supplies, batteries, temperature sensors, and voltage sensors. (AC switch
and current not currently checked as not running on any systems supporting
that). In --temp mode, only checks the temperature sensors (a subset of the
--chassis checks). Note that need to use omconfig, etc. to set threshholds
for temperature and other sensors.
In storage mode, checks the status of all storage controllers, along with
the physical disks and connectors connected to the controller. Also checks
the batteries, virtual disks, and enclosure status.
Basically, if we see anything amiss, will treat as critical.
Usage:
$0 flags
where allowed flags are:
--help: print this text and exit
--chassis: Run chassis checks
--temp: Run temperature checks only (included in --chassis)
--storage: Run storage checks
--perfdata: If set, will include any performance data results in
the NRPE output
--quiet: Supress warning messages when running code
--debug: DEBUG mode, will likely screw up NRPE
--ignore-degraded-controller: Hack to ignore degraded PERC controller.
"Degraded" flag set if driver/firmware out-of-date, and this
is a longer term issue might not want to be paged about.
At least one of --chassis or --storage must be set to do anything meaningful.
EOF
}
#--------------------------------------------------------------------
# general omreport routines
#--------------------------------------------------------------------
# Runs the site omreport wrapper script with the given arguments, via
# noshell_backticks (presumably without involving a shell, judging by the
# helper's name -- confirm in tp_utils).
#
# Arguments: the list of omreport arguments, e.g. ('chassis', 'fans').
# Returns:   an array ref of chomped output lines, or a plain (non-ref)
#            error string if OpenManage reports it cannot run omreport.
sub run_omreport_cmd(@)
{
    my @args = @_;

    # my $cmd='/opt/dell/srvadmin/oma/bin/omreport';
    # Use standard glue version of omreport
    my @cmd = ( '/cell_root/software/dellopenmanage/scripts/omreport' );

    my @results = noshell_backticks(@cmd, @args);

    # The first element returned by noshell_backticks is the return code.
    # It is deliberately discarded: storage connector and pdisk seem to give
    # non-zero return codes even when all is well, and when something really
    # is wrong we still want to go on and parse the output.
    shift @results;

    chomp @results;

    # Only look at the first line if there is one; comparing an undefined
    # value would raise an "uninitialized" warning on empty output.
    if ( @results && defined $results[0]
         && $results[0] eq 'Error! Invalid command: omreport' )
    {   #This means openmanage is broken again
        return "Dell OpenManage not working";
    }
    return [ @results ];
}
# Splits omreport output into a list of "field => value" hashes.
#
# Arguments:
#   $lines     - array ref of output lines (as returned by run_omreport_cmd)
#   $initfield - name of the field that starts every per-unit section
#
# Returns an array ref of hash refs.  The first hash holds every field seen
# before the first $initfield line (the "global" section; it is an empty
# hash, never undef, when there is none).  Each following hash describes one
# unit.  Lines without a colon are ignored.
#
# Only the FIRST colon on a line separates field from value, so values that
# themselves contain colons (a disk ID such as 0:0:1, a time of day, ...)
# are kept intact instead of being cut at their first colon.
sub process_generic_output($$)
{
    my $lines     = shift;
    my $initfield = shift;

    my $hashlist = [];
    my $curhash  = {};

    foreach my $line (@$lines)
    {   next unless $line =~ /\:/;
        $line =~ s/^\s*//;
        $line =~ s/\s*$//;

        # LIMIT of 2: everything after the first colon belongs to the value
        my ( $field, $value ) = split /\s*\:\s*/, $line, 2;

        if ( $field eq $initfield )
        {   #Starting a new unit: close the section collected so far
            push @$hashlist, $curhash;
            $curhash = {};
        }
        $curhash->{$field} = $value;
    }

    #And store final unit hash
    push @$hashlist, $curhash;
    return $hashlist;
}
#--------------------------------------------------------------------
# omreport chassis command routines
#--------------------------------------------------------------------
# Runs "omreport chassis fans" and inspects every fan unit that is reported.
#
# Returns a two element list ($errors, $perfdata):
#   $errors   - array ref of problem strings (empty when nothing is wrong)
#   $perfdata - array ref of [ name, rpm ] pairs, one per fan whose reading
#               has the form "<digits> RPM"
sub check_chassis_fans()
{
    my $tag    = "fans";
    my $output = run_omreport_cmd( 'chassis', 'fans' );
    unless ( $output && ref($output) eq 'ARRAY' )
    {   # run_omreport_cmd handed back an error string instead of lines
        return ( ["$tag:$output"], [] );
    }

    my $records = process_generic_output( $output, 'Index' );
    unless ( $records && ref($records) eq 'ARRAY' && scalar(@$records) )
    {   return ( ["$tag:Bad pgo out: $records"], [] );
    }

    # The leading record is the section before the first Index; unused here
    shift @$records;

    my ( @problems, @measurements );
    foreach my $fan (@$records)
    {   # Fans are labelled by index rather than by their probe name
        my $label = "fan" . $fan->{Index};
        my $rpm   = $fan->{'Reading'};
        if ( $rpm =~ /^\d+\s+RPM$/ )
        {   $rpm =~ s/\s+RPM$//;
            push @measurements, [ $label, $rpm ];
        }
        if ( $fan->{Status} ne 'Ok' )
        {   push @problems, "$label status=$fan->{Status}";
        }
    }

    print STDERR "Errors in chassis fans\n" if $DEBUG && scalar(@problems);
    return ( \@problems, \@measurements );
}
# Runs "omreport chassis memory" and checks the overall memory health plus
# the status of every occupied memory slot.
#
# Returns a two element list ($errors, $perfdata); both are array refs and
# $perfdata is always empty because no performance data is gathered here.
sub check_chassis_memory()
{
    my $tag    = "mem";
    my $output = run_omreport_cmd( 'chassis', 'memory' );
    return ( ["$tag:$output"], [] )
        unless $output && ref($output) eq 'ARRAY';

    my $records = process_generic_output( $output, 'Index' );
    return ( ["$tag:Bad pgo out: $records"], [] )
        unless $records && ref($records) eq 'ARRAY' && scalar(@$records);

    my $summary = shift @$records;
    my ( @problems, @measurements );

    # Overall health line from the section preceding the first slot
    if ( $summary->{Health} ne 'Ok' )
    {   my $health = $summary->{Health} || 'unknown';
        push @problems, "Mem Health is $health, not Ok";
    }

    foreach my $slot (@$records)
    {   #Skip empty slots
        next if $slot->{'Type'} eq '[Not Occupied]';

        my $label  = $slot->{'Connector Name'};
        my $health = $slot->{Status};
        push @problems, "$label status=$health" if $health ne 'Ok';
    }

    print STDERR "Errors in chassis memory\n" if $DEBUG && scalar(@problems);
    return ( \@problems, \@measurements );
}
# Runs "omreport chassis processors" and checks the overall CPU health plus
# the status of every populated processor socket.
#
# Returns a two element list ($errors, $perfdata); both are array refs and
# $perfdata is always empty because no performance data is gathered here.
sub check_chassis_processors()
{
    my $tag    = "cpu";
    my $output = run_omreport_cmd( 'chassis', 'processors' );
    unless ( $output && ref($output) eq 'ARRAY' )
    {   return ( ["$tag:$output"], [] );
    }

    my $records = process_generic_output( $output, 'Index' );
    unless ( $records && ref($records) eq 'ARRAY' && scalar(@$records) )
    {   return ( ["$tag:Bad pgo out: $records"], [] );
    }

    my $summary = shift @$records;
    my ( @problems, @measurements );

    if ( $summary->{Health} ne 'Ok' )
    {   my $health = $summary->{Health} || 'unknown';
        push @problems, "CPU Health is $health, not Ok";
    }

    foreach my $cpu (@$records)
    {   # The vendor is read from whichever of the two fields is present
        my $maker = $cpu->{'Processor Manufacturer'}
            || $cpu->{'Processor Brand'}
            || '';
        next if $maker eq '[Not Occupied]';    # empty socket

        my $label  = $cpu->{'Connector Name'};
        my $health = $cpu->{Status};
        if ( $health ne 'Ok' )
        {   push @problems, "$label status=$health";
        }
    }

    print STDERR "Errors in chass procs\n" if $DEBUG && scalar(@problems);
    return ( \@problems, \@measurements );
}
# Issues an omreport chassis batteries command, then checks output for
# problems: the overall battery health (when reported at all), and for each
# battery that its Status is 'Ok' and its Reading is 'Good'.
#
# Returns 2 list refs, the first a list of any problems found, the second
# a list of perfdata [ key => value ] pairs.
# (No perfdata collected in this routine)
sub check_chassis_batteries()
{
    my @args  = ( 'chassis', 'batteries' );
    my $lines = run_omreport_cmd(@args);
    my $abbr  = "bat";
    return ( ["$abbr:$lines"], [] ) unless $lines && ref($lines) eq 'ARRAY';

    my $hlist = process_generic_output( $lines, 'Index' );
    return ( ["$abbr:Bad pgo out: $hlist"], [] )
        unless $hlist && ref($hlist) eq 'ARRAY' && scalar(@$hlist);

    my $global   = shift @$hlist;
    my $errors   = [];
    my $perfdata = [];

    if ( scalar( keys %$global ) )
    {   #We skip this test if nothing in $global, test not supported
        unless ( $global->{Health} eq 'Ok' )
        {   my $tmp = $global->{Health} || 'unknown';
            push @$errors, "Batt Health is $tmp, not Ok";
        }
    }

    foreach my $unit (@$hlist)
    {   my $index   = $unit->{Index};
        my $status  = $unit->{Status};
        my $reading = $unit->{'Reading'};

        # Pick a label for error messages.  'Connector Name' is what this
        # routine has always used.
        # NOTE(review): the temps and volts routines in this script label
        # their units with 'Probe Name'; fall back to it in case the
        # batteries report does too -- confirm against real omreport output.
        # As a last resort synthesize a label so messages are never unnamed.
        my $name = $unit->{'Connector Name'}
            || $unit->{'Probe Name'}
            || 'battery' . ( defined $index ? $index : '' );

        if ( $status ne 'Ok' )
        {   push @$errors, "$name status=$status";
        }
        if ( $reading ne 'Good' )
        {   push @$errors, "$name Reading=$reading";
        }
    }

    print STDERR "Errors in chass batts\n" if $DEBUG && scalar(@$errors);
    return ( $errors, $perfdata );
}
# Runs "omreport chassis pwrsupplies" and checks the two summary lines
# (main power supply status and redundancy) as well as each power supply.
#
# Returns a two element list ($errors, $perfdata); both are array refs and
# $perfdata is always empty because no performance data is gathered here.
sub check_chassis_power_supplies()
{
    my $tag    = "ps";
    my $output = run_omreport_cmd( 'chassis', 'pwrsupplies' );
    return ( ["$tag:$output"], [] )
        unless $output && ref($output) eq 'ARRAY';

    my $records = process_generic_output( $output, 'Index' );
    return ( ["$tag:Bad pgo out: $records"], [] )
        unless $records && ref($records) eq 'ARRAY' && scalar(@$records);

    my $summary = shift @$records;
    my ( @problems, @measurements );

    # An empty summary means the tests are not supported; skip them then
    if ( scalar( keys %$summary ) )
    {   my $main_ps = $summary->{"Main System Chassis Power Supplies"};
        if ( $main_ps ne 'Ok' )
        {   my $shown = $main_ps || 'unknown';
            push @problems, "Main PS is $shown, not Ok";
        }

        my $redundancy = $summary->{"Power Supply Redundancy"};
        if ( $redundancy ne 'Ok' )
        {   my $shown = $redundancy || 'unknown';
            push @problems, "PS Redun is $shown, not Ok";
        }
    }

    foreach my $supply (@$records)
    {   # Label supplies ps1, ps2, ... (the reported index plus one)
        my $label    = 'ps' . ( $supply->{Index} + 1 );
        my $health   = $supply->{Status};
        my $presence = $supply->{'Online Status'};

        push @problems, "$label status=$health" if $health ne 'Ok';
        push @problems, "$label online stat=$presence"
            if $presence ne 'Presence Detected';
    }

    print STDERR "Errors in chass pwrsupp\n" if $DEBUG && scalar(@problems);
    return ( \@problems, \@measurements );
}
# Runs "omreport chassis temps" and checks the summary temperature status
# and every temperature probe.
#
# Returns a two element list ($errors, $perfdata):
#   $errors   - array ref of problem strings
#   $perfdata - array ref of [ name, degrees ] pairs for each probe whose
#               reading has the form "<number> C"
sub check_chassis_temps()
{
    my $tag    = "tmp";
    my $output = run_omreport_cmd( 'chassis', 'temps' );
    return ( ["$tag:$output"], [] )
        unless $output && ref($output) eq 'ARRAY';

    my $records = process_generic_output( $output, 'Index' );
    return ( ["$tag:Bad pgo out: $records"], [] )
        unless $records && ref($records) eq 'ARRAY' && scalar(@$records);

    my $summary = shift @$records;
    my ( @problems, @measurements );

    my $overall = $summary->{"Main System Chassis Temperatures"};
    if ( $overall ne 'Ok' )
    {   my $shown = $overall || 'unknown';
        push @problems, "Main Temp is $shown, not Ok";
    }

    # Short perfdata labels for the probe names we know about
    my %perf_label_for = (
        'System Board Ambient Temp' => 'temp_sysbd',
        'BMC Ambient Temp'          => 'temp_sysbd',
        'BMC Planar Temp'           => 'temp_planar',
        'BMC Riser Temp'            => 'temp_riser',
        'PROC_1 Temp'               => 'temp_cpu1',
        'PROC_2 Temp'               => 'temp_cpu2',
        'PROC_3 Temp'               => 'temp_cpu3',
        'PROC_4 Temp'               => 'temp_cpu4',
    );

    foreach my $probe (@$records)
    {   my $label   = $probe->{'Probe Name'};
        my $celsius = $probe->{Reading};

        # Errors are reported under the probe's original name
        push @problems, "$label status=$probe->{Status}"
            if $probe->{Status} ne 'Ok';

        next unless $celsius =~ /^[\d\.]+\s+C$/;

        # Perfdata uses the short label when known, stripped of non-word chars
        $label = $perf_label_for{$label} if exists $perf_label_for{$label};
        $label =~ s/\W//g;
        $celsius =~ s/\s+C$//;
        push @measurements, [ $label, $celsius ];
    }

    print STDERR "Errors in chass temps\n" if $DEBUG && scalar(@problems);
    return ( \@problems, \@measurements );
}
# Issues an omreport chassis volts command, then checks output for problems:
# the overall 'Health' line and the Status of every voltage probe.
#
# Returns 2 list refs, the first a list of any problems found, the second
# a list of perfdata [ key => value ] pairs, one per probe whose reading has
# the form "<number> V".
sub check_chassis_volts()
{
    my @args  = ( 'chassis', 'volts' );
    my $lines = run_omreport_cmd(@args);
    my $abbr  = "V";
    return ( ["$abbr:$lines"], [] ) unless $lines && ref($lines) eq 'ARRAY';

    my $hlist = process_generic_output( $lines, 'Index' );
    return ( ["$abbr:Bad pgo out: $hlist"], [] )
        unless $hlist && ref($hlist) eq 'ARRAY' && scalar(@$hlist);

    my $global   = shift @$hlist;
    my $errors   = [];
    my $perfdata = [];

    unless ( $global->{Health} eq 'Ok' )
    {   my $health = $global->{Health} || 'unknown';
        # This message used to read "Main PS is ...", copied from the power
        # supply check; it is the voltage health that is being reported.
        push @$errors, "Voltage Health is $health, not Ok";
    }

    # Short perfdata labels for the probe names we know about
    my %varnames = (
        'BMC CMOS Battery' => 'cmos_batt',
    );

    foreach my $unit (@$hlist)
    {   my $status  = $unit->{Status};
        my $name    = $unit->{'Probe Name'};
        my $reading = $unit->{'Reading'};

        if ( $status ne 'Ok' )
        {   push @$errors, "$name status=$status";
        }
        if ( $reading =~ /^[\d\.]+\s+V$/ )
        {   $reading =~ s/\s+V$//;
            $name = $varnames{$name} if exists $varnames{$name};
            $name =~ s/[^\w]//g;    #Clean up name
            push @$perfdata, [ $name, $reading ];
        }
        #if ( $reading ne 'Good' )
        #{ push @$errors, "$name reading=$reading";
        #}
    }

    print STDERR "Errors in chass volts\n" if $DEBUG && scalar(@$errors);
    return ( $errors, $perfdata );
}
# Runs every chassis mode check and merges the results.
#
# Returns a two element list ($errors, $perfdata):
#   $errors   - array ref with all problems found, in check order
#   $perfdata - array ref with all [ field => value ] performance data pairs
sub check_chassis()
{
    #Trying to put in order of importance, but not really sure
    my @checks = (
        \&check_chassis_processors,
        \&check_chassis_fans,
        \&check_chassis_volts,
        \&check_chassis_temps,
        \&check_chassis_memory,
        \&check_chassis_power_supplies,
        \&check_chassis_batteries,
    );

    my ( @all_problems, @all_measurements );
    foreach my $check (@checks)
    {   my ( $found, $measured ) = $check->();
        push @all_problems,     @$found;
        push @all_measurements, @$measured;
    }
    return ( \@all_problems, \@all_measurements );
}
# Runs the temp(erature) mode tests, i.e. only the chassis temperature check.
#
# Returns a two element list ($errors, $perfdata) of freshly built array
# refs: the problems found and the [ field => value ] performance data pairs.
sub check_temp()
{
    my ( $found, $measured ) = check_chassis_temps();

    # Hand back copies, as the results are collected into new lists
    return ( [ @$found ], [ @$measured ] );
}
#--------------------------------------------------------------------
# omreport storage command routines
#--------------------------------------------------------------------
# Issues an omreport storage controller command, then checks output for
# problems (Status other than 'Ok', State other than 'Ready').
#
# Argument:
#   $ignore_degraded - if true, a controller that is Non-Critical/Degraded
#       AND lists a minimum required firmware or driver version is not
#       reported (looks like just a FW or driver out-of-date).
#
# Returns 3 list refs: the problems found, a list of perfdata
# [ key => value ] pairs (always empty, none is collected here), and a list
# of [ controller ID => name ] pairs.  All three are returned on the error
# paths as well (the controller list is then empty), so callers can always
# dereference the third element.
sub check_storage_controller($)
{
    my $ignore_degraded = shift;

    my @args  = ( 'storage', 'controller' );
    my $lines = run_omreport_cmd(@args);
    my $abbr  = "ctlr";
    return ( ["$abbr:$lines"], [], [] )
        unless $lines && ref($lines) eq 'ARRAY';

    my $hlist = process_generic_output( $lines, 'ID' );
    return ( ["$abbr:Bad pgo out: $hlist"], [], [] )
        unless $hlist && ref($hlist) eq 'ARRAY' && scalar(@$hlist);

    # Drop the section preceding the first controller; it is not examined
    shift @$hlist;

    my $errors   = [];
    my $perfdata = [];
    my $clist    = [];

    foreach my $unit (@$hlist)
    {   my $id     = $unit->{ID};
        my $status = $unit->{Status};
        my $name   = $unit->{Name};
        my $state  = $unit->{State};
        push @$clist, [ $id, $name ];

        #---- Check if looks like old FW/driver
        #Set if either Min Req FW/Driver Versions listed
        my $old_fw = 0;
        foreach my $field ( 'Minimum Required Firmware Version',
                            'Minimum Required Driver Version' )
        {   my $required = $unit->{$field};
            $old_fw = 1 if ( $required && $required ne 'Not Applicable' );
        }
        #But only set if status is Non-critical and Degraded
        $old_fw = 0
            unless $status eq 'Non-Critical' && $state eq 'Degraded';

        my $ignore_old_fw = $old_fw && $ignore_degraded;
        print STDERR "Ignoring degraded controller (old FW/driver?)\n"
            if $ignore_old_fw && $DEBUG;

        if ( $status ne 'Ok' )
        {   push @$errors, "$name status=$status"
                unless $ignore_old_fw;
        }
        if ( $state ne 'Ready' )
        {   push @$errors, "$name state=$state"
                unless $ignore_old_fw;
        }
    }

    print STDERR "Errors in stor ctrlrs\n" if $DEBUG && scalar(@$errors);
    return ( $errors, $perfdata, $clist );
}
# Runs "omreport storage connector controller=<id>" and checks every
# connector of that controller for Status 'Ok' and State 'Ready'.
#
# Argument: the controller id.
# Returns a two element list ($errors, $perfdata); both are array refs and
# $perfdata is always empty because no performance data is gathered here.
sub check_storage_connector($)
{
    my $controller = shift;

    my $tag    = "conn";
    my $output = run_omreport_cmd( 'storage', 'connector',
        "controller=$controller" );
    unless ( $output && ref($output) eq 'ARRAY' )
    {   return ( ["$tag:$output"], [] );
    }

    my $records = process_generic_output( $output, 'ID' );
    unless ( $records && ref($records) eq 'ARRAY' && scalar(@$records) )
    {   return ( ["$tag:Bad pgo out: $records"], [] );
    }

    # Leading record (before the first ID) is not examined
    shift @$records;

    my ( @problems, @measurements );
    foreach my $connector (@$records)
    {   # Connectors are labelled by id and owning controller
        my $label     = "Conn$connector->{ID} on ctrl$controller";
        my $health    = $connector->{Status};
        my $condition = $connector->{State};

        push @problems, "$label status=$health"   if $health ne 'Ok';
        push @problems, "$label state=$condition" if $condition ne 'Ready';
    }

    print STDERR "Errors in stor conns ctrl-$controller\n"
        if $DEBUG && scalar(@problems);
    return ( \@problems, \@measurements );
}
# Runs "omreport storage pdisk controller=<id>" and checks every physical
# disk of that controller: Status must be 'Ok', State must be 'Online' and
# 'Failure Predicted' must be 'No'.
#
# Argument: the controller id.
# Returns a two element list ($errors, $perfdata); both are array refs and
# $perfdata is always empty because no performance data is gathered here.
sub check_storage_pdisk($)
{
    my $controller = shift;

    my $tag    = "pdsk";
    my $output = run_omreport_cmd( 'storage', 'pdisk',
        "controller=$controller" );
    return ( ["$tag:$output"], [] )
        unless $output && ref($output) eq 'ARRAY';

    my $records = process_generic_output( $output, 'ID' );
    return ( ["$tag:Bad pgo out: $records"], [] )
        unless $records && ref($records) eq 'ARRAY' && scalar(@$records);

    # Leading record (before the first ID) is not examined
    shift @$records;

    my ( @problems, @measurements );
    foreach my $disk (@$records)
    {   my $serial = $disk->{'Serial No.'};
        # Disks are labelled by id and serial number
        my $label = "Disk $disk->{ID} ($serial)";

        my $health    = $disk->{Status};
        my $condition = $disk->{State};
        my $predicted = $disk->{'Failure Predicted'};

        if ( $health ne 'Ok' )
        {   push @problems, "$label status=$health";
        }
        if ( $condition ne 'Online' )
        {   push @problems, "$label state=$condition";
        }
        if ( $predicted ne 'No' )
        {   push @problems, "$label fail pred=$predicted";
        }
    }

    print STDERR "Errors in stor pdisk ctrl=$controller\n"
        if $DEBUG && scalar(@problems);
    return ( \@problems, \@measurements );
}
# Runs "omreport storage vdisk" and checks every virtual disk for
# Status 'Ok' and State 'Ready'.
#
# Returns a two element list ($errors, $perfdata); both are array refs and
# $perfdata is always empty because no performance data is gathered here.
sub check_storage_vdisk()
{
    my $tag    = "vdsk";
    my $output = run_omreport_cmd( 'storage', 'vdisk' );
    unless ( $output && ref($output) eq 'ARRAY' )
    {   return ( ["$tag:$output"], [] );
    }

    my $records = process_generic_output( $output, 'ID' );
    unless ( $records && ref($records) eq 'ARRAY' && scalar(@$records) )
    {   return ( ["$tag:Bad pgo out: $records"], [] );
    }

    # Leading record (before the first ID) is not examined
    shift @$records;

    my ( @problems, @measurements );
    foreach my $vdisk (@$records)
    {   my $label     = $vdisk->{Name};
        my $health    = $vdisk->{Status};
        my $condition = $vdisk->{State};

        push @problems, "$label status=$health"   if $health ne 'Ok';
        push @problems, "$label state=$condition" if $condition ne 'Ready';
    }

    print STDERR "Errors in stor vdisk\n" if $DEBUG && scalar(@problems);
    return ( \@problems, \@measurements );
}
# Runs "omreport storage enclosure" and checks every enclosure for
# Status 'Ok' and State 'Ready'.
#
# Returns a two element list ($errors, $perfdata); both are array refs and
# $perfdata is always empty because no performance data is gathered here.
sub check_storage_enclosure()
{
    my $tag    = "enc";
    my $output = run_omreport_cmd( 'storage', 'enclosure' );
    return ( ["$tag:$output"], [] )
        unless $output && ref($output) eq 'ARRAY';

    my $records = process_generic_output( $output, 'ID' );
    return ( ["$tag:Bad pgo out: $records"], [] )
        unless $records && ref($records) eq 'ARRAY' && scalar(@$records);

    # Leading record (before the first ID) is not examined
    shift @$records;

    my ( @problems, @measurements );
    foreach my $enclosure (@$records)
    {   my $label     = $enclosure->{Name};
        my $health    = $enclosure->{Status};
        my $condition = $enclosure->{State};

        if ( $health ne 'Ok' )
        {   push @problems, "$label status=$health";
        }
        if ( $condition ne 'Ready' )
        {   push @problems, "$label state=$condition";
        }
    }

    print STDERR "Errors in stor enc\n" if $DEBUG && scalar(@problems);
    return ( \@problems, \@measurements );
}
# Runs "omreport storage battery" and checks every controller battery.
# A Status of 'Ok' or 'Non-Critical' is accepted, and so are the States
# 'Ready', 'Charging' and 'Learning'; anything else is reported.
#
# Returns a two element list ($errors, $perfdata); both are array refs and
# $perfdata is always empty because no performance data is gathered here.
sub check_storage_battery()
{
    my $tag    = "sbat";
    my $output = run_omreport_cmd( 'storage', 'battery' );
    unless ( $output && ref($output) eq 'ARRAY' )
    {   return ( ["$tag:$output"], [] );
    }

    my $records = process_generic_output( $output, 'ID' );
    unless ( $records && ref($records) eq 'ARRAY' && scalar(@$records) )
    {   return ( ["$tag:Bad pgo out: $records"], [] );
    }

    # Leading record (before the first ID) is not examined
    shift @$records;

    my ( @problems, @measurements );
    foreach my $battery (@$records)
    {   my $label     = $battery->{Name};
        my $health    = $battery->{Status};
        my $condition = $battery->{State};

        unless ( $health eq 'Ok' || $health eq 'Non-Critical' )
        {   push @problems, "$label status=$health";
        }
        unless ( $condition eq 'Ready'
            || $condition eq 'Charging'
            || $condition eq 'Learning' )
        {   push @problems, "$label state=$condition";
        }
    }

    print STDERR "Errors in stor batt\n" if $DEBUG && scalar(@problems);
    return ( \@problems, \@measurements );
}
# Runs every storage mode check and merges the results: controllers first,
# then connectors and physical disks per controller, then virtual disks,
# enclosures and storage batteries.
#
# Argument:
#   $ignore_degrade_perc - passed on to the controller check; when set,
#       a degraded PERC controller (usually firmware or driver out of date)
#       is not reported.
#
# Returns a two element list ($errors, $perfdata): array refs holding all
# problems found and all [ field => value ] performance data pairs
# (probably empty).
sub check_storage($)
{
    my $ignore_degrade_perc = shift;

    my ( @all_problems, @all_measurements );

    #First check controllers and get list of them
    my ( $found, $measured, $controllers ) =
        check_storage_controller($ignore_degrade_perc);
    push @all_problems,     @$found;
    push @all_measurements, @$measured;

    #Now run controller dependent tests
    foreach my $entry (@$controllers)
    {   my $cid = $entry->[0];
        foreach my $per_controller ( \&check_storage_connector,
                                     \&check_storage_pdisk )
        {   ( $found, $measured ) = $per_controller->($cid);
            push @all_problems,     @$found;
            push @all_measurements, @$measured;
        }
    }

    # Finally the checks that do not depend on a particular controller
    foreach my $check ( \&check_storage_vdisk,
                        \&check_storage_enclosure,
                        \&check_storage_battery )
    {   ( $found, $measured ) = $check->();
        push @all_problems,     @$found;
        push @all_measurements, @$measured;
    }

    return ( \@all_problems, \@all_measurements );
}
#--------------------------------------------------------------------
# Start of program
#--------------------------------------------------------------------
# Main program: parse the command line, run the requested check groups,
# then print a single status line (optionally followed by "|" and perfdata)
# and exit with the matching code from %ERRORS.

#To make taint happy
$ENV{'PATH'}='/usr/bin:/bin:/usr/sbin:/sbin:/usr/local/bin';

#Read in command line arguments
my $help;
my $quiet=0;
my $chassis_mode = 0;
my $temp_mode = 0;
my $storage_mode = 0;
my $perfdata_mode = 0;
my $ignore_degrade_perc=0;
my $res = GetOptions(
    'h|help!'                     => \$help,
    'q|quiet!'                    => \$quiet,
    'chassis|c!'                  => \$chassis_mode,
    'storage|s!'                  => \$storage_mode,
    'temp!'                       => \$temp_mode,
    'perfdata|p!'                 => \$perfdata_mode,
    'debug!'                      => \$DEBUG,
    'ignore-degraded-controller!' => \$ignore_degrade_perc,
);
if ( ! $res )
{   usage();
    print "Error parsing options\n";
    exit $ERRORS{UNKNOWN};
}

# --help was parsed but never acted upon; honor it as documented:
# print the usage text and exit without running any check.
if ( $help )
{   usage();
    exit $ERRORS{UNKNOWN};
}

unless ( $chassis_mode || $storage_mode || $temp_mode )
{   usage();
    # Kept on one line: a line break inside the message would split the
    # status text in two.
    print "It is pointless to run without one of --chassis, --storage, or --temp\n";
    exit $ERRORS{UNKNOWN};
}

# The chassis checks already include the temperature checks
$temp_mode = 0 if $chassis_mode;

my $errors=[];
my $perfdata=[];
my ($tmperrs, $tmpperf);
if ( $chassis_mode )
{   ($tmperrs, $tmpperf) = check_chassis();
    push @$errors, @$tmperrs;
    push @$perfdata, @$tmpperf;
}
if ( $temp_mode )
{   ($tmperrs, $tmpperf) = check_temp();
    push @$errors, @$tmperrs;
    push @$perfdata, @$tmpperf;
}
if ( $storage_mode )
{   ($tmperrs, $tmpperf) = check_storage($ignore_degrade_perc);
    push @$errors, @$tmperrs;
    push @$perfdata, @$tmpperf;
}

# Build the perfdata suffix; emit the "|" separator only when there is
# something to put after it.
my $outperf='';
if ( $perfdata_mode && scalar(@$perfdata) )
{   $outperf = '|' . join ' ', map { $_->[0] . '=' . $_->[1] } @$perfdata;
}

# Anything amiss is treated as critical
my $outstat = 'CRITICAL';
my $outtext = join "; ", @$errors;
if ( scalar(@$errors) == 0 )
{   #Everything looks OK
    $outstat='OK';
    $outtext='System seems healthy';
    $outtext='All temperatures within threshholds'
        if ( $temp_mode && ! ( $chassis_mode || $storage_mode) );
}
print $outtext, $outperf, "\n";
exit $ERRORS{$outstat};
_______________________________________________
Linux-PowerEdge mailing list
[email protected]
https://lists.us.dell.com/mailman/listinfo/linux-poweredge
Please read the FAQ at http://lists.us.dell.com/faq