Deleted: trunk/opsview-core/nagios-plugins/check_opsview_slave
===================================================================
--- trunk/opsview-core/nagios-plugins/check_opsview_slave 2013-05-09 15:01:08 UTC (rev 12249)
+++ trunk/opsview-core/nagios-plugins/check_opsview_slave 2013-05-09 15:44:47 UTC (rev 12250)
@@ -1,207 +0,0 @@
-#!/usr/bin/perl
-#
-#
-# SYNTAX:
-# check_opsview_slave
-#
-# DESCRIPTION:
-# From master, runs a connection test to all slaves using the usual
-# connection method
-# TODO: Remove use of parallel::forker. Should be able to run
-# with open() pipes and select() to see if a command has finished
-# Major limitation right now is you cannot get stdout from the ssh
-# command, so a slave failure could be a connection problem or
-# an nsca error. Can see on command line
-#
-# TODO: Investigate use of IPC::Run instead of using Parallel::Forker
-#
-# AUTHORS:
-# Copyright (C) 2003-2013 Opsview Limited. All rights reserved
-#
-# This file is part of Opsview
-#
-# Opsview is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Opsview is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with Opsview; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-#
-
-use strict;
-use FindBin qw($Bin);
-use lib "$Bin/../lib", "$Bin/../etc", "$Bin/../perl/lib";
-use Opsview::Connections;
-use Parallel::Forker;
-use Nagios::Plugin;
-use Time::HiRes qw(gettimeofday usleep);
-use Opsview::Config;
-
-my $np = Nagios::Plugin->new(
- usage =>
- "Usage: %s [-v|--verbose] [-h|--help] [-r|--restart_tunnels]\n Will check slave connection from master based on entries in connections.dat",
- shortname => "SLAVE",
-);
-
-$np->add_arg(
- spec => "restart_tunnels|r",
- help => "-r, --restart_tunnels\n Restart any hung ssh tunnels",
-);
-
-$np->add_arg(
- spec => "notimecheck|T",
- help => "-T, --notimecheck\n Do not check time synchronisation",
-);
-
-my @good_slaves = ();
-my @bad_slaves = ();
-
-my $alarm_triggered = 0;
-
-# Need to do this before $SIG{ALRM} below because N::P will define a default routine
-$np->getopts;
-
-my $Fork = Parallel::Forker->new;
-$SIG{CHLD} = sub { $Fork->sig_child() if $Fork };
-
-# We don't trap the TERM signal because children will inherit it
-#$SIG{TERM} = sub { $Fork->kill_tree_all('TERM') if $Fork; die "Quitting...\n" };
-$SIG{ALRM} = sub { $alarm_triggered++ };
-
-# Can test an ssh connection by trying to connect to a slave on port 5666
-# This causes ssh to timeout
-alarm $np->opts->timeout;
-
-my $start = gettimeofday;
-
-eval { @_ = Opsview::Connections->slaves };
-$np->nagios_exit( CRITICAL, "No slaves defined" ) if ($@);
-
-# Do this for each slave
-foreach my $slave ( Opsview::Connections->slaves ) {
- next unless $slave->active;
- my @cmd = $slave->ssh_command( "cat /usr/local/nagios/var/nsca.status" );
-
- $Fork->schedule(
- run_on_start => sub {
- setpgrp( 0, 0 ); # Set this as progress group leader
- my $errors = 0;
- open F, "-|", @cmd
- or die "Cannot run ssh command on master to slave\n";
- $_ = <F>;
- close F or die "Cannot communicate with slave\n";
- chomp;
- if ( $_ != 0 ) {
- print STDERR "NSCA status on " . $slave->name . " is bad\n";
- if ( $np->opts->restart_tunnels ) {
- print STDERR "Requesting opsviewd restart SSH tunnel to "
- . $slave->name . "\n";
- system( "/usr/local/nagios/bin/send_opsview_cmd",
- "restart_tunnel", $slave->name );
- }
- $errors++;
- }
- else {
-
- # check time synchronisation
- if ( !$np->opts->notimecheck ) {
- @cmd = $slave->ssh_command( "perl -e 'print time'" );
- open my $ssh, "-|", @cmd
- or die "Cannot run ssh command on master to slave";
- my $remote_time = <$ssh>;
- close($ssh);
- my $difference = time - $remote_time;
-
- # 5 seconds leeway in time
- if ( $difference < -4 || $difference > 5 ) {
- print STDERR 'Error: time on ', $slave->name,
- ' is not in sync with master', $/;
- $errors++;
- }
- }
-
- # check to see if status.dat is over 60 seconds old
- @cmd = $slave->ssh_command(
- "perl -e '\$i= time() - (stat(shift))[9] > 60 ? 1 : 0; print \$i,\$/' "
- . Opsview::Config->status_dat );
- open F, "-|", @cmd
- or die "Cannot run ssh command on master to slave\n";
- $_ = <F>;
- close F
- or die "Cannot communicate with slave for status.dat check\n";
- chomp;
- if ( $_ != 0 ) {
- print STDERR "Status.dat out of date on "
- . $slave->name
- . $/;
- $errors++;
- }
- }
- exit $errors == 0 ? 0 : 1;
- },
- run_on_finish => sub {
- my ( $process, $status ) = @_;
- if ($status) {
- push @bad_slaves, $slave->name;
- }
- else {
- push @good_slaves, $slave->name;
- }
- },
- name => $slave->name,
- label => "slave_check",
- );
-}
-
-$Fork->ready_all;
-
-# Duplicate the wait_all call, with checking for alarm triggering
-while ( !$alarm_triggered && $Fork->is_any_left ) {
- $Fork->poll;
- usleep 100 * 1000;
-}
-
-if ($alarm_triggered) {
- my @running = ();
-
- # We kill the process group, to kill all other grandchildren processes
- # This is better than using P::Forker's built-in kill_tree_all as that
- # needs Proc::ProcessTable to get all the children ids
- # Worse, it also calls Storable which has a single file which may get
- # stomped over by the other children
- foreach my $proc ( $Fork->running ) {
- push @running, $proc->name;
- my $pid = $proc->pid;
- $pid = -$pid;
- kill 'TERM', $pid;
- }
- $np->nagios_exit( CRITICAL,
- "Plugin timed out. Good slaves=@good_slaves, Bad slaves=@bad_slaves, Unknown=@running"
- );
-}
-
-my $duration = gettimeofday - $start;
-
-# Debug
-#if (@bad_slaves) {
-# $Fork->write_tree( filename => "/tmp/check_opsview_slave.errors" );
-#}
-
-if (@bad_slaves) {
- $np->nagios_exit( CRITICAL, "Problem with slaves: @bad_slaves" );
-}
-else {
- $np->add_perfdata(
- label => "time",
- uom => "s",
- value => sprintf( "%0.5f", $duration ),
- );
- $np->nagios_exit( OK, "All slaves okay" );
-}
Deleted: trunk/opsview-core/nagios-plugins/check_opsview_slave_cluster
===================================================================
--- trunk/opsview-core/nagios-plugins/check_opsview_slave_cluster 2013-05-09 15:01:08 UTC (rev 12249)
+++ trunk/opsview-core/nagios-plugins/check_opsview_slave_cluster 2013-05-09 15:44:47 UTC (rev 12250)
@@ -1,111 +0,0 @@
-#!/usr/bin/perl
-#
-#
-# SYNTAX:
-# check_opsview_slave_cluster {nodeip}
-#
-# DESCRIPTION:
-# From other nodes, runs a check to test the other node's connectivity
-# Is auto-generated by Cluster-node embeddedservice
-#
-# AUTHORS:
-# Copyright (C) 2003-2013 Opsview Limited. All rights reserved
-#
-# This file is part of Opsview
-#
-# Opsview is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Opsview is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with Opsview; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-#
-
-use strict;
-use FindBin qw($Bin);
-use lib "$Bin/../lib", "$Bin/../etc", "$Bin/../perl/lib";
-use Nagios::Plugin;
-
-my $np = Nagios::Plugin->new(
- usage => "Usage: %s\n Will check cluster node is running correctly",
- shortname => "CLUSTER",
-);
-
-$np->getopts;
-
-require Opsview::Slave::Config;
-
-my $nodeip = shift @ARGV;
-
-$np->nagios_exit( UNKNOWN, "No node ip specified" ) unless $nodeip;
-
-my $now = time;
-my $status_dat = Opsview::Slave::Config->status_dat;
-
-# use '-q' here as ssh banners can affect output
-my @cmd = (
- "/usr/local/nagios/libexec/check_by_ssh",
- "-t",
- $np->opts->timeout,
- "-q",
- "-H",
- $nodeip,
- "-C",
- '/usr/local/nagios/libexec/check_nagios -C /usr/local/nagios/bin/nagios -F '
- . $status_dat
- . ' -e 60 -v -v'
-);
-warn "cmd: ", join( ' ', @cmd ), $/ if ( $np->opts->verbose );
-my ( $info, $rc ) = run_command(@cmd);
-
-warn "info: ", $info, $/ if ( $np->opts->verbose );
-
-# NOTE: there is a race condition here when nagios is writing out status.dat
-# so give it chance and try again
-if ( $info =~ m/Cannot open status log for reading/ ) {
- my $delay = 5;
- if ( $np->opts->verbose ) {
- warn
- "status.dat not available - sleeping for $delay seconds and trying again",
- $/;
- }
- sleep $delay;
- ( $info, $rc ) = run_command(@cmd);
- warn "info: ", $info, $/ if ( $np->opts->verbose );
-}
-
-if ($rc) {
- $np->nagios_exit( CRITICAL, "Error: $info ($?)" );
-}
-
-$np->nagios_exit( OK, "" );
-
-sub run_command {
- my (@cmd) = @_;
-
- open F, "-|", @cmd
- or $np->nagios_exit( CRITICAL, "Cannot run ssh command" );
- my $info;
- {
-
- # Grab all lines and convert linefeeds to \n
- local $/ = undef;
- $info = <F>;
- $info =~ s/\n/\\n/g
- };
- $info =~ s/\\n$//;
- close F; # or $np->nagios_exit( CRITICAL, "Error: $info ($?)" );
- my $rc = $?;
-
- #die "info=$info";
-
- return ( $info, $rc );
-
-}
Deleted: trunk/opsview-core/nagios-plugins/check_opsview_slave_node
===================================================================
--- trunk/opsview-core/nagios-plugins/check_opsview_slave_node 2013-05-09 15:01:08 UTC (rev 12249)
+++ trunk/opsview-core/nagios-plugins/check_opsview_slave_node 2013-05-09 15:44:47 UTC (rev 12250)
@@ -1,212 +0,0 @@
-#!/usr/bin/perl
-#
-#
-# SYNTAX:
-# check_opsview_slave_node [-r] {slave}
-#
-# DESCRIPTION:
-# From master, runs a connection test to slave using the usual
-# connection method
-# Better than check_opsview_slave because can grab output from ssh call
-# Is auto-generated by Slave-node embeddedservice
-#
-# AUTHORS:
-# Copyright (C) 2003-2013 Opsview Limited. All rights reserved
-#
-# This file is part of Opsview
-#
-# Opsview is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Opsview is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with Opsview; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-#
-
-use strict;
-use warnings;
-use FindBin qw($Bin);
-use lib "$Bin/../lib", "$Bin/../etc", "$Bin/../perl/lib";
-use Opsview::Config;
-use Opsview::Connections;
-use Nagios::Plugin;
-use Time::HiRes qw(gettimeofday);
-
-my $np = Nagios::Plugin->new(
- usage =>
- "Usage: %s [-r|--restart_tunnels]\n Will check slave connection from master based on entries in connections.dat",
- shortname => "SLAVE",
-);
-
-$np->add_arg(
- spec => "restart_tunnels|r",
- help =>
- "-r, --restart_tunnels\n Restart ssh tunnels if NSCA/NRD reports a problem",
-);
-
-$np->add_arg(
- spec => "notimecheck|T",
- help => "-T, --notimecheck\n Do not check time synchronisation",
-);
-
-$np->getopts;
-
-my $start = gettimeofday;
-
-eval { @_ = Opsview::Connections->slaves };
-$np->nagios_exit( CRITICAL, "No slaves defined" ) if ($@);
-
-my $slavenodename = shift @ARGV;
-
-$np->nagios_exit( UNKNOWN, "No slave specified" ) unless $slavenodename;
-
-# Find the slave
-my $slavenode;
-foreach my $slave ( Opsview::Connections->slaves ) {
- if ( $slave->name eq $slavenodename ) {
- $slavenode = $slave;
- last;
- }
-}
-$np->nagios_exit( CRITICAL, "Slave not found: $slavenodename" )
- unless $slavenode;
-
-my @cmd =
- $slavenode->ssh_command( "/usr/local/nagios/bin/retrieve_opsview_info" );
-open my $f, "-|", @cmd
- or $np->nagios_exit( CRITICAL, "Cannot run ssh command on master to slave" );
-my @info = <$f>;
-
-# close can return false when called program exits with non-0 status.
-# $! will be false in this case so test for that before dying
-# Also, $info will be empty if the scrript doesnt exist or cannot be run
-if ( ( !close($f) && $! ) || !@info ) {
- $np->nagios_exit( CRITICAL,
- "Error retrieving slave information - slave is likely to be down"
- );
-}
-my $now = time;
-
-if ( $info[0] !~ /^OK$/ ) {
- $np->nagios_exit( CRITICAL, "Error returned from retrieval: " . $info[0] );
-}
-
-shift @info;
-
-my $slaveinfo = {};
-foreach my $line (@info) {
- $line =~ /^(\w+)=(.*)$/;
- $slaveinfo->{$1} = $2;
-}
-
-my $duration = gettimeofday - $start;
-$np->add_perfdata(
- label => "time",
- uom => "s",
- value => sprintf( "%0.5f", $duration ),
-);
-
-my $return_code = 0;
-my $message = "";
-if ( Opsview::Config->slave_send_method eq "nsca" ) {
-
- # nsca could return 3, so only error if 2
- if ( $slaveinfo->{nsca} == 2 ) {
- my $error = "NSCA problem - status code: $slaveinfo->{nsca}";
- if ( $slaveinfo->{nsca_error} ) {
- $error .= ", message: '" . $slaveinfo->{nsca_error} . "'";
- }
- if ( $np->opts->restart_tunnels ) {
- system( "/usr/local/nagios/bin/send_opsview_cmd",
- "restart_tunnel", $slavenodename );
- $np->nagios_exit( CRITICAL, "$error - restarting tunnel" );
- }
- else {
- $np->nagios_exit( CRITICAL, $error );
- }
- }
-}
-else {
- $np->add_perfdata(
- label => "backlog",
- value => $slaveinfo->{slaveresults_backlog}
- );
- $np->add_perfdata(
- label => "last_import",
- uom => "s",
- value => $slaveinfo->{slaveresults_maxage}
- );
- if ( $slaveinfo->{slaveresults_error} ) {
- my $error = "Slave sending error: " . $slaveinfo->{slaveresults_error};
- if ( $np->opts->restart_tunnels ) {
- system( "/usr/local/nagios/bin/send_opsview_cmd",
- "restart_tunnel", $slavenodename );
- $np->nagios_exit( CRITICAL, "$error - restarting tunnel" );
- }
- else {
- $np->nagios_exit( CRITICAL, $error );
- }
- }
- my $age_threshold = 70;
- if ( $slaveinfo->{slaveresults_maxage} > $age_threshold ) {
- $np->nagios_exit( CRITICAL,
- "Last import > $age_threshold seconds - is import_slavesresultsd running on slave?"
- );
- }
-}
-
-if ( !$np->opts->notimecheck ) {
- my $difference = $now - $slaveinfo->{now};
- if ( abs($difference) > 5 ) {
- $np->nagios_exit( CRITICAL,
- "Time is out of sync by $difference seconds"
- );
- }
-}
-
-my $difference = $now - $slaveinfo->{status};
-if ( abs($difference) > 60 ) {
- $np->nagios_exit( CRITICAL,
- "Status.dat is out of date on slave by $difference seconds"
- );
-}
-
-my @port_forwards = qw( 2345 4125 5667 5669 );
-my @failed_ports;
-
-foreach my $port (@port_forwards) {
- if ( $slaveinfo->{"port_$port"} ne 'ok' ) {
- push( @failed_ports, $port );
- }
-}
-
-if (@failed_ports) {
- if ( $np->opts->restart_tunnels ) {
- system( "/usr/local/nagios/bin/send_opsview_cmd",
- "restart_tunnel", $slavenodename );
- $np->nagios_exit( CRITICAL,
- "Port(s) "
- . join( ',', @failed_ports )
- . " not being forwarded correctly - restarting tunnel"
- );
- }
- else {
- $np->nagios_exit( CRITICAL,
- "Port(s) "
- . join( ',', @failed_ports )
- . " not being forwarded correctly"
- );
- }
-}
-
-$np->nagios_exit(
- return_code => $return_code,
- message => $message,
-);
Modified: trunk/opsview-core/nagios-plugins/filelist
===================================================================
--- trunk/opsview-core/nagios-plugins/filelist 2013-05-09 15:01:08 UTC (rev 12249)
+++ trunk/opsview-core/nagios-plugins/filelist 2013-05-09 15:44:47 UTC (rev 12250)
@@ -7,9 +7,6 @@
f nagios:nagios 0755 /usr/local/nagios/libexec/check_opsview_master check_opsview_master
f nagios:nagios 0755 /usr/local/nagios/libexec/check_opsview_ndo_import check_opsview_ndo_import
f nagios:nagios 0755 /usr/local/nagios/libexec/check_opsview_checkresults check_opsview_checkresults
-f nagios:nagios 0755 /usr/local/nagios/libexec/check_opsview_slave check_opsview_slave
-f nagios:nagios 0755 /usr/local/nagios/libexec/check_opsview_slave_cluster check_opsview_slave_cluster
-f nagios:nagios 0755 /usr/local/nagios/libexec/check_opsview_slave_node check_opsview_slave_node
f nagios:nagios 0755 /usr/local/nagios/libexec/check_opsview_sessions check_opsview_sessions
f nagios:nagios 0755 /usr/local/nagios/libexec/check_opsview_tablespace_free check_opsview_tablespace_free
f nagios:nagios 0755 /usr/local/nagios/libexec/check_opsview_tablespace_used check_opsview_tablespace_used