Hi,
















A DRBD monitor script is available at 
http://ben.timby.com/?p=12








Unfortunately, it no longer works with DRBD 8.3 because the format of the
/proc/drbd output has changed somewhat. This is an updated script which seems
to work fine with DRBD 8.3.8. I have also updated the checks a little bit and
made the output prettier :)
















Best regards,
















gulikoza








#!/usr/bin/perl -w
# Copyright (c) 2007 Ben Timby
# Written by Ben Timby <bti...@gmail.com>
# Based on check_drbd by Igor Genibel <i...@jexiste.org>
# Updated for DRBD 8.3 by gulikoza <gulikoza at users.sourceforge.net>
#
# Released under the GNU Public License
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#Checking remote hosts requires ssh key authentication be configured. See the
#link below for information on configuring this.
#
#http://sial.org/howto/openssh/publickey-auth/
#
#~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
#Usage: ./drbd.monitor options [remote host list]
#
#Required Options
#
#  -p host               : name of host which must be "Primary"
#
#Optional Options
#
#  -h                    : prints this message
#  -d                    : print debugging information to STDERR
#
#These options apply only if checking remote hosts
#
#  -u user               : ssh username for remote host checks
#  -i path               : ssh identity (key) file for remote host checks
#
#Example (checks drbd0 on remote node1, ensuring that it is primary):
#
#./drbd.monitor -du mon -i /home/mon/.ssh/id_rsa -p node1 node1
#
#Example (check drbd1 locally ensuring that it is primary):
#
#./drbd.monitor -p node1 localhost
#~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
# /etc/mon/mon.cf:
# --
#hostgroup servers thishost remotehost
#
#watch servers
#    service drbd
#        interval 1m
#        monitor drbd.monitor -p thishost -i /home/mon/.ssh/id_rsa -u mon
#        period wd {Mon-Fri} hr {7am-10pm}
#            alertevery 1h
#            alertafter 2 30m
#            alert mail.alert root@localhost
# --
#~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*

use strict qw(subs vars refs);
use File::Temp qw/ :mktemp /;
use Getopt::Std;
use Sys::Hostname;

# debug(@parts)
#
# Diagnostic trace helper. Does nothing unless $main::debug is 1; otherwise
# writes a single line to STDERR made of the prefix "DEBUG: ", every argument
# followed by one space (undefined arguments are shown as "undef"), and a
# terminating newline.
sub debug
{
        if ($main::debug == 1)
        {
                # Render every argument first, then emit prefix, payload and newline.
                my @rendered = map { (defined($_) ? $_ : 'undef') . ' ' } @_;

                print STDERR "DEBUG: ";
                print STDERR join('', @rendered);
                print STDERR "\n";
        }
}

# parse($file, $up_role)
#
# Reads DRBD status text (the /proc/drbd format) from $file and evaluates
# every device line found in it.
#
# Parameters:
#   $file    - path of the file to read: /proc/drbd itself for a local
#              check, or a temporary copy fetched from another host.
#   $up_role - role the checked host is expected to hold ("Primary" or
#              "Secondary"); when undef the role check is skipped.
#
# Nothing useful is returned; results are merged into globals:
#   $main::max_code - raised to the worst status seen so far
#                     (0 = OK, 1 = WARNING, 2 = CRITICAL).
#   $main::msg      - "\n[<device status>]" is appended for each device.
#
# Prints a CRITICAL line and exits with status 2 when $file cannot be opened
# or when the reported DRBD version is older than 0.7.
sub parse
{
        my ($file, $up_role) = @_;

        debug("up_role: ", $up_role);

        my @buf;

        # Three-argument open with a lexical handle, so the path can never be
        # interpreted as an open mode.
        my $fh;
        if (!open($fh, '<', $file)) {
                print "CRITICAL drbd module not loaded!\n";
                exit 2;
        }

        # Store the stats in a buffer; the sync details of a device live on the
        # lines following its status line, so random access is needed later.
        while (my $line = <$fh>) {
                # Drop line endings, including any carriage returns.
                $line =~ tr/\r\n//d;

                if ($line =~ /^version: (\d+)\.(\d+)\.(\d+)/) {
                        if ($1 == 0 && $2 < 7) {
                                print "CRITICAL DRBD version too old for this script (< v0.7.x)\n";
                                close($fh);
                                # Remove the file only when it is a temporary
                                # copy, never /proc/drbd itself.
                                unlink $file if ($file ne '/proc/drbd');
                                exit 2;
                        }
                }

                push @buf, $line;
                debug("parse[" . scalar(@buf) . "]: \"$line\"");
        }
        close($fh);

        # Parse the buffer
        for my $i (0 .. $#buf) {
                debug("line: \"$buf[$i]\"");

                # Device status lines look like:
                #  0: cs:Connected ro:Secondary/Secondary ds:UpToDate/UpToDate
                next unless $buf[$i] =~ m{^\s+([0-9]+): cs:(\w+) ro:(\w+)/(\w+) ds:(\w+)/(\w+)};

                # $st1/$ds1 describe the checked host, $st2/$ds2 its peer.
                my ($dev, $cs, $st1, $st2, $ds1, $ds2) = ($1, $2, $3, $4, $5, $6);
                my ($code, $msg);

                debug("dev=$dev cs=$cs st1=$st1 st2=$st2 ds1=$ds1 ds2=$ds2");

                # Alerts have this priority:
                # - we are sync'ing (WARNING),
                # - we are inconsistent (CRITICAL),
                # - peer is inconsistent (WARNING),
                # - we lost the other host (CRITICAL) and we are secondary,
                # - we lost the other host (WARNING) and we are primary,
                # - we should be primary or secondary but we are not (WARNING),
                # - Connected and UpToDate (OK),
                # - not Connected (CRITICAL),
                # - in any other case, something is bad (CRITICAL).
                if ($cs =~ /Sync(Source|Target)/) {
                        my $percent = '';
                        my $extra = '';

                        # Get information about sync and ETA: the progress
                        # percentage is read two lines below the status line,
                        # the extra detail line three lines below.
                        if ($i + 2 <= $#buf && $buf[$i + 2] =~ /(\d+\.\d+)%/) {
                                $percent = $1;
                        }
                        if ($i + 3 <= $#buf) {
                                $extra = $buf[$i + 3];
                                $extra =~ s/^[ \t]+//;
                        }

                        $code = 1;
                        $msg  = "$dev: $cs - ${percent}% - $extra";
                } elsif ($ds1 eq 'Inconsistent') {
                        $code = 2;
                        $msg  = "$dev: $cs - $st1/$st2 - $ds1 (should be UpToDate)";
                } elsif ($ds2 eq 'Inconsistent') {
                        $code = 1;
                        $msg  = "$dev: $cs - $st1/$st2 - $ds1/$ds2 (should be UpToDate)";
                } elsif ($st2 eq 'Unknown' and $st1 ne 'Primary') {
                        # Peer lost while we are not primary. The test uses the
                        # actual local role: comparing the expected role with
                        # lower-case 'primary' could never be false, which made
                        # the WARNING branch below unreachable.
                        $code = 2;
                        $msg  = "$dev: $cs - $st1/$st2 (should not be Unknown)";
                } elsif ($st2 eq 'Unknown' and $st1 eq 'Primary') {
                        $code = 1;
                        $msg  = "$dev: $cs - $st1/$st2 (should not be Unknown)";
                } elsif (defined $up_role and $up_role ne $st1) {
                        $code = 1;
                        $msg  = "$dev: $cs - $st1 (should be $up_role) / $st2 - $ds1/$ds2";
                } elsif ($cs eq 'Connected' && $ds1 eq 'UpToDate') {
                        $code = 0;
                        $msg  = "$dev: $cs - $st1/$st2 $ds1/$ds2";
                } elsif ($cs ne 'Connected') {
                        $code = 2;
                        $msg  = "$dev: $cs (should be Connected) - $st1/$st2 $ds1/$ds2";
                } else {
                        # Any case we didn't think about is CRITICAL
                        $code = 2;
                        $msg  = "$dev: $cs - $st1/$st2 - $ds1/$ds2 (Unknown error!)";
                }

                # Merge our status with the globals: keep the worst code seen
                # and append this device's message.
                $main::max_code = $code if ($code > $main::max_code);
                $main::msg .= "\n[" . $msg . "]";
        }
}

# ssh($hostspec, $key)
#
# Fetches /proc/drbd from another host over ssh and stores it in a temporary
# file.
#
# Parameters:
#   $hostspec - ssh destination, e.g. "user@host".
#   $key      - path of the ssh identity (private key) file.
#
# Returns the path of the temporary file holding the fetched text. The file
# is left in place; the caller is responsible for removing it.
#
# Prints a CRITICAL line and exits with status 2 when the command takes
# longer than 10 seconds, exits non-zero, or produces no output. Dies when
# the temporary file cannot be created or written.
sub ssh
{
        my ($hostspec, $key) = @_;

        debug("hostspec: ", $hostspec, " key: ", $key);

        my $output = "";
        my $failed = 0;

        # Set an execution timeout
        eval {
                local $SIG{ALRM} = sub { die "alarm\n" };
                alarm 10;
                # NOTE(review): $hostspec and $key are interpolated into a
                # shell command line; they must come from trusted
                # configuration only.
                $output = `ssh -ttt $hostspec -i $key cat /proc/drbd 2> /dev/null`;
                $failed = 1 if ($? != 0);
                alarm 0;
        };
        my $err = $@;
        if ($err) {
                # Make sure no alarm stays pending after a non-timeout failure.
                alarm 0;
                die $err unless $err eq "alarm\n";   # propagate unexpected errors
                # Timeout reached
                print "CRITICAL Timeout reached\n";
                exit 2;
        }

        # Timeout not reached
        if ($failed) {
                print "CRITICAL Host ssh service unreachable\n";
                exit 2;
        }
        if (!defined $output or $output eq "") {
                print "CRITICAL module not loaded on host\n";
                exit 2;
        }

        # mkstemp creates and opens the file in one step, so the name cannot be
        # claimed by someone else between choosing it and opening it. It is
        # only called once there is something to store, so the early exits
        # above never leave a stray file behind.
        my ($fh, $tmpfile) = eval { mkstemp("/tmp/tmpfileXXXXX") };
        die "Unable to create tmpfile!\n" unless defined $fh;
        print {$fh} $output or die "Unable to write tmpfile!\n";
        close($fh) or die "Unable to write tmpfile!\n";

        return $tmpfile;
}

# usage([$error])
#
# Prints the help text to STDERR and terminates the program.
#
# Parameters:
#   $error - optional description of what was wrong with the parameters;
#            when given it is printed to STDOUT ahead of the help text.
#
# Never returns. The exit status is 2 when $error is given, so that a
# misconfigured monitor is reported as a failure instead of a success, and
# 0 when only the help text was requested.
sub usage
{
        my ($error) = @_;

        if (defined $error) {
                print "Error in parameters. $error\n\n";
        }

        print STDERR << "EOF";
mon System Monitor DRBD monitor

Usage: $0 options [remote host list]

Required Options

  -p host                       : name of host which must be "Primary"

Optional Options

  -h                    : prints this message
  -d                    : print debugging information to STDERR
  
These options apply only if checking remote hosts
  
  -u user               : ssh username for remote host checks
  -i path               : ssh identity (key) file for remote host checks

Example (checks drbd0 on remote node1, ensuring that it is primary):

$0 -du mon -i /home/mon/.ssh/id_rsa -p node1 node1

Example (check drbd1 locally ensuring that it is primary):

$0 -p node1 localhost

EOF
        exit(defined $error ? 2 : 0);
}

# Main program: parse the command line, check every host named on it (the
# local host directly, any other host over ssh), then print one summary line
# and exit with the worst status found (0 = OK, 1 = WARNING, 2 = CRITICAL).

# Shared with the subroutines: debug() reads $main::debug, parse() updates
# $main::max_code and $main::msg.
$main::debug    = 0;
$main::max_code = 0;
$main::msg      = "";

my @code_msg   = ( 'OK', 'WARNING', 'CRITICAL' );
my $local_host = hostname();
my %opt;

getopts("hdu:i:p:", \%opt) or usage("Unknown option or missing option argument!");
usage() if $opt{h};
$main::debug = 1 if $opt{d};

debug("user:", $opt{u});
debug("key file:", $opt{i});
debug("primary host:", $opt{p});
debug("#argv: ", $#ARGV);

if (not defined $opt{p}) {
        usage("You must define a primary host with -p!");
}

# The host list is optional; without it the local host is checked, instead of
# reporting OK without having looked at anything.
my @peers = @ARGV ? @ARGV : ("localhost");

foreach my $peer (@peers)
{
        $peer = $local_host if ($peer eq "localhost");
        debug("peer: ", "\"$peer\"");
        debug("pri: ", "\"$opt{p}\"");

        # The host named with -p must be Primary, every other host Secondary.
        my $role = ($peer eq $opt{p}) ? "Primary" : "Secondary";

        if ($peer ne $local_host) {
                if (not defined $opt{u} or not defined $opt{i}) {
                        usage("You must define a user with -u and a identity file with -i when checking remote hosts!");
                }

                # ssh() leaves the fetched status in a temporary file that is
                # removed again once it has been parsed.
                my $proc = ssh($opt{u} . '@' . $peer, $opt{i});
                debug("proc: ", $proc);
                parse($proc, $role);
                unlink $proc;
        } else {
                debug("parsing local file: /proc/drbd");
                parse("/proc/drbd", $role);
        }
}

print $code_msg[$main::max_code] . $main::msg . "\n";
exit $main::max_code;
_______________________________________________
mon mailing list
mon@linux.kernel.org
http://linux.kernel.org/mailman/listinfo/mon

Reply via email to