Hi,

I made the STONITH plug-in that can be used by both Xen and KVM.

I would like to hear any opinion.

Best regards,
Yasumasa OZAKI



#!/bin/sh
#
# External STONITH module for Xen/KVM hypervisor through ssh.
# Uses Xen/KVM hypervisor as a STONITH device to control guest.
#
# Copyright (c) 2010 NIPPON TELEGRAPH AND TELEPHONE CORPORATION
#

STOP_COMMAND="virsh destroy"
START_COMMAND="virsh start"
DUMP_COMMAND="virsh dump"
SSH_COMMAND="/usr/bin/ssh -q -x -n"

# Rewrite the hostlist to accept "," as a delimeter for hostnames too.
hostlist=`echo $hostlist | tr ',' ' '`

CheckIfDead() {
    for j in 1 2 3 4 5
    do
        if ! ping -w1 -c1 "$1" >/dev/null 2>&1
        then
            return 0
        fi
        sleep 1
    done
    
    return 1
}

CheckHostList() {
    if [ "x" = "x$hostlist" ]
    then
        ha_log.sh err "hostlist isn't set"
        exit 1
    fi
}

CheckHypervisor() {
    if [ "x" = "x$hypervisor" ]
    then
        ha_log.sh err "hypervisor isn't set"
        exit 1
    fi
}

RunCommand() {
    CheckHostList
    CheckHypervisor
    
    for node in $hostlist
    do
        if [ "$node" != "$1" ]
        then
            continue
        fi
          
        case $2 in
            stop)
                if [ "x$run_dump" != "x" ]
                then
                    #Need to run core dump
                    if [ "x$dump_dir" != "x" ]
                    then
                        TIMESTAMP=`date +%Y-%m%d-%H%M.%S`
                        DOMAINNAME=`printf "%s" $node`
                        COREFILE=$dump_dir/$TIMESTAMP-$DOMAINNAME.core
                        #Run core dump
                        command_result="$($SSH_COMMAND $hypervisor " \
pgrep -f ^virsh_$node >/dev/null 2>&1; \
if [ \$? = 0 ]; then echo RUNNING; exit; fi; \
mkdir -p $dump_dir; \
if [ \$? != 0 ]; then echo MKDIR_FAILED; exit; fi; \
(exec -a virsh_$node $DUMP_COMMAND $node $COREFILE >/dev/null 2>&1); \
if [ \$? != 0 ]; then echo DUMP_FAILED; exit; fi; \
echo OK")"
                        ssh_result=$?
                        if [ $ssh_result = 0 ]
                        then
                            case "$command_result" in
                                RUNNING)
                                    ha_log.sh info "Dump is already running"
                                    exit 0
                                    ;;
                                SUSPEND_FAILED)
                                    ha_log.sh err "Failed to suspend domain 
$node"
                                    ;;
                                MKDIR_FAILED)
                                    ha_log.sh err "Failed to create directory 
$dump_dir"
                                    ;;
                                DUMP_FAILED)
                                    ha_log.sh err "Failed to core dump domain 
$node to $COREFILE"
                                    ;;
                                OK)
                                    ha_log.sh notice "Domain $node dumped to 
$COREFILE"
                                    ;;
                            esac
                        else
                            ha_log.sh err "Couldn't connect to hypervisor 
$hypervisor"
                        fi
                    else
                        ha_log.sh err "dump_dir isn't set"
                    fi
                fi

                command_result=$($SSH_COMMAND $hypervisor "((sleep 2; 
$STOP_COMMAND $node) >/dev/null 2>&1 &); echo \$?")
                ssh_result=$?
                if [ $ssh_result = 0 ]
                then
                    if [ $command_result = 0 ]
                    then
                        ha_log.sh notice "Domain $node is stoped"
                    else
                        ha_log.sh err "Failed to stop domain $node"
                    fi
                else
                    ha_log.sh err "Couldn't connect to hypervisor $hypervisor"
                fi
                break;;
            start)
                command_result=$($SSH_COMMAND $hypervisor "((sleep 2; 
$START_COMMAND $node) >/dev/null 2>&1 &); echo \$?")
                ssh_result=$?
                if [ $ssh_result = 0 ]
                then
                    if [ $command_result = 0 ]
                    then
                        ha_log.sh notice "Domain $node is started"
                    else
                        ha_log.sh err "Failed to start domain $node"
                    fi
                else
                    ha_log.sh err "Couldn't connect to hypervisor $hypervisor"
                fi
                break;;
        esac
        exit 0
    done
}


# Main code

case $1 in
gethosts)
    CheckHostList
    
    for node in $hostlist ; do
        echo $node
    done
    exit 0
    ;;
on)
    RunCommand $2 start
    exit $?
    ;;
off)
    if RunCommand $2 stop
    then
        if CheckIfDead $2
        then 
            exit 0
        fi
    fi
           
    exit 1
    ;;
reset)
    RunCommand $2 stop
        
    if CheckIfDead $2
    then
        RunCommand $2 start 
        exit 0
    fi

    exit 1
    ;;
status)
    exit 0
    ;;
getconfignames)
    echo "hostlist hypervisor"
    exit 0
    ;;
getinfo-devid)
    echo "virsh STONITH device"
    exit 0
    ;;
getinfo-devname)
    echo "virsh STONITH external device"
    exit 0
    ;;
getinfo-devdescr)
    echo "ssh-based Linux host reset for Xen/KVM guest domain trough hypervisor"
    echo "Fine for testing, but not really suitable for production!"
    exit 0
    ;;
getinfo-devurl)
    echo "http://openssh.org http://www.xensource.com/ http://linux-ha.org/wiki";
    exit 0
    ;;
getinfo-xml)
    cat << SSHXML
<parameters>
<parameter name="hostlist" unique="1" required="1">
<content type="string" />
<shortdesc lang="en">
Hostlist
</shortdesc>
<longdesc lang="en">
The list of controlled nodes.
For example: "node1 node2"
</longdesc>
</parameter>
<parameter name="hypervisor" unique="1" required="1">
<content type="string" />
<shortdesc lang="en">
Hypervisor hostname
</shortdesc>
<longdesc lang="en">
Host name to execute hypervisor. Root user shall be able to ssh to that node.
</longdesc>
</parameter>
<parameter name="run_dump" unique="0" required="0">
<content type="string" />
<shortdesc lang="en">
Run core dump
</shortdesc>
<longdesc lang="en">
If set plugin will call "virsh dump" before killing guest domain
</longdesc>
</parameter>
<parameter name="dump_dir" unique="1" required="0">
<content type="string" />
<shortdesc lang="en">
Run dump core with the specified directory 
When the "run_dump" parameter is set, this parameter is indispensable 
</shortdesc>
<longdesc lang="en">
This parameter can indicate the dump destination.
Should be set as a full path format, ex.) "/var/log/dump"
The above example would dump the core, like;
/var/log/dump/2009-0316-1403.37-GuestDomain.core
</longdesc>
</parameter>
</parameters>
SSHXML
    exit 0
    ;;
*)
    exit 1
    ;;
esac
_______________________________________________________
Linux-HA-Dev: [email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

Reply via email to