Hi,

16.02.2012 02:02, Jake Smith wrote:
> When using upstart jobs in Pacemaker I haven't been able to find
> much of anything for documentation. After reading a post a few minutes ago by
> Andreas I wanted to verify...
> 
> Are upstart jobs expected to conform to the LSB spec with regards to exit 
> codes, etc?
> Is there any reference documentation using upstart resources in Pacemaker?
> Or any good advice :-)

Newer versions of pacemaker and lrmd are able to deal with upstart
resources via dbus.
However, I do not like this approach, so please find attached a resource agent
which is able to manage an arbitrary upstart job (just like the Anything agent,
but for upstart resources). It has already saved me a lot of time and nerves
managing libvirtd (with my own upstart job), which, as you probably already
know, always wants to SIGABRT (btw, I even know the main reason for that
and am now testing a patch which I will hopefully send to the libvirt ml).

Best,
Vladislav
#!/bin/bash
#
# OCF resource agent which manages upstart jobs.
#
# Copyright (c) 2011 Vladislav Bogdanov <bub...@hoster-ok.com>
#
# OCF instance parameters:
#    OCF_RESKEY_job_name:      name of upstart job, optionally followed by
#                              instance arguments (e.g. "my_job INSTANCE=1")
#    OCF_RESKEY_process_name:  name of process launched by the job
#    OCF_RESKEY_check_command: optional extra command run on monitor
#    OCF_RESKEY_check_timeout: seconds to wait for check_command (default 5)
#    OCF_RESKEY_check_action:  optional command run when check_command fails
#
# Initialization:

# Use the conventional location only when OCF_FUNCTIONS_DIR is not already set.
# Quoted so that the expanded path is never word-split or globbed.
: "${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}"
# shellcheck source=/dev/null
. "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"

# Defaults

# meta_data
#
# Print the OCF resource-agent metadata (XML) for this agent on stdout.
# The heredoc delimiter is quoted so the XML is emitted literally, with no
# parameter or command expansion.
meta_data() {
    cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="upstart-resource">
<version>1.0</version>

<longdesc lang="en">
This RA manages upstart jobs as HA resources.
</longdesc>
<shortdesc lang="en">Manage upstart job</shortdesc>

<parameters>

<parameter name="job_name" unique="1" required="1">
<longdesc lang="en">
The name of the upstart job.
Can also contain job instance appended after space.
Example: job_name="my_job INSTANCE=1"
</longdesc>
<shortdesc lang="en">Job name</shortdesc>
<content type="string"/>
</parameter>

<parameter name="process_name" unique="0" required="1">
<longdesc lang="en">
The name of the process which is to be launched by upstart job.
</longdesc>
<shortdesc lang="en">Process name</shortdesc>
<content type="string"/>
</parameter>

<parameter name="check_command" unique="0" required="0">
<longdesc lang="en">
Additional command to run on monitor.
</longdesc>
<shortdesc lang="en">Additional monitor command</shortdesc>
<content type="string"/>
</parameter>

<parameter name="check_timeout" unique="0" required="0">
<longdesc lang="en">
How many seconds to wait for check command to finish.
</longdesc>
<shortdesc lang="en">Monitor command timeout</shortdesc>
<content type="integer" default="5" />
</parameter>

<parameter name="check_action" unique="0" required="0">
<longdesc lang="en">
What to run if monitor command fails or times out.
</longdesc>
<shortdesc lang="en">Monitor failure action</shortdesc>
<content type="string"/>
</parameter>

</parameters>

<actions>
<action name="start"   timeout="30" />
<action name="stop"    timeout="30" />
<action name="reload"  timeout="30" />
<action name="monitor" depth="0"  timeout="30" interval="10"/>
<action name="meta-data"  timeout="5" />
<action name="validate-all"  timeout="30" />
</actions>
</resource-agent>
END
}

# usage
#
# Print a short synopsis of the supported actions on stdout.
usage() {
    printf '%s\n' \
        "usage: $0 {start|stop|reload|monitor|validate-all|meta-data}" \
        "" \
        "Expects to have a fully populated OCF RA-compliant environment set."
}

# start STATUS
#
# Start the upstart job named by OCF_RESKEY_job_name.
#
# Arguments:
#   $1 - job status line as previously obtained from "initctl status"
# Returns:
#   $OCF_SUCCESS     if the job was already running or was started
#   $OCF_ERR_GENERIC if monitor does not report the job as running afterwards
start() {
    local status=$1
    local ret

    # Nothing to do when the job is already up.
    monitor "${status}"
    if [ $? -eq "$OCF_SUCCESS" ]; then
        return "$OCF_SUCCESS"
    fi

    # job_name is deliberately left unquoted: it may carry instance
    # arguments after the job name (e.g. "my_job INSTANCE=1").
    # shellcheck disable=SC2086
    status=$( initctl start ${OCF_RESKEY_job_name} 2>&1 )

    # Let monitor judge the result from the initctl output.
    monitor "${status}"
    case $? in
        "$OCF_SUCCESS")
            ocf_log info "Upstart job ${OCF_RESKEY_job_name} started successfully."
            ret=$OCF_SUCCESS
            ;;
        *)
            ocf_log err "Failed to start upstart job ${OCF_RESKEY_job_name}."
            ret=$OCF_ERR_GENERIC
            ;;
    esac
    return "${ret}"
}

# stop STATUS
#
# Stop the upstart job named by OCF_RESKEY_job_name.
#
# Arguments:
#   $1 - job status line as previously obtained from "initctl status"
# Returns:
#   $OCF_SUCCESS     if the job was already stopped or was stopped
#   $OCF_ERR_GENERIC if monitor does not report $OCF_NOT_RUNNING afterwards
stop() {
    local status=$1
    local ret

    # Nothing to do when the job is already down.
    monitor "${status}"
    if [ $? -eq "$OCF_NOT_RUNNING" ]; then
        return "$OCF_SUCCESS"
    fi

    # job_name is deliberately left unquoted: it may carry instance
    # arguments after the job name (e.g. "my_job INSTANCE=1").
    # shellcheck disable=SC2086
    status=$( initctl stop ${OCF_RESKEY_job_name} 2>&1 )

    # Let monitor judge the result from the initctl output.
    monitor "${status}"
    case $? in
        "$OCF_NOT_RUNNING")
            ocf_log info "Upstart job ${OCF_RESKEY_job_name} stopped successfully."
            ret=$OCF_SUCCESS
            ;;
        *)
            ocf_log err "Failed to stop upstart job ${OCF_RESKEY_job_name}."
            ret=$OCF_ERR_GENERIC
            ;;
    esac
    return "${ret}"
}

# get_status VARNAME
#
# Query "initctl status" for OCF_RESKEY_job_name and store the first line
# of its output (stdout and stderr combined) in the variable named VARNAME.
#
# Arguments:
#   $1 - name of the variable that receives the status line
# Exits:
#   $OCF_ERR_INSTALLED if initctl reports "Unknown job"
get_status() {
    local _output

    # job_name is deliberately left unquoted: it may carry instance
    # arguments after the job name (e.g. "my_job INSTANCE=1").
    # shellcheck disable=SC2086
    _output=$( initctl status ${OCF_RESKEY_job_name} 2>&1 )
    if [[ "${_output}" == *"Unknown job"* ]]; then
        ocf_log err "Unknown upstart job ${OCF_RESKEY_job_name}"
        exit "$OCF_ERR_INSTALLED"
    fi

    # Leave only first line (main process)
    _output=${_output%%$'\n'*}

    # Store job status for later consumption (indirect assignment, no eval)
    printf -v "$1" '%s' "${_output}"
}

# monitor STATUS
#
# Decide whether the upstart job is running, starting from the status line
# passed in and re-polling via get_status (once per second) while the job is
# in a "start" goal but its main process is not yet the expected one.
#
# The job counts as running when upstart reports "start/running" with a pid,
# that pid is alive, and the "Name:" field of /proc/<pid>/status equals
# OCF_RESKEY_process_name.
#
# When the job is running and OCF_RESKEY_check_command is set, that command
# is run under timeout(1); if it fails and OCF_RESKEY_check_action is set,
# the action is run. Neither outcome changes the return value.
#
# Arguments:
#   $1 - job status line (initctl output)
# Environment:
#   OCF_RESKEY_CRM_meta_timeout - operation timeout in milliseconds; when
#       unset, 30000 is assumed (the timeout advertised in the metadata)
# Returns:
#   $OCF_SUCCESS or $OCF_NOT_RUNNING
monitor() {
    local status=$1
    local pid
    local ret=$OCF_NOT_RUNNING
    local process
    local state
    local rc
    local check_timeout=5
    # initctl prints only the job name, followed by the instance in
    # parentheses, so instance arguments such as " INSTANCE=1" are dropped
    # before the name is used in the pattern.
    local job=${OCF_RESKEY_job_name%% *}
    # upstart can report:
    # <job_name> (instance) start/[running|pre-start], process (item0) pid
    # Capture groups: 1=instance, 2=state, 3=process item, 4=pid
    local status_re="^${job}( \(.+\)){0,1} start/([a-z-]+), process (\(.+\) ){0,1}([0-9]+)"
    # Operation timeout minus 5 seconds
    local attempts=$(( ${OCF_RESKEY_CRM_meta_timeout:-30000} / 1000 - 5 ))
    local i=0

    # Reserve time for the optional check command as well.
    if ocf_is_decimal "${OCF_RESKEY_check_timeout}" ; then
        attempts=$(( attempts - OCF_RESKEY_check_timeout ))
    fi

    if [ "${attempts}" -le 0 ] ; then
        attempts=0
    fi

    # We first receive output from outside, then re-poll for it
    while [ "${ret}" -eq "$OCF_NOT_RUNNING" ] ; do
        if [[ "${status}" =~ ${status_re} ]] ; then
            state=${BASH_REMATCH[2]}
            case ${state} in
                running)
                    pid=${BASH_REMATCH[4]}
                    if kill -0 "${pid}" ; then
                        process=$( awk '/^Name:/ {print $2}' < "/proc/${pid}/status" )
                        if [ "${process}" != "${OCF_RESKEY_process_name}" ] ; then
                            # job is started, but it has not yet launched
                            # the process itself
                            (( i == 0 )) && ocf_log info "pid ${pid} corresponds to process ${process} instead of ${OCF_RESKEY_process_name}, waiting."
                        else
                            ret=$OCF_SUCCESS
                        fi
                    else
                        # This will cause resource to be marked as
                        # 'Started FAILED' with subsequent stop and start
                        (( i == 0 )) && ocf_log info "upstart reports process ${pid} is running, but it really isn't, waiting."
                    fi
                    ;;
                *)
                    # pre-start or any other transitional state: just wait
                    ;;
            esac

            if [ "${ret}" -eq "$OCF_NOT_RUNNING" ] ; then
                # Wait for upstart to recover started job
                if (( i++ >= attempts )) ; then
                    ocf_log err "Timed out waiting for process ${OCF_RESKEY_process_name} pid ${pid} to appear."
                    break
                fi
                sleep 1
                get_status status
            fi
        else
            # Job is not launched
            ocf_log info "${status}"
            break
        fi
    done

    if [ "${ret}" -eq "$OCF_SUCCESS" ] && [ -n "${OCF_RESKEY_check_command}" ] ; then
        if ocf_is_decimal "${OCF_RESKEY_check_timeout}" ; then
            check_timeout=${OCF_RESKEY_check_timeout}
        fi
        # check_command / check_action are command lines and are split into
        # words on purpose.
        # shellcheck disable=SC2086
        timeout -s KILL "${check_timeout}" ${OCF_RESKEY_check_command} >/dev/null 2>&1
        rc=$?
        if [ "${rc}" -ne 0 ] ; then
            ocf_log warn "check_command '${OCF_RESKEY_check_command}' exited with status ${rc}."
            if [ -n "${OCF_RESKEY_check_action}" ] ; then
                ocf_log warn "Running repair command '${OCF_RESKEY_check_action}'."
                # shellcheck disable=SC2086
                ${OCF_RESKEY_check_action} >/dev/null 2>&1
            fi
        fi
    fi

    return "${ret}"
}

# validate
#
# Make sure initctl is available and that the mandatory instance
# parameters (job_name, process_name) are non-empty.
#
# Returns $OCF_SUCCESS; exits with $OCF_ERR_CONFIGURED when a mandatory
# parameter is empty.
validate() {
    local param var

    check_binary initctl

    # Checked in this order so the first missing parameter is the one reported.
    for param in job_name process_name; do
        var="OCF_RESKEY_${param}"
        if [ -z "${!var}" ]; then
            ocf_log err "Empty ${param} parameter."
            exit $OCF_ERR_CONFIGURED
        fi
    done

    return $OCF_SUCCESS
}

# Actions that need neither a validated configuration nor a running upstart.
case $__OCF_ACTION in
    meta-data)
        meta_data
        exit $OCF_SUCCESS
        ;;
    usage|help)
        usage
        exit $OCF_SUCCESS
        ;;
esac

if [ "$(uname)" != "Linux" ] ; then
    ocf_log err "This RA works only on linux."
    exit $OCF_ERR_INSTALLED
fi

validate || exit $?

# Poll the job once; every action below starts from this status line.
status=""
get_status status

case $__OCF_ACTION in
    start)
        start "${status}"
        ;;
    stop)
        stop "${status}"
        ;;
    monitor)
        monitor "${status}"
        ;;
    reload)
        # Implemented as stop followed by start when the job is running.
        if monitor "${status}" ; then
            # stop must receive the current status: with an empty status it
            # would treat the job as not running and return success without
            # stopping anything.
            if stop "${status}" ; then
                # Re-poll job status
                get_status status
                start "${status}"
            else
                exit $OCF_ERR_GENERIC
            fi
        else
            start "${status}"
        fi
        ;;
    validate-all)
        # validate already ran above; nothing more to do.
        ;;
    *)
        usage
        exit $OCF_ERR_UNIMPLEMENTED
        ;;
esac

# Propagate the exit status of the action that ran.
exit $?
_______________________________________________
Pacemaker mailing list: Pacemaker@oss.clusterlabs.org
http://oss.clusterlabs.org/mailman/listinfo/pacemaker

Project Home: http://www.clusterlabs.org
Getting started: http://www.clusterlabs.org/doc/Cluster_from_Scratch.pdf
Bugs: http://bugs.clusterlabs.org

Reply via email to