Hi all, I have improved the logging of the oracle/oralsnr RAs. This patch aims to record the execution results of sqlplus (and other commands) and to output a detailed failure log via ocf_log when a problem occurs during an RA operation.
Best Regards, NAKAHIRA Kazutomo -- ---------------------------------------- NAKAHIRA Kazutomo NTT DATA INTELLILINK CORPORATION Open Source Business Unit Software Services Integration Business Division
# HG changeset patch # User r...@prec370b # Date 1264145021 -32400 # Node ID 0ecef9560522601936888ce168dae5f563662402 # Parent 3024963150433960c51aa1bdccde39839efb09b7 oracle: improve logging diff -r 302496315043 -r 0ecef9560522 heartbeat/oracle --- a/heartbeat/oracle Thu Jan 21 16:42:40 2010 +0100 +++ b/heartbeat/oracle Fri Jan 22 16:23:41 2010 +0900 @@ -231,11 +231,23 @@ ora_info() { testoraenv() { # Let's make sure a few important things are set... - [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] || + if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then + ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER." return 1 + fi # and some important things are there - [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] || + if [ ! -x "$sqlplus" ]; then + ocf_log err "Executeble sqlplus command($sqlplus) dose not exist." return 1 + fi + if [ ! -x "$lsnrctl" ]; then + ocf_log err "Executeble lsnrctl command($lsnrctl) dose not exist." + return 1 + fi + if [ ! -x "$tnsping" ]; then + ocf_log err "Executeble tnsping command($tnsping) dose not exist." + return 1 + fi return 0 } @@ -367,13 +379,20 @@ showdbstat() { # Part 1: Oracle dumpinstipc() { local dumpdest=`dbasql getdumpdest` - [ "x$dumpdest" != x -a -d "$dumpdest" ] || return 1 + if [ "x$dumpdest" == x -o ! -d "$dumpdest" ]; then + ocf_log warn "dumpdest($dumpdest) is not a regular directory." + return 1 + fi local -i fcount=`ls -rt $dumpdest | wc -l` - dbasql getipc >/dev/null 2>&1 + output=`dbasql getipc` local lastf=`ls -rt $dumpdest | grep -v '^\.*$' | tail -1` local -i fcount2=`ls -rt $dumpdest | wc -l` - [ $((fcount+1)) -eq $fcount2 ] || return 1 # more than one file created - echo $dumpdest/$lastf + if [ $((fcount+1)) -eq $fcount2 ]; then + echo $dumpdest/$lastf + else + ocf_log warn "dumpinstipc failed bacause the number of output files is wrong. 
before dump file count=$fcount, after dump file count=$fcount2, getipc result=$output" + return 1 + fi } parseipc() { local inf=$1 @@ -440,7 +459,13 @@ is_oracle_up() { } # instance in OPEN state? instance_live() { - [ "`dbasql dbstat`" = OPEN ] + output=`dbasql dbstat` + if [ "$output" = OPEN ]; then + return 0 + else + ocf_log info "Instance state is not OPEN. dbstat result=$output" + return 1 + fi } ora_cleanup() { @@ -498,6 +523,7 @@ oracle_start() { # try to cleanup in case of # ORA-01081: cannot start already-running ORACLE - shut it down first if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then + ocf_log info "ORA-01081 error was found. try to cleanup oracle. DB start output=$output" ora_cleanup output=`dbasql dbstart_mount` fi @@ -510,7 +536,7 @@ oracle_start() { ;; *) : error!! - ocf_log error "Oracle $ORACLE_SID can not mount." + ocf_log error "Oracle $ORACLE_SID can not mount. DB status=$status, DB start output=$output" return $OCF_ERR_GENERIC ;; esac @@ -523,13 +549,16 @@ oracle_start() { fi output=`dbasql dbopen` - if is_oracle_up && instance_live; then + if ! is_oracle_up; then + ocf_log err "Oracle process is not started: $output" + return $OCF_ERR_GENERIC + elif ! instance_live; then + ocf_log err "Oracle instance $ORACLE_SID not started: $output" + return $OCF_ERR_GENERIC + else : cool, we are up and running ocf_log info "Oracle instance $ORACLE_SID started: $output" return $OCF_SUCCESS - else - ocf_log err "Oracle instance $ORACLE_SID not started: $output" - return $OCF_ERR_GENERIC fi } @@ -562,13 +591,21 @@ killprocs() { killprocs() { local sig=$1 shift 1 - kill -$sig $* >/dev/null 2>&1 + # Record stderr + kill -$sig $* >/dev/null } ora_kill() { - killprocs TERM `eval $procs | awk '{print $1}'` + oraprocs=`eval $procs | awk '{print $1}'` + if [ -z "$oraprocs" ]; then + ocf_log debug "All oracle processes are already stopped." 
+ return + fi + killprocs TERM $oraprocs for i in 1 2 3 4 5; do - killprocs 0 `eval $procs | awk '{print $1}'` || + if [ -z "`eval $procs | awk '{print $1}'`" ]; then + ocf_log debug "All oracle processes are killed." return + fi sleep 5 done killprocs KILL `eval $procs | awk '{print $1}'` @@ -578,14 +615,16 @@ ora_kill() { # oracle_monitor: Can the Oracle instance do anything useful? # oracle_monitor() { - if is_oracle_up && instance_live - then - #ocf_log info "Oracle instance $ORACLE_SID is alive" - return $OCF_SUCCESS - else + if ! is_oracle_up; then + ocf_log info "Oracle process is down" + return $OCF_NOT_RUNNING + fi + if ! instance_live; then ocf_log info "Oracle instance $ORACLE_SID is down" return $OCF_NOT_RUNNING fi + #ocf_log info "Oracle instance $ORACLE_SID is alive" + return $OCF_SUCCESS } # @@ -675,10 +714,10 @@ case "$1" in status) if is_oracle_up then - echo Oracle instance $ORACLE_SID is running + ocf_log info "Oracle instance $ORACLE_SID is running" exit $OCF_SUCCESS else - echo Oracle instance $ORACLE_SID is stopped + ocf_log info "Oracle instance $ORACLE_SID is stopped" exit $OCF_NOT_RUNNING fi ;;
# HG changeset patch # User r...@prec370b # Date 1264145083 -32400 # Node ID b36abb1554500f4bdf33858989205d02606c609d # Parent 0ecef9560522601936888ce168dae5f563662402 oralsnr: improve logging diff -r 0ecef9560522 -r b36abb155450 heartbeat/oralsnr --- a/heartbeat/oralsnr Fri Jan 22 16:23:41 2010 +0900 +++ b/heartbeat/oralsnr Fri Jan 22 16:24:43 2010 +0900 @@ -158,11 +158,23 @@ ora_info() { testoraenv() { # Let's make sure a few important things are set... - [ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] || + if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then + ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER." return 1 + fi # and some important things are there - [ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] || + if [ ! -x "$sqlplus" ]; then + ocf_log err "Executeble sqlplus command($sqlplus) dose not exist." return 1 + fi + if [ ! -x "$lsnrctl" ]; then + ocf_log err "Executeble lsnrctl command($lsnrctl) dose not exist." + return 1 + fi + if [ ! -x "$tnsping" ]; then + ocf_log err "Executeble tnsping command($tnsping) dose not exist." + return 1 + fi return 0 } @@ -247,10 +259,17 @@ oralsnr_stop() { # kill the listener procs # give them 10 secs to exit cleanly (5 times 2) oralsnr_kill() { - killprocs TERM `eval $procs | awk '{print $1}'` + oraprocs=`eval $procs | awk '{print $1}'` + if [ -z "$oraprocs" ]; then + ocf_log debug "All oralsnr processes are already stopped." + return + fi + killprocs TERM $oraprocs for i in 1 2 3 4 5; do - killprocs 0 `eval $procs | awk '{print $1}'` || + if [ -z "`eval $procs | awk '{print $1}'`" ]; then + ocf_log debug "All oralsnr processes are killed." 
return + fi sleep 2 done killprocs KILL `eval $procs | awk '{print $1}'` @@ -258,7 +277,8 @@ killprocs() { killprocs() { sig=$1 shift 1 - kill -$sig $* >/dev/null 2>&1 + # Record stderr + kill -$sig $* >/dev/null } # @@ -269,11 +289,23 @@ is_oralsnr_up() { [ x != "x`eval $procs`" ] } oralsnr_status() { - $lsnrctl status $listener | tail -1 | grep -qs 'completed successfully' + output=`$lsnrctl status $listener` + echo "$output" | tail -1 | grep -qs 'completed successfully' + RET=$? + if [ $RET -ne 0 ]; then + ocf_log info "$listener status failed: $output" + fi + return $RET } # and does it work? tnsping() { - $tnsping $ORACLE_SID | tail -1 | grep -qs '^OK' + output=`$tnsping $ORACLE_SID` + echo "$output" | tail -1 | grep -qs '^OK' + RET=$? + if [ $RET -ne 0 ]; then + ocf_log info "$tnsping $ORACLE_SID failed: $output" + fi + return $RET } # @@ -285,10 +317,10 @@ oralsnr_monitor() { : good #ocf_log info "Listener $listener running" return $OCF_SUCCESS - else + else ocf_log info "Listener $listener not running" return $OCF_NOT_RUNNING - fi + fi } # @@ -368,10 +400,10 @@ case "$1" in status) if oralsnr_status then - echo Listener $listener is running + ocf_log info "Listener $listener is running" exit $OCF_SUCCESS else - echo Listener $listener is stopped + ocf_log info "Listener $listener is stopped" exit $OCF_NOT_RUNNING fi ;;
_______________________________________________________ Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev Home Page: http://linux-ha.org/