Hi, all

I have improved the logging of the oracle and oralsnr RAs.
These patches capture the output of sqlplus (and other commands)
and write a detailed failure message via ocf_log whenever a
problem occurs during an RA operation.

Best Regards,
NAKAHIRA Kazutomo

-- 
----------------------------------------
NAKAHIRA Kazutomo
NTT DATA INTELLILINK CORPORATION
Open Source Business Unit
Software Services Integration Business Division
# HG changeset patch
# User r...@prec370b
# Date 1264145021 -32400
# Node ID 0ecef9560522601936888ce168dae5f563662402
# Parent  3024963150433960c51aa1bdccde39839efb09b7
oracle: improve logging

diff -r 302496315043 -r 0ecef9560522 heartbeat/oracle
--- a/heartbeat/oracle	Thu Jan 21 16:42:40 2010 +0100
+++ b/heartbeat/oracle	Fri Jan 22 16:23:41 2010 +0900
@@ -231,11 +231,23 @@ ora_info() {
 
 testoraenv() {
 	#	Let's make sure a few important things are set...
-	[ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
+	if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
+		ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
 		return 1
+	fi
 	#	and some important things are there
-	[ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
+	if [ ! -x "$sqlplus" ]; then
+		ocf_log err "Executeble sqlplus command($sqlplus) dose not exist."
 		return 1
+	fi
+	if [ ! -x "$lsnrctl" ]; then
+		ocf_log err "Executeble lsnrctl command($lsnrctl) dose not exist."
+		return 1
+	fi
+	if [ ! -x "$tnsping" ]; then
+		ocf_log err "Executeble tnsping command($tnsping) dose not exist."
+		return 1
+	fi
 	return 0
 }
 
@@ -367,13 +379,20 @@ showdbstat() {
 # Part 1: Oracle
 dumpinstipc() {
 	local dumpdest=`dbasql getdumpdest`
-	[ "x$dumpdest" != x -a -d "$dumpdest" ] || return 1
+	if [ "x$dumpdest" == x -o ! -d "$dumpdest" ]; then
+		ocf_log warn "dumpdest($dumpdest) is not a regular directory."
+		return 1
+	fi
 	local -i fcount=`ls -rt $dumpdest | wc -l`
-	dbasql getipc >/dev/null 2>&1
+	output=`dbasql getipc`
 	local lastf=`ls -rt $dumpdest | grep -v '^\.*$' | tail -1`
 	local -i fcount2=`ls -rt $dumpdest | wc -l`
-	[ $((fcount+1)) -eq $fcount2 ] || return 1  # more than one file created
-	echo $dumpdest/$lastf
+	if [ $((fcount+1)) -eq $fcount2 ]; then
+		echo $dumpdest/$lastf
+	else
+		ocf_log warn "dumpinstipc failed bacause the number of output files is wrong. before dump file count=$fcount, after dump file count=$fcount2, getipc result=$output"
+		return 1
+	fi
 }
 parseipc() {
 	local inf=$1
@@ -440,7 +459,13 @@ is_oracle_up() {
 }
 # instance in OPEN state?
 instance_live() {
-	[ "`dbasql dbstat`" = OPEN ]
+	output=`dbasql dbstat`
+	if [ "$output" = OPEN ]; then
+		return 0
+	else
+		ocf_log info "Instance state is not OPEN. dbstat result=$output"
+		return 1
+	fi
 }
 
 ora_cleanup() {
@@ -498,6 +523,7 @@ oracle_start() {
 		# try to cleanup in case of
 		# ORA-01081: cannot start already-running ORACLE - shut it down first
 		if echo "$output" | grep ORA-01081 >/dev/null 2>&1; then
+			ocf_log info "ORA-01081 error was found. try to cleanup oracle. DB start output=$output"
 			ora_cleanup
 			output=`dbasql dbstart_mount`
 		fi
@@ -510,7 +536,7 @@ oracle_start() {
 		;;
 	*)
 		: error!!
-		ocf_log error "Oracle $ORACLE_SID can not mount."
+		ocf_log error "Oracle $ORACLE_SID can not mount. DB status=$status, DB start output=$output"
 		return $OCF_ERR_GENERIC
 		;;
 	esac
@@ -523,13 +549,16 @@ oracle_start() {
 	fi
 	output=`dbasql dbopen`
 
-	if is_oracle_up && instance_live; then
+	if ! is_oracle_up; then
+		ocf_log err "Oracle process is not started: $output"
+		return $OCF_ERR_GENERIC
+	elif ! instance_live; then
+		ocf_log err "Oracle instance $ORACLE_SID not started: $output"
+		return $OCF_ERR_GENERIC
+	else
 		: cool, we are up and running
 		ocf_log info "Oracle instance $ORACLE_SID started: $output"
 		return $OCF_SUCCESS
-	else
-		ocf_log err "Oracle instance $ORACLE_SID not started: $output"
-		return $OCF_ERR_GENERIC
 	fi
 }
 
@@ -562,13 +591,21 @@ killprocs() {
 killprocs() {
 	local sig=$1
 	shift 1
-	kill -$sig $* >/dev/null 2>&1
+	# Record stderr
+	kill -$sig $* >/dev/null
 }
 ora_kill() {
-	killprocs TERM `eval $procs | awk '{print $1}'`
+	oraprocs=`eval $procs | awk '{print $1}'`
+	if [ -z "$oraprocs" ]; then
+		ocf_log debug "All oracle processes are already stopped."
+		return
+	fi
+	killprocs TERM $oraprocs
 	for i in 1 2 3 4 5; do
-		killprocs 0 `eval $procs | awk '{print $1}'` ||
+		if [ -z "`eval $procs | awk '{print $1}'`" ]; then
+			ocf_log debug "All oracle processes are killed."
 			return
+		fi
 		sleep 5
 	done
 	killprocs KILL `eval $procs | awk '{print $1}'`
@@ -578,14 +615,16 @@ ora_kill() {
 # oracle_monitor: Can the Oracle instance do anything useful?
 #
 oracle_monitor() {
-	if is_oracle_up && instance_live
-	then
-		#ocf_log info "Oracle instance $ORACLE_SID is alive"
-		return $OCF_SUCCESS
-	else
+	if ! is_oracle_up; then
+		ocf_log info "Oracle process is down"
+		return $OCF_NOT_RUNNING
+	fi
+	if ! instance_live; then
 		ocf_log info "Oracle instance $ORACLE_SID is down"
 		return $OCF_NOT_RUNNING
 	fi
+	#ocf_log info "Oracle instance $ORACLE_SID is alive"
+	return $OCF_SUCCESS
 }
 
 #
@@ -675,10 +714,10 @@ case "$1" in
 
   status)	if is_oracle_up
 		then
-		  echo Oracle instance $ORACLE_SID is running
+		  ocf_log info "Oracle instance $ORACLE_SID is running"
 		  exit $OCF_SUCCESS
 		else
-		  echo Oracle instance $ORACLE_SID is stopped
+		  ocf_log info "Oracle instance $ORACLE_SID is stopped"
 		  exit $OCF_NOT_RUNNING
 		fi
 		;;
# HG changeset patch
# User r...@prec370b
# Date 1264145083 -32400
# Node ID b36abb1554500f4bdf33858989205d02606c609d
# Parent  0ecef9560522601936888ce168dae5f563662402
oralsnr: improve logging

diff -r 0ecef9560522 -r b36abb155450 heartbeat/oralsnr
--- a/heartbeat/oralsnr	Fri Jan 22 16:23:41 2010 +0900
+++ b/heartbeat/oralsnr	Fri Jan 22 16:24:43 2010 +0900
@@ -158,11 +158,23 @@ ora_info() {
 
 testoraenv() {
 	#	Let's make sure a few important things are set...
-	[ x != "x$ORACLE_HOME" -a x != "x$ORACLE_OWNER" ] ||
+	if [ x == "x$ORACLE_HOME" -o x == "x$ORACLE_OWNER" ]; then
+		ocf_log err "Either of ORACLE_HOME or ORACLE_OWNER is NULL. ORACLE_HOME=$ORACLE_HOME, ORACLE_OWNER=$ORACLE_OWNER."
 		return 1
+	fi
 	#	and some important things are there
-	[ -x "$sqlplus" -a -x "$lsnrctl" -a -x "$tnsping" ] ||
+	if [ ! -x "$sqlplus" ]; then
+		ocf_log err "Executeble sqlplus command($sqlplus) dose not exist."
 		return 1
+	fi
+	if [ ! -x "$lsnrctl" ]; then
+		ocf_log err "Executeble lsnrctl command($lsnrctl) dose not exist."
+		return 1
+	fi
+	if [ ! -x "$tnsping" ]; then
+		ocf_log err "Executeble tnsping command($tnsping) dose not exist."
+		return 1
+	fi
 	return 0
 }
 
@@ -247,10 +259,17 @@ oralsnr_stop() {
 # kill the listener procs
 # give them 10 secs to exit cleanly (5 times 2)
 oralsnr_kill() {
-	killprocs TERM `eval $procs | awk '{print $1}'`
+	oraprocs=`eval $procs | awk '{print $1}'`
+	if [ -z "$oraprocs" ]; then
+		ocf_log debug "All oralsnr processes are already stopped."
+		return
+	fi
+	killprocs TERM $oraprocs
 	for i in 1 2 3 4 5; do
-		killprocs 0 `eval $procs | awk '{print $1}'` ||
+		if [ -z "`eval $procs | awk '{print $1}'`" ]; then
+			ocf_log debug "All oralsnr processes are killed."
 			return
+		fi
 		sleep 2
 	done
 	killprocs KILL `eval $procs | awk '{print $1}'`
@@ -258,7 +277,8 @@ killprocs() {
 killprocs() {
 	sig=$1
 	shift 1
-	kill -$sig $* >/dev/null 2>&1
+	# Record stderr
+	kill -$sig $* >/dev/null
 }
 
 #
@@ -269,11 +289,23 @@ is_oralsnr_up() {
 	[ x != "x`eval $procs`" ]
 }
 oralsnr_status() {
-	$lsnrctl status $listener | tail -1 | grep -qs 'completed successfully'
+	output=`$lsnrctl status $listener`
+	echo "$output" | tail -1 | grep -qs 'completed successfully'
+	RET=$?
+	if [ $RET -ne 0 ]; then
+		ocf_log info "$listener status failed: $output"
+	fi
+	return $RET
 }
 # and does it work?
 tnsping() {
-	$tnsping $ORACLE_SID | tail -1 | grep -qs '^OK'
+	output=`$tnsping $ORACLE_SID`
+	echo "$output" | tail -1 | grep -qs '^OK'
+	RET=$?
+	if [ $RET -ne 0 ]; then
+		ocf_log info "$tnsping $ORACLE_SID failed: $output"
+	fi
+	return $RET
 }
 
 #
@@ -285,10 +317,10 @@ oralsnr_monitor() {
 		: good
 		#ocf_log info "Listener $listener running"
 		return $OCF_SUCCESS
-    else
+	else
 		ocf_log info "Listener $listener not running"
 		return $OCF_NOT_RUNNING
-    fi
+	fi
 }
 
 #
@@ -368,10 +400,10 @@ case "$1" in
 
   status)	if oralsnr_status
 		then
-		  echo Listener $listener is running
+		  ocf_log info "Listener $listener is running"
 		  exit $OCF_SUCCESS
 		else
-		  echo Listener $listener is stopped
+		  ocf_log info "Listener $listener is stopped"
 		  exit $OCF_NOT_RUNNING
 		fi
 		;;
_______________________________________________________
Linux-HA-Dev: Linux-HA-Dev@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/

Reply via email to