# HG changeset patch
# User Alexander Krauth <[email protected]>
# Date 1293543879 -3600
# Node ID 21799a0e1aa04c1a5fb973388bf6cbfce9804408
# Parent bdaecdef3b6af70db1c21b41bf09c1f95ed817b7
High: SAPInstance: Fixed monitor_clone function for pacemaker
diff -r bdaecdef3b6a -r 21799a0e1aa0 heartbeat/SAPInstance
--- a/heartbeat/SAPInstance Tue Dec 28 14:43:56 2010 +0100
+++ b/heartbeat/SAPInstance Tue Dec 28 14:44:39 2010 +0100
@@ -32,6 +32,10 @@
# OCF_RESKEY_PRE_STOP_USEREXIT (optional, lists a script which can be executed before the resource is stopped)
# OCF_RESKEY_POST_STOP_USEREXIT (optional, lists a script which can be executed after the resource is stopped)
#
+# TODO: - Option to shutdown sapstartsrv for non-active instances -> that means: do probes only with OS tools (sapinstance_status)
+# - Option for better standalone enqueue server monitoring, using ensmon (test enque-deque)
+# - Option for cleanup abandoned enqueue replication tables
+#
#######################################################################
# Initialization:
@@ -68,7 +72,7 @@
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SAPInstance">
-<version>2.11</version>
+<version>2.12</version>
<shortdesc lang="en">Manages a SAP instance as an HA resource.</shortdesc>
<longdesc lang="en">
@@ -705,7 +709,7 @@
#
sapinstance_start_clone() {
sapinstance_init $OCF_RESKEY_ERS_InstanceName
- ${HA_SBIN_DIR}/crm_master -v 100 -l reboot
+ ${HA_SBIN_DIR}/crm_master -v 50 -l reboot
sapinstance_start
return $?
}
@@ -726,17 +730,38 @@
# sapinstance_monitor_clone
#
sapinstance_monitor_clone() {
- # Check status of potential master first
+ # first check with the status function (OS tools) if there could be something like a SAP instance running
+ # as we do not know here, if we are in master or slave state we do not want to start our monitoring
+ # agents (sapstartsrv) on the wrong host
+
sapinstance_init $OCF_RESKEY_InstanceName
- sapinstance_monitor
+ sapinstance_status
rc=$?
- [ $rc -eq $OCF_SUCCESS ] && return $OCF_RUNNING_MASTER
- [ $rc -ne $OCF_NOT_RUNNING ] && return $OCF_FAILED_MASTER
-
- # The master isn't running, and there were no errors, try ERS
- sapinstance_init $OCF_RESKEY_ERS_InstanceName
- sapinstance_monitor
- rc=$?
+ if [ $rc -eq $OCF_SUCCESS ]; then
+ sapinstance_monitor
+ rc=$?
+ if [ $rc -eq $OCF_SUCCESS ]; then
+ ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot
+ return $OCF_RUNNING_MASTER
+ else
+ ${HA_SBIN_DIR}/crm_master -v 10 -l reboot # by nature of the SAP enqueue server we have to make sure
+ # that we do a failover to the slave (enqueue replication server)
+ # in case the enqueue process has failed. We signal this to the
+ # cluster by setting our master preference to a lower value than the slave.
+ return $OCF_FAILED_MASTER
+ fi
+ else
+ sapinstance_init $OCF_RESKEY_ERS_InstanceName
+ sapinstance_status
+ rc=$?
+ if [ $rc -eq $OCF_SUCCESS ]; then
+ sapinstance_monitor
+ rc=$?
+ if [ $rc -eq $OCF_SUCCESS ]; then
+ ${HA_SBIN_DIR}/crm_master -Q -v 100 -l reboot
+ fi
+ fi
+ fi
return $rc
}
@@ -782,16 +807,25 @@
#
-# sapinstance_notify: After promotion of one master in the cluster, we make sure that all clones reset thier master
-# value back to 100. This is because a failed monitor on a master might have degree one clone
-# instance to score 10.
+# sapinstance_notify: Handle master scoring - to make sure a slave gets the next master
#
sapinstance_notify() {
local n_type="$OCF_RESKEY_CRM_meta_notify_type"
local n_op="$OCF_RESKEY_CRM_meta_notify_operation"
if [ "${n_type}_${n_op}" = "post_promote" ]; then
+ # After promotion of one master in the cluster, we make sure that all clones reset their master
+ # value back to 100. This is because a failed monitor on a master might have degraded one clone
+ # instance to score 10.
${HA_SBIN_DIR}/crm_master -v 100 -l reboot
+ elif [ "${n_type}_${n_op}" = "pre_demote" ]; then
+ # if we are a slave and a demote event is announced, make sure we have the highest wish to become master
+ # that is, when a slave resource was started after the promote event of an already running master (e.g. node of slave was down)
+ # We also have to make sure to overrule the globally set resource_stickiness or any fail-count factors => INFINITY
+ local n_uname="$OCF_RESKEY_CRM_meta_notify_demote_uname"
+ if [ ${n_uname} != ${HOSTNAME} ]; then
+ ${HA_SBIN_DIR}/crm_master -v INFINITY -l reboot
+ fi
fi
}
_______________________________________________________
Linux-HA-Dev: [email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha-dev
Home Page: http://linux-ha.org/