CVSROOT:        /cvs/cluster
Module name:    cluster
Branch:         RHEL5
Changes by:     [EMAIL PROTECTED]       2007-12-04 21:59:54

Modified files:
        rgmanager/src/resources: Makefile netfs.sh 
Added files:
        rgmanager/src/resources: default_event_script.sl 

Log message:
        Port force-unmount from RHEL4 branch

Patches:
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/default_event_script.sl.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=NONE&r2=1.1.2.1
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/Makefile.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.13.2.6&r2=1.13.2.7
http://sourceware.org/cgi-bin/cvsweb.cgi/cluster/rgmanager/src/resources/netfs.sh.diff?cvsroot=cluster&only_with_tag=RHEL5&r1=1.7.2.2&r2=1.7.2.3

/cvs/cluster/cluster/rgmanager/src/resources/default_event_script.sl,v  -->  
standard output
revision 1.1.2.1
--- cluster/rgmanager/src/resources/default_event_script.sl
+++ -   2007-12-04 21:59:54.602852000 +0000
@@ -0,0 +1,291 @@
+%
+% Returns 1 if node is an element of node_list, 0 otherwise.
+%
+define node_in_set(node_list, node)
+{
+       variable idx = 0;
+       variable count = length(node_list);
+
+       while (idx < count) {
+               if (node == node_list[idx])
+                       return 1;
+               idx++;
+       }
+
+       return 0;
+}
+
+%
+% Starts service through service_start(service, node_list), stopping it
+% first when it is currently owned by a node in node_list other than
+% node_list[0].
+%
+%   service   - name of the service to evaluate
+%   node_list - candidate nodes; node_list[0] is treated as the best node
+%
+% Returns:
+%   ERR_DEPEND  - the service named by the "depend" property has no owner
+%   ERR_DOMAIN  - node_list is empty
+%   -1          - the service is failed, or is disabled and this is not
+%                 a user event
+%   ERR_RUNNING - the current owner is already node_list[0]
+%   ERR_ABORT   - stopping the service before the move failed
+%   otherwise the return value of service_start()
+%
+define move_or_start(service, node_list)
+{
+       variable len;
+       variable state, owner;
+       variable depends;
+
+       % A service with a dependency may only run while the service it
+       % depends on has an owner.
+       depends = service_property(service, "depend");
+       if (depends != "") {
+               (owner, state) = service_status(depends);
+               if (owner < 0) {
+                       debug(service, " is not runnable; dependency not met");
+                       return ERR_DEPEND;
+               }
+       }
+
+       (owner, state) = service_status(service);
+       debug("Evaluating ", service, " state=", state, " owner=", owner);
+
+       len = length(node_list);
+       if (len == 0) {
+               debug(service, " is not runnable");
+               return ERR_DOMAIN;
+       }
+
+       if (((event_type != EVENT_USER) and (state == "disabled")) or
+           (state == "failed")) {
+               %
+               % Commenting out this block will -not- allow you to
+               % recover failed services from event scripts.  Sorry.
+               % All it will get you is a false log message about
+               % starting this service.
+               %
+               % You may enable disabled services, but I recommend
+               % against it.
+               %
+               debug(service, " is not runnable");
+               return -1;
+       }
+
+       if (node_list[0] == owner) {
+               debug(service, " is already running on best node");
+               return ERR_RUNNING;
+       }
+
+       % Running on an allowed, but not the best, node: stop it so that
+       % service_start() below can place it again.
+       if ((owner >= 0) and (node_in_set(node_list, owner) == 1)) {
+               notice("Moving ", service, " from ", owner,
+                      " to ", node_list);
+               if (service_stop(service) < 0) {
+                       return ERR_ABORT;
+               }
+       } else {
+               notice("Starting ", service, " on ", node_list);
+       }
+
+       return service_start(service, node_list);
+}
+
+
+%
+% Returns the set of online nodes in preferred/shuffled order which
+% are allowed to run this service.  The current owner is given highest
+% preference when nofailback is set or the failover domain is not
+% ordered.
+%
+define allowed_nodes(service)
+{
+       variable candidates;
+       variable shuffled;
+       variable nodes_domain;
+       variable ordered, restricted, nofailback;
+       variable state, owner;
+
+       (nofailback, restricted, ordered, nodes_domain) =
+                       service_domain_info(service);
+
+       (owner, state) = service_status(service);
+
+       % Shuffle the array so we don't start all services on the same
+       % node.  TODO - add RR, Least-services, placement policies...
+       shuffled = shuffle(nodes_online());
+
+       % Restricted domains only ever use the online domain members.
+       candidates = intersection(nodes_domain, shuffled);
+       if (restricted != 1) {
+               % Unioning the online domain members with the full online
+               % list basically just reorders the online node list
+               % according to failover domain priority rules.
+               candidates = union(candidates, shuffled);
+       }
+
+       % Ordered domain with failback allowed: keep the list as computed.
+       if ((nofailback != 1) and (ordered != 0)) {
+               return candidates;
+       }
+
+       % No current owner, or the owner is not in the candidate list.
+       if ((owner < 0) or (node_in_set(candidates, owner) == 0)) {
+               return candidates;
+       }
+
+       % Because union takes left as priority, unioning the current
+       % owner with the candidate list means the service will remain
+       % on the node it is currently on.
+       return union(owner, candidates);
+}
+
+
+%
+% Default handler for node events: for every service returned by
+% service_list(), computes its allowed nodes and calls move_or_start().
+%
+define default_node_event_handler()
+{
+       variable svc_names = service_list();
+       variable total = length(svc_names);
+       variable idx = 0;
+       variable svc;
+
+       % debug("Executing default node event handler");
+       while (idx < total) {
+               svc = svc_names[idx];
+               % The result of move_or_start() is discarded.
+               () = move_or_start(svc, allowed_nodes(svc));
+               idx++;
+       }
+}
+
+
+%
+% Default handler for service events.
+%
+% When the service that triggered the event (service_name) is in the
+% "recovering" state, applies its "recovery" policy:
+%   disable  - stop the service via service_stop(service_name, 1)
+%   restart  - try the last owner first, then the other allowed nodes
+%   other    - (relocate) try the other allowed nodes first and the
+%              last owner only as a final fallback
+%
+% Otherwise walks all services whose "depend" property names
+% service_name and starts or stops them to follow its state.
+%
+define default_service_event_handler()
+{
+       variable services = service_list();
+       variable x;
+       variable depends;
+       variable policy;
+       variable nodes;
+       variable owner;
+       variable state;
+
+       % debug("Executing default service event handler");
+
+       if (service_state == "recovering") {
+
+               policy = service_property(service_name, "recovery");
+               debug("Recovering",
+                     " Service: ", service_name,
+                     " Last owner: ", service_last_owner,
+                     " Policy: ", policy);
+
+               if (policy == "disable") {
+                       () = service_stop(service_name, 1);
+                       return;
+               }
+
+               nodes = allowed_nodes(service_name);
+               if (policy == "restart") {
+                       % union takes left as priority, so the last owner
+                       % becomes the first node tried.
+                       nodes = union(service_last_owner, nodes);
+               } else {
+                       % relocate: remove the last owner from the head of
+                       % the list and re-append it as the last resort.
+                       nodes = union(subtract(nodes, service_last_owner),
+                                     service_last_owner);
+               }
+
+               % Previously the reordered list was computed into a
+               % temporary that was never passed on, so the recovery
+               % policy had no effect on node ordering.
+               ()=move_or_start(service_name, nodes);
+
+               return;
+       }
+
+       for (x = 0; x < length(services); x++) {
+               if (service_name == services[x]) {
+                       % don't do anything to ourself! 
+                       continue;
+               }
+
+               %
+               % Simplistic dependency handling
+               %
+               depends = service_property(services[x], "depend");
+
+               % No dependency on the service that changed; do nothing
+               if (depends != service_name) {
+                       continue;
+               }
+
+               (owner, state) = service_status(services[x]);
+               if ((service_state == "started") and (owner < 0)) {
+                       info("Dependency met; starting ", services[x]);
+                       nodes = allowed_nodes(services[x]);
+                       ()=move_or_start(services[x], nodes);
+               }
+
+               % service died - stop service(s) that depend on the dead
+               if ((service_owner < 0) and (owner >= 0)) {
+                       info("Dependency lost; stopping ", services[x]);
+                       ()=service_stop(services[x]);
+               }
+       }
+}
+
+%
+% Default handler for config events.  Currently a no-op: the body
+% contains only a commented-out debug call.
+%
+define default_config_event_handler()
+{
+       % debug("Executing default config event handler");
+}
+
+%
+% Default handler for user events.  Acts on service_name according to
+% user_request:
+%   USER_RESTART  - stop, then start again preferring the current owner
+%   USER_RELOCATE - stop if running, then start preferring user_target
+%   USER_ENABLE   - start preferring user_target
+%   USER_DISABLE  - service_stop(service_name, 1)
+%   USER_STOP     - service_stop(service_name)
+%
+% Returns the result of the last start/stop operation, or ERR_ABORT
+% when a required stop fails or the request is not handled here.
+%
+define default_user_event_handler()
+{
+       % Unhandled requests (e.g. migrate, still a todo) used to return an
+       % uninitialized variable; report them as an error instead.
+       % NOTE(review): ERR_ABORT is assumed to be the right code for an
+       % unsupported request - confirm against the caller of user_return.
+       variable ret = ERR_ABORT;
+       variable nodes;
+       variable found = 0;
+       variable owner, state;
+
+       nodes = allowed_nodes(service_name);
+       (owner, state) = service_status(service_name);
+
+       if (user_request == USER_RESTART) {
+
+               % union takes left as priority: keep the service on the
+               % node that currently owns it.
+               if (owner >= 0) {
+                       nodes = union(owner, nodes);
+               }
+
+               notice("Stopping ", service_name, " for relocate to ", nodes);
+
+               if (service_stop(service_name) < 0) {
+                       return ERR_ABORT;
+               }
+
+               ret = move_or_start(service_name, nodes);
+
+       } else if ((user_request == USER_RELOCATE) or 
+                  (user_request == USER_ENABLE)) {
+
+               if (user_target > 0) {
+                       % Move the requested node to the front of the list,
+                       % but only if it is one of the allowed nodes.
+                       if (node_in_set(nodes, user_target) == 1) {
+                               nodes = union(user_target, nodes);
+                               found = 1;
+                       }
+
+                       if (found == 0) {
+                               warning("User specified node ", user_target,
+                                       " is offline");
+                       }
+               }
+
+               if ((owner >= 0) and (user_request == USER_RELOCATE)) {
+                       if (service_stop(service_name) < 0) {
+                               return ERR_ABORT;
+                       }
+               }
+
+               ret = move_or_start(service_name, nodes);
+
+       } else if (user_request == USER_DISABLE) {
+
+               ret = service_stop(service_name, 1);
+
+       } else if (user_request == USER_STOP) {
+
+               ret = service_stop(service_name);
+
+       } 
+       % todo - migrate
+
+       return ret;
+}
+
+%
+% Script entry point: run the default handler that matches event_type.
+% Only the user-event handler's return value is kept; it is stored in
+% user_return.
+%
+if (event_type == EVENT_NODE)
+       default_node_event_handler();
+if (event_type == EVENT_SERVICE)
+       default_service_event_handler();
+if (event_type == EVENT_CONFIG)
+       default_config_event_handler();
+if (event_type == EVENT_USER)
+       user_return=default_user_event_handler();
+
--- cluster/rgmanager/src/resources/Makefile    2007/07/12 11:23:16     1.13.2.6
+++ cluster/rgmanager/src/resources/Makefile    2007/12/04 21:59:54     1.13.2.7
@@ -34,6 +34,9 @@
        utils/httpd-parse-config.pl utils/tomcat-parse-config.pl \
        utils/member_util.sh
 
+EVENT_TARGETS= \
+       default_event_script.sl
+
 all:
 
 install: all
@@ -44,6 +47,7 @@
        install $(TARGETS) ${sharedir}
        install $(UTIL_TARGETS) ${sharedir}/utils
        install -m 644 $(METADATA) ${sharedir}
+       install -m 644 $(EVENT_TARGETS) ${sharedir}
 
 uninstall:
        ${UNINSTALL} ${UTIL_TARGETS} ${sharedir}/utils
--- cluster/rgmanager/src/resources/netfs.sh    2007/10/03 16:44:15     1.7.2.2
+++ cluster/rgmanager/src/resources/netfs.sh    2007/12/04 21:59:54     1.7.2.3
@@ -348,6 +348,112 @@
        return $NO
 }
 
+#
+# killMountProcesses mount_point
+#
+# Tries to free up mount_point by killing off the processes that might
+# be keeping it busy, as reported by lsof or, when lsof is not
+# installed, by fuser.  Up to three passes are made: the first sends
+# SIGTERM, the later ones SIGKILL.
+#
+# Returns $FAIL on bad usage, when neither lsof nor fuser can be found,
+# or when a process is still listed on the third pass; otherwise
+# $SUCCESS.
+#
+killMountProcesses()
+{
+        typeset -i ret=$SUCCESS
+        typeset have_lsof=""
+        typeset have_fuser=""
+        typeset try
+        # Keep the scratch variables local so they cannot clobber
+        # same-named variables in the caller.
+        typeset file command pid user
+
+        if [ $# -ne 1 ]; then
+                ocf_log err \
+                        "Usage: killMountProcesses mount_point"
+                return $FAIL
+        fi
+
+        typeset mp="$1"
+
+        ocf_log notice "Forcefully unmounting $mp"
+
+        #
+        # Not all distributions have lsof.  fuser is only used as a
+        # fallback when lsof is missing.
+        #
+        file=$(which lsof 2>/dev/null)
+        if [ -f "$file" ]; then
+                have_lsof=$YES
+        fi
+
+        file=$(which fuser 2>/dev/null)
+        if [ -f "$file" ]; then
+                have_fuser=$YES
+        fi
+
+        if [ -z "$have_lsof" -a -z "$have_fuser" ]; then
+                ocf_log warn \
+                        "Cannot forcefully unmount $mp; cannot find lsof or fuser commands"
+                return $FAIL
+        fi
+
+        for try in 1 2 3; do
+                if [ -n "$have_lsof" ]; then
+                        #
+                        # Use lsof to free up mount point
+                        #
+                        while read command pid user
+                        do
+                                if [ -z "$pid" ]; then
+                                        continue
+                                fi
+
+                                if [ $try -eq 1 ]; then
+                                        ocf_log warn \
+                                            "killing process $pid ($user $command $mp)"
+                                elif [ $try -eq 3 ]; then
+                                        # Still busy after TERM and KILL.
+                                        ocf_log crit \
+                                            "Could not clean up mountpoint $mp"
+                                        ret=$FAIL
+                                fi
+
+                                if [ $try -gt 1 ]; then
+                                        kill -9 "$pid"
+                                else
+                                        kill -TERM "$pid"
+                                fi
+                        done < <(lsof -w -bn 2>/dev/null | \
+                            grep -w -E "$mp(/.*|)\$" | \
+                            awk '{print $1,$2,$3}' | \
+                            sort -u -k 1,3)
+                elif [ -n "$have_fuser" ]; then
+                        #
+                        # Use fuser to free up mount point
+                        #
+                        while read command pid user
+                        do
+                                if [ -z "$pid" ]; then
+                                        continue
+                                fi
+
+                                if [ $try -eq 1 ]; then
+                                        ocf_log warn \
+                                            "killing process $pid ($user $command $mp)"
+                                elif [ $try -eq 3 ]; then
+                                        # Still busy after TERM and KILL.
+                                        ocf_log crit \
+                                            "Could not clean up mount point $mp"
+                                        ret=$FAIL
+                                fi
+
+                                if [ $try -gt 1 ]; then
+                                        kill -9 "$pid"
+                                else
+                                        kill -TERM "$pid"
+                                fi
+                        done < <(fuser -vm "$mp" | \
+                            grep -v PID | \
+                            sed 's;^'"$mp"';;' | \
+                            awk '{print $4,$2,$1}' | \
+                            sort -u -k 1,3)
+                fi
+        done
+
+        return $ret
+}
 
 #
 # startNFSFilesystem
@@ -498,8 +604,8 @@
        #
        if [ -n "$mp" ]; then
                case ${OCF_RESKEY_force_unmount} in
-               $YES_STR)       force_umount="-f" ;;
-               0)              force_umount="-f" ;;
+               $YES_STR)       force_umount="$YES" ;;
+               1)              force_umount="$YES" ;;
                *)              force_umount="" ;;
                esac
        fi
@@ -507,6 +613,7 @@
        #
        # Unmount
        #
+        while [ ! "$done" ]; do
        isMounted $fullpath $mp
        case $? in
        $NO)
@@ -519,26 +626,46 @@
                ;;
        $YES)
                sync; sync; sync
-               ocf_log info "unmounting $fullpath ($mp)"
+                        ocf_log info "unmounting $mp"
 
-               umount $force_umount $mp
+                        umount $mp
                if  [ $? -eq 0 ]; then
-                       return $SUCCESS
+                                umount_failed=
+                                done=$YES
+                                continue
                fi
 
                umount_failed=yes
 
+                        if [ "$force_umount" ]; then
+                                killMountProcesses $mp
+                        fi
+
+                        if [ $try -ge $max_tries ]; then
+                                done=$YES
+                        else
+                                sleep $sleep_time
+                                let try=try+1
+                        fi
                ;;
        *)
                return $FAIL
                ;;
        esac
 
+                if [ $try -ge $max_tries ]; then
+                        done=$YES
+                else
+                        sleep $sleep_time
+                        let try=try+1
+                fi
+        done # while
        if [ -n "$umount_failed" ]; then
                ocf_log err "'umount $fullpath' failed ($mp), error=$ret_val"
 
                return $FAIL
        fi
+
        return $SUCCESS
 }
 

Reply via email to