Send Linux-ha-cvs mailing list submissions to
[email protected]
To subscribe or unsubscribe via the World Wide Web, visit
http://lists.community.tummy.com/mailman/listinfo/linux-ha-cvs
or, via email, send a message with subject or body 'help' to
[EMAIL PROTECTED]
You can reach the person managing the list at
[EMAIL PROTECTED]
When replying, please edit your Subject line so it is more specific
than "Re: Contents of Linux-ha-cvs digest..."
Today's Topics:
1. Linux-HA CVS: mgmt by zhenh from
([email protected])
2. Linux-HA CVS: resources by lars from
([email protected])
----------------------------------------------------------------------
Message: 1
Date: Wed, 17 May 2006 21:31:43 -0600 (MDT)
From: [email protected]
Subject: [Linux-ha-cvs] Linux-HA CVS: mgmt by zhenh from
To: [EMAIL PROTECTED]
Message-ID: <[EMAIL PROTECTED]>
linux-ha CVS committal
Author : zhenh
Host :
Project : linux-ha
Module : mgmt
Dir : linux-ha/mgmt/client
Modified Files:
haclient.py.in
Log Message:
The data from the LRM will not change when the CIB changes, so add a
keep-in-cache option for it.
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/mgmt/client/haclient.py.in,v
retrieving revision 1.42
retrieving revision 1.43
diff -u -3 -r1.42 -r1.43
--- haclient.py.in 17 May 2006 03:07:42 -0000 1.42
+++ haclient.py.in 18 May 2006 03:31:43 -0000 1.43
@@ -1554,6 +1554,7 @@
username = None
password = None
cache = {}
+ no_update_cache = {}
io_tag = None
update_timer = -1
active_nodes = []
@@ -1564,12 +1565,16 @@
def cache_lookup(self, key) :
if self.cache.has_key(key) :
return self.cache[key]
+ if self.no_update_cache.has_key(key) :
+ return self.no_update_cache[key]
return None
- def cache_update(self, key, data) :
- if not self.cache.has_key(key) :
+ def cache_update(self, key, data, keep_in_cache = False) :
+ if not keep_in_cache :
self.cache[key] = data
-
+ else :
+ self.no_update_cache[key] = data
+
def cache_clear(self) :
self.cache.clear()
@@ -1629,12 +1634,12 @@
self.io_tag = gobject.io_add_watch(fd, gobject.IO_IN,
self.on_event)
return True
- def query(self, query) :
+ def query(self, query, keep_in_catch = False) :
result = self.cache_lookup(query)
if result != None :
return result
result = self.do_cmd(query)
- self.cache_update(query, result)
+ self.cache_update(query, result, keep_in_catch)
return result
def do_cmd(self, command) :
@@ -1830,7 +1835,7 @@
return self.split_attr_list(raw_params, param_attr_names)
def get_rsc_meta(self, rsc_class, rsc_type, rsc_provider) :
- lines = self.query("rsc_metadata\n%s\n%s\n%s"%(rsc_class,
rsc_type, rsc_provider))
+ lines = self.query("rsc_metadata\n%s\n%s\n%s"%(rsc_class,
rsc_type, rsc_provider),True)
if lines == None :
return None
meta_data = ""
@@ -1889,13 +1894,13 @@
return meta
def get_rsc_classes(self) :
- return self.query("rsc_classes");
+ return self.query("rsc_classes",True);
def get_rsc_types(self, rsc_class) :
- return self.query("rsc_types\n"+rsc_class)
+ return self.query("rsc_types\n"+rsc_class,True)
def get_rsc_providers(self, rsc_class, rsc_type) :
- provider = self.query("rsc_providers\n%s\n%s"%(rsc_class,
rsc_type))
+ provider = self.query("rsc_providers\n%s\n%s"%(rsc_class,
rsc_type),True)
if provider == [] :
return ["heartbeat"]
return provider
------------------------------
Message: 2
Date: Thu, 18 May 2006 09:27:12 -0600 (MDT)
From: [email protected]
Subject: [Linux-ha-cvs] Linux-HA CVS: resources by lars from
To: [EMAIL PROTECTED]
Message-ID: <[EMAIL PROTECTED]>
linux-ha CVS committal
Author : lars
Host :
Project : linux-ha
Module : resources
Dir : linux-ha/resources/OCF
Modified Files:
Filesystem.in
Log Message:
Put in first version of OCFS2 related changes.
===================================================================
RCS file: /home/cvs/linux-ha/linux-ha/resources/OCF/Filesystem.in,v
retrieving revision 1.17
retrieving revision 1.18
diff -u -3 -r1.17 -r1.18
--- Filesystem.in 10 Mar 2006 04:14:20 -0000 1.17
+++ Filesystem.in 18 May 2006 15:27:11 -0000 1.18
@@ -1,6 +1,6 @@
#!/bin/sh
#
-# $Id: Filesystem.in,v 1.17 2006/03/10 04:14:20 xunsun Exp $
+# $Id: Filesystem.in,v 1.18 2006/05/18 15:27:11 lars Exp $
#
# Support: [EMAIL PROTECTED]
# License: GNU General Public License (GPL)
@@ -95,7 +95,7 @@
usage() {
cat <<-EOT
usage: $0 {start|stop|status|monitor|validate-all|meta-data}
- $Id: Filesystem.in,v 1.17 2006/03/10 04:14:20 xunsun Exp $
+ $Id: Filesystem.in,v 1.18 2006/05/18 15:27:11 lars Exp $
EOT
}
@@ -143,11 +143,31 @@
<shortdesc lang="en">options</shortdesc>
<content type="string" default="" />
</parameter>
+
+<parameter name="ocfs2_cluster" unique="0">
+<longdesc lang="en">
+The name (UUID) of the OCFS2 cluster this filesystem is part of,
+iff this is an OCFS2 resource and there's more than one cluster. You
+should not need to specify this.
+</longdesc>
+<shortdesc lang="en">OCFS2 cluster name/UUID</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="ocfs2_configfs" unique="0">
+<longdesc lang="en">
+Mountpoint of the cluster hierarchy below configfs. You should not
+need to specify this.
+</longdesc>
+<shortdesc lang="en">OCFS2 configfs root</shortdesc>
+<content type="string" default="" />
+</parameter>
</parameters>
<actions>
<action name="start" timeout="60" />
<action name="stop" timeout="60" />
+<action name="notify" timeout="60" />
<action name="status" depth="0" timeout="10" interval="10" start-delay="10" />
<action name="monitor" depth="0" timeout="10" interval="10" start-delay="10" />
<action name="validate-all" timeout="5" />
@@ -167,16 +187,10 @@
#
flushbufs() {
if
- [ "$BLOCKDEV" != "" -a -x "$BLOCKDEV" ]
+ [ "$BLOCKDEV" != "" -a -x "$BLOCKDEV" -a "$blockdevice" = "yes" ]
then
- case $1 in
- -*|[^/]*:/*|//[^/]*/*) # -U, -L options to mount, or NFS mount point,
- # or samba mount point
- ;;
- *) $BLOCKDEV --flushbufs $1
- return $?
- ;;
- esac
+ $BLOCKDEV --flushbufs $1
+ return $?
fi
return 0
@@ -187,6 +201,13 @@
#
Filesystem_start()
{
+ if [ "$FSTYPE" = "ocfs2" ] && [ -z "$OCFS2_DO_MOUNT" ]; then
+ # Sorry, start doesn't actually do anything here. Magic
+ # happens in Filesystem_notify; see the comment there.
+ ocf_log debug "$DEVICE: ocfs2 - skipping start."
+ return $OCF_SUCCESS
+ fi
+
# See if the device is already mounted.
Filesystem_status >/dev/null 2>&1
if [ $? -eq $OCF_SUCCESS ] ; then
@@ -195,6 +216,8 @@
fi
# Insert SCSI module
+ # TODO: This probably should go away. Why should the filesystem
+ # RA magically load a kernel module?
$MODPROBE scsi_hostadapter >/dev/null 2>&1
if [ -z $FSTYPE ]; then
@@ -221,7 +244,7 @@
if
case $FSTYPE in
- ext3|reiserfs|xfs|jfs|vfat|fat|nfs|cifs|smbfs) false;;
+ ext3|reiserfs|reiser4|nss|xfs|jfs|vfat|fat|nfs|cifs|smbfs|ocfs2)
false;;
*) true;;
esac
then
@@ -265,11 +288,154 @@
}
# end of Filesystem_start
+Filesystem_notify() {
+ # Process notifications; this is the essential glue level for
+ # giving user-space membership events to a cluster-aware
+ # filesystem. Right now, only OCFS2 is supported.
+ #
+ # We get notifications from hb2 that some operation (start or
+ # stop) has completed; we then (1) compare the list of nodes
+ # which are active in the fs membership with the list of nodes
+ # which hb2 wants to be participating and remove those which
+ # aren't supposed to be around. And vice-versa, (2) we add nodes
+ # which aren't yet members, but which hb2 _does_ want to be
+ # active.
+ #
+ # Eventually, if (3) we figure that we ourselves are on the list
+ # of nodes which weren't active yet, we initiate a mount
+ # operation.
+ #
+ # That's it.
+ #
+ # If you wonder why we don't process pre-notifications, or don't
+ # do anything in "start": pre-start doesn't help us, because we
+ # don't get it on the node just starting. pre-stop doesn't help
+ # us either, because we can't remove any nodes while still
+ # having the fs mounted. And because we can't mount w/o the
+ # membership populated, we have to wait for the post-start
+ # event.
+ #
+ # This approach _does_ have the advantage of being rather
+ # robust, I hope. We always re-sync the current membership with
+ # the expected membership.
+ #
+ # Note that this expects that the base cluster is already
+ # active; ie o2cb has been started and populated
+ # $OCFS2_CLUSTER_ROOT/node/ already. This can be achieved by
+ # simply having o2cb run on all nodes by the CRM too. This
+ # probably ought to be mentioned somewhere in the to be written
+ # documentation. ;-)
+ #
+
+ if [ "$FSTYPE" != "ocfs2" ]; then
+ # One of the cases which shouldn't occur; it should have
+ # been caught much earlier. Still, you know ...
+ ocf_log err "$DEVICE: Notification received for non-ocfs2
mount."
+ return $OCF_ERR_GENERIC
+ fi
+
+ local n_type="$OCF_RESKEY_notify_type"
+ local n_op="$OCF_RESKEY_notify_operation"
+ local n_active="$OCF_RESKEY_notify_active_uname"
+
+ ocf_log debug "$OCFS2_UUID - notify: $n_type for $n_op - active on
$n_active"
+
+ if [ "$n_type" != "post" ]; then
+ ocf_log debug "$OCFS2_UUID: ignoring pre-notify."
+ return $OCF_SUCCESS
+ fi
+
+ local n_myself=${HA_CURHOST:-$(uname -n | tr A-Z a-z)}
+ ocf_log debug "$OCFS2_UUID: I am node $n_myself."
+
+ case " $n_active " in
+ *" $n_myself "*) ;;
+ *) ocf_log err "$OCFS2_UUID: $n_myself (local) not on active list!"
+ return $OCF_ERR_GENERIC
+ ;;
+ esac
+
+ # (1)
+ if [ -d "$OCFS2_FS_ROOT" ]; then
+ entry_prefix=$OCFS2_FS_ROOT/
+ for entry in $OCFS2_FS_ROOT/* ; do
+ n_fs="${entry##$entry_prefix}"
+ ocf_log debug "$OCFS2_UUID: Found node $n_fs"
+ case " $n_active " in
+ *" $n_fs "*)
+ # Construct a list of nodes which are present
+ # already in the membership.
+ n_exists="$n_exists $n_fs"
+ ocf_log debug "$OCFS2_UUID: Keeping node: $n_fs"
+ ;;
+ *)
+ # Node is in the membership currently, but not on our
+ # active list. Must be removed.
+ if [ "$n_op" = "start" ]; then
+ ocf_log warn "$OCFS2_UUID: Removing nodes on
start"
+ fi
+ ocf_log info "$OCFS2_UUID: Removing dead node: $n_fs"
+ if rm -f $entry ; then
+ ocf_log debug "$OCFS2_UUID: Removal of $n_fs
ok."
+ else
+ ocf_log err "$OCFS2_UUID: Removal of $n_fs
failed!"
+ fi
+ ;;
+ esac
+ done
+ else
+ ocf_log info "$OCFS2_UUID: Doesn't exist yet, creating."
+ mkdir -p $OCFS2_UUID
+ fi
+
+ ocf_log debug "$OCFS2_UUID: Nodes which already exist: $n_exists"
+
+ # (2)
+ for entry in $n_active ; do
+ ocf_log debug "$OCFS2_UUID: Expected active node: $entry"
+ case " $n_exists " in
+ *" $entry "*)
+ ocf_log debug "$OCFS2_UUID: Already active: $entry"
+ ;;
+ *)
+ if [ "$n_op" = "stop" ]; then
+ ocf_log warn "$OCFS2_UUID: Adding nodes on stop"
+ fi
+ ocf_log info "$OCFS2_UUID: Activating node: $entry"
+ if ! ln -s $OCFS2_CLUSTER_ROOT/node/$entry
$OCFS2_UUID/$entry ; then
+ ocf_log err "$OCFS2_CLUSTER_ROOT/node/$entry:
failed to link"
+ # exit $OCF_ERR_GENERIC
+ fi
+
+ if [ "$entry" = "$n_myself" ]; then
+ OCFS2_DO_MOUNT=yes
+ ocf_log debug "$OCFS2_UUID: To be mounted."
+ fi
+ ;;
+ esac
+ done
+
+ # (3)
+ # For now, always unconditionally go ahead; we're here, so we
+ # should have the fs mounted. In theory, it should be fine to
+ # only do this when we're activating ourselves, but what if
+ # something went wrong, and we're in the membership but don't
+ # have the fs mounted? Can this happen? TODO
+ OCFS2_DO_MOUNT="yes"
+ if [ -n "$OCFS2_DO_MOUNT" ]; then
+ Filesystem_start
+ fi
+}
+
#
# STOP: Unmount the filesystem
#
Filesystem_stop()
{
+ # TODO: We actually need to free up anything mounted on top of
+ # us too, and clear nfs exports of ourselves; otherwise, our own
+ # unmount process may be blocked.
+
# See if the device is currently mounted
if
Filesystem_status >/dev/null 2>&1
@@ -309,6 +475,7 @@
DEV=`$MOUNT | grep "on $MOUNTPOINT " | cut -d' ' -f1`
# Unmount the filesystem
$UMOUNT $MOUNTPOINT
+ rc=$?
fi
if [ $? -ne 0 ] ; then
ocf_log err "Couldn't unmount $MOUNTPOINT"
@@ -319,7 +486,18 @@
: $MOUNTPOINT Not mounted. No problema!
fi
- return $?
+ # We'll never see the post-stop notification. We're gone now,
+ # have unmounted, and thus should remove the membership.
+ if [ "$FSTYPE" = "ocfs2" ]; then
+ if [ ! -d "$OCFS2_FS_ROOT" ]; then
+ ocf_log info "$OCFS2_FS_ROOT: Filesystem membership
already gone."
+ else
+ ocf_log info "$OCFS2_FS_ROOT: Removing membership
directory."
+ rm -rf $OCFS2_FS_ROOT/
+ fi
+ fi
+
+ return $rc
}
# end of Filesystem_stop
@@ -345,10 +523,14 @@
msg="$MOUNTPOINT is unmounted (stopped)"
fi
+ # TODO: For ocfs2, or other cluster filesystems, should we be
+ # checking connectivity to other nodes here, or the IO path to
+ # the storage?
+
case "$OP" in
status) ocf_log info "$msg";;
esac
- return $rc
+ return_master $rc
}
# end of Filesystem_status
@@ -383,6 +565,63 @@
return $OCF_SUCCESS
}
+ocfs2_init()
+{
+ # Check & initialize the OCFS2 specific variables.
+ if [ -z "$OCF_RESKEY_clone_max" ]; then
+ ocf_log err "ocfs2 must be run as a clone."
+ exit $OCF_ERR_GENERIC
+ fi
+
+ if [ $blockdevice = "no" ]; then
+ ocf_log err "$DEVICE: ocfs2 needs a block device instead."
+ exit $OCF_ERR_GENERIC
+ fi
+
+ for f in "$OCF_RESKEY_ocfs2_configfs" /sys/kernel/config/cluster
/configfs/cluster ; do
+ if [ -n "$f" -a -d "$f" ]; then
+ OCFS2_CONFIGFS="$f"
+ ocf_log debug "$OCFS2_CONFIGFS: used as configfs root."
+ break
+ fi
+ done
+ if [ ! -d "$OCFS2_CONFIGFS" ]; then
+ ocf_log err "ocfs2 needs configfs mounted."
+ exit $OCF_ERR_GENERIC
+ fi
+
+ OCFS2_UUID=$(mounted.ocfs2 -d $DEVICE|tail -1|awk '{print $3}'|tr -d --
-|tr a-z A-Z)
+ if [ -z "$OCFS2_UUID" ]; then
+ ocf_log err "$DEVICE: Could not determine ocfs2 UUID."
+ exit $OCF_ERR_GENERIC
+ fi
+
+ if [ -n "$OCF_RESKEY_ocfs2_cluster" ]; then
+ OCFS2_CLUSTER=$(echo $OCF_RESKEY_ocfs2_cluster | tr a-z A-Z)
+ else
+ OCFS2_CLUSTER=$(find /tmp -maxdepth 1 -mindepth 1 -type d 2>&1)
+ set -- $OCFS2_CLUSTER
+ local n="$#"
+ if [ $n -gt 1 ]; then
+ ocf_log err "$OCFS2_CLUSTER: several clusters found."
+ exit $OCF_ERR_GENERIC
+ fi
+ if [ $n -eq 0 ]; then
+ ocf_log err "$OCFS2_CONFIGFS: no clusters found."
+ exit $OCF_ERR_GENERIC
+ fi
+ fi
+ ocf_log debug "$DEVICE: using cluster $OCFS2_CLUSTER"
+
+ OCFS2_CLUSTER_ROOT="$OCFS2_CONFIGFS/$OCFS2_CLUSTER"
+ if [ ! -d "$OCFS2_CLUSTER_ROOT" ]; then
+ ocf_log err "$OCFS2_CLUSTER: Cluster doesn't exist. Maybe o2cb
hasn't been run?"
+ exit $OCF_ERR_GENERIC
+ fi
+
+ OCFS2_FS_ROOT=$OCFS2_CLUSTER_ROOT/heartbeat/$OCFS2_UUID
+}
+
# Check the arguments passed to this script
if
[ $# -ne 1 ]
@@ -428,6 +667,17 @@
;;
esac
+if [ "$FSTYPE" = "ocfs2" ]; then
+ ocfs2_init
+else
+ if [ -n "$OCF_RESKEY_clone_max" ]; then
+ ocf_log err "DANGER! $FSTYPE on $DEVICE is NOT cluster-aware!"
+ ocf_log err "DO NOT RUN IT AS A CLONE!"
+ ocf_log err "Politely refusing to proceed to avoid data
corruption."
+ exit $OCF_ERR_GENERIC
+ fi
+fi
+
# It is possible that OCF_RESKEY_directory has one or even multiple trailing
"/".
# But the output of `mount` and /proc/mounts do not.
if [ -z $OCF_RESKEY_directory ]; then
@@ -439,6 +689,8 @@
MOUNTPOINT=$(echo $OCF_RESKEY_directory | sed 's/\/*$//')
: ${MOUNTPOINT:=/}
# At this stage, $MOUNTPOINT does not contain trailing "/" unless it is "/"
+ # TODO: / mounted via Filesystem sounds dangerous. On stop, we'll
+ # kill the whole system. Is that a good idea?
fi
# Check to make sure the utilites are found
@@ -451,6 +703,8 @@
case $OP in
start) Filesystem_start
;;
+ notify) Filesystem_notify
+ ;;
stop) Filesystem_stop
;;
status|monitor) Filesystem_status
------------------------------
_______________________________________________
Linux-ha-cvs mailing list
[email protected]
http://lists.community.tummy.com/mailman/listinfo/linux-ha-cvs
End of Linux-ha-cvs Digest, Vol 30, Issue 56
********************************************