Hi, I'm trying to set up an NFS cluster based on CentOS 5.4 on x86_64,
using:
[root@nfs01 ~]# rpm -qa | grep heartbeat
heartbeat-3.0.0-34.1
heartbeat-3.0.0-34.1
[root@nfs01 ~]# rpm -qa | grep pacemaker
pacemaker-1.0.5-5.2
pacemaker-libs-1.0.5-5.2
pacemaker-libs-1.0.5-5.2
pacemaker-1.0.5-5.2
[root@nfs01 ~]# rpm -qa | grep openais
openais-0.80.6-8.el5_4.1
libopenais2-0.80.5-16.1
[root@nfs01 ~]# rpm -qa | grep drbd
drbd83-8.3.2-6.el5_3
kmod-drbd83-8.3.2-6.el5_3
[root@nfs01 ~]#
My config files:
[root@nfs01 ~]# more /etc/drbd.conf
global {
usage-count no;
}
common {
protocol C;
}
resource nfsdata {
on nfs01.local {
device /dev/drbd0;
disk /dev/hda4;
address 192.168.25.200:7788;
meta-disk internal;
}
on nfs02.local {
device /dev/drbd0;
disk /dev/hda4;
address 192.168.25.201:7788;
meta-disk internal;
}
disk {
fencing resource-only;
on-io-error detach;
}
handlers {
# these handlers are necessary for drbd 8.3 + pacemaker compatibility
fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
after-resync-target "/usr/lib/drbd/crm-unfence-peer.sh";
outdate-peer "/usr/lib64/heartbeat/drbd-peer-outdater";
}
net {
max-buffers 2048; #datablock buffers used before writing to disk.
}
syncer {
rate 40M;
}
}
[root@nfs01 ~]# more /etc/ais/openais.conf
totem {
version: 2
token: 3000
token_retransmits_before_loss_const: 10
join: 60
consensus: 1500
vsftype: none
max_messages: 20
clear_node_high_bit: yes
secauth: on
threads: 0
rrp_mode: passive
interface {
ringnumber: 0
bindnetaddr: 192.168.25.0
mcastaddr: 239.96.1.1
mcastport: 5405
}
}
logging {
to_stderr: yes
debug: off
timestamp: on
to_file: no
to_syslog: yes
syslog_facility: daemon
}
amf {
mode: disabled
}
service {
ver: 0
name: pacemaker
use_mgmtd: yes
}
aisexec {
user: root
group: root
}
[root@nfs01 ~]# crm configure show
node nfs01.local \
attributes standby="off"
node nfs02.local
primitive drbd0 ocf:heartbeat:drbd \
params drbd_resource="nfsdata" ignore_deprecation="true" \
op monitor interval="59s" role="Master" timeout="30s" \
op monitor interval="60s" role="Slave" timeout="30s"
primitive fs0 ocf:heartbeat:Filesystem \
params fstype="ext3" directory="/data" device="/dev/drbd0" \
meta target-role="Started"
primitive nfs0 ocf:heartbeat:nfsserver \
params nfs_init_script="/etc/init.d/nfs" nfs_notify_cmd="/sbin/rpc.statd" nfs_shared_infodir="/var/lib/nfs" nfs_ip="172.16.16.229" \
meta target-role="Started"
primitive pingd ocf:pacemaker:pingd \
params host_list="172.16.16.1" multiplier="100" \
op monitor interval="15s" timeout="5s"
primitive vip0 ocf:heartbeat:IPaddr \
params ip="172.16.16.229" broadcast="172.16.16.255" nic="eth0" cidr_netmask="24" \
op monitor interval="21s" timeout="5s" \
meta target-role="Started"
group nfs-group fs0 vip0 nfs0
ms ms-drbd0 drbd0 \
meta clone-max="2" notify="true" globally-unique="false" target-role="Started"
clone pingdclone pingd \
meta globally-unique="false"
location cli-prefer-nfs-group nfs-group \
rule $id="cli-prefer-rule-nfs-group" inf: #uname eq nfs01.local
location drbd-fence-by-handler-ms-drbd0 ms-drbd0 \
rule $id="drbd-fence-by-handler-rule-ms-drbd0" $role="Master" -inf: #uname ne nfs02.local
location ms-drbd0-master-on-node1 ms-drbd0 \
rule $id="ms-drbd0-master-on-node1-rule" $role="master" 100: #uname eq nfs01.local
location nfs-group-on-connected-node nfs-group \
rule $id="nfs-group-on-connected-node-rule" -inf: not_defined pingd or pingd lte 0
colocation nfs-group-on-ms-drbd0 inf: nfs-group ms-drbd0:Master
order ms-drbd0-before-nfs-group inf: ms-drbd0:promote nfs-group:start
property $id="cib-bootstrap-options" \
dc-version="1.0.5-462f1569a43740667daf7b0f6b521742e9eb8fa7" \
cluster-infrastructure="openais" \
expected-quorum-votes="2" \
no-quorum-policy="ignore" \
stonith-enabled="false"
Everything seems fine (the drbd-fence-by-handler-ms-drbd0 location
constraint seems right?), but if I reboot the primary node (nfs01), I
notice that:
- CentOS is unable to cleanly unmount the /data directory;
- after the reboot, the cluster is in a split-brain situation.
I haven't found any help for CentOS 5.4 with a similar setup.
I used packages from:
[server_ha-clustering]
name=High Availability/Clustering server technologies (CentOS_5)
type=rpm-md
baseurl=http://download.opensuse.org/repositories/home:/hammer_pan:/ocfs2/RHEL_5/
gpgcheck=1
gpgkey=http://download.opensuse.org/repositories/home:/hammer_pan:/ocfs2/RHEL_5/repodata/repomd.xml.key
enabled=1
Could anyone help me?
--
d.
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems