I thought I had it nailed but still no go.
I'm running a simple two-node Active/Passive, Debian/Etch cluster
with apache, mysql, heartbeat-2.1.3 and drbd-8.2.5 using mcast on the
primary NIC and bcast on a secondary GigE interface, which is also the
replication link for drbd. I also setup a serial link between the
nodes. I've setup dopd as per the drbd user guide and Florian's blog
and it seems to work as documented when I 'ifconfig down eth1' or do
other nasty things with the x-over cable.
I can migrate the drbd master manually back and forth between the
nodes (feeble-0 and feeble-1) but if I crm_standby the master or
shutdown/pull the plug on the primary then secondary doesn't get
promoted, drbd gets split-brained, and I must then manually
untangle the mess. My cib is obviously not correct but my brain is
having a hard time parsing the xml...any pointers please?
Logs show the following after attempting a crm_standby:
pengine[5003]: 2008/03/19_16:55:58 info: unpack_nodes: Node feeble-1 is in
standby-mode
pengine[5003]: 2008/03/19_16:55:58 info: determine_online_status: Node feeble-1
is standby
pengine[5003]: 2008/03/19_16:55:58 info: determine_online_status: Node feeble-0
is online
pengine[5003]: 2008/03/19_16:55:58 WARN: unpack_rsc_op: Processing failed op
drbd_id:0_promote_0 on feeble-0: Error
pengine[5003]: 2008/03/19_16:55:58 notice: clone_print: Master/Slave Set:
ms-drbd_id
pengine[5003]: 2008/03/19_16:55:58 notice: native_print: drbd_id:0
(heartbeat::ocf:drbd): Master feeble-0 FAILED
pengine[5003]: 2008/03/19_16:55:58 notice: native_print: drbd_id:1
(heartbeat::ocf:drbd): Stopped
pengine[5003]: 2008/03/19_16:55:58 notice: native_print: fs_id
(heartbeat::ocf:Filesystem): Stopped
pengine[5003]: 2008/03/19_16:55:58 notice: native_print: ip_id
(heartbeat::ocf:IPaddr): Stopped
pengine[5003]: 2008/03/19_16:55:58 notice: native_print: mysql_id
(heartbeat::ocf:mysql): Stopped
pengine[5003]: 2008/03/19_16:55:58 notice: native_print: apache_id
(heartbeat::ocf:apache): Stopped
pengine[5003]: 2008/03/19_16:55:58 notice: native_print: email_id
(heartbeat::ocf:MailTo): Stopped
pengine[5003]: 2008/03/19_16:55:58 WARN: native_color: Resource drbd_id:1
cannot run anywhere
cib.xml resources and constraints sections:
<resources>
<master_slave id="ms-drbd_id">
<meta_attributes id="ma-ms-drbd1_id">
<attributes>
<nvpair id="ma-ms-drbd-1_id" name="clone_max" value="2"/>
<nvpair id="ma-ms-drbd-2_id" name="clone_node_max" value="1"/>
<nvpair id="ma-ms-drbd-3_id" name="master_max" value="1"/>
<nvpair id="ma-ms-drbd-4_id" name="master_node_max" value="1"/>
<nvpair id="ma-ms-drbd-5_id" name="notify" value="yes"/>
<nvpair id="ma-ms-drbd-6_id" name="globally_unique" value="false"/>
<nvpair id="ma-ms-drbd-7_id" name="target_role" value="started"/>
</attributes>
</meta_attributes>
<primitive id="drbd_id" class="ocf" provider="heartbeat" type="drbd">
<operations>
<op id="drbd-monitoring" interval="30s" name="monitor" timeout="15s"/>
</operations>
<instance_attributes id="ia-drbd_id">
<attributes>
<nvpair id="drdb-resource_id" name="drbd_resource" value="r0"/>
</attributes>
</instance_attributes>
</primitive>
</master_slave>
<primitive id="fs_id" class="ocf" provider="heartbeat" type="Filesystem">
<operations>
<op id="Filesystem_Monitoring" interval="10s" name="monitor"
timeout="30s"/>
</operations>
<instance_attributes id="ia-fs_id">
<attributes>
<nvpair id="ia-fs-1_id" name="fstype" value="ext3"/>
<nvpair id="ia-fs-2_id" name="directory" value="/export_www"/>
<nvpair id="ia-fs-3_id" name="device" value="/dev/drbd1"/>
</attributes>
</instance_attributes>
</primitive>
<primitive id="ip_id" class="ocf" provider="heartbeat" type="IPaddr">
<operations>
<op id="ip-monitoring" interval="10s" name="monitor" timeout="30s"/>
</operations>
<instance_attributes id="ia-ip_id">
<attributes>
<nvpair id="ip_id" name="ip" value="132.206.178.80"/>
</attributes>
</instance_attributes>
</primitive>
<primitive id="mysql_id" class="ocf" provider="heartbeat" type="mysql">
<operations>
<op id="mysql-monitoring" interval="10s" name="monitor" timeout="30s"/>
</operations>
</primitive>
<primitive id="apache_id" class="ocf" provider="heartbeat" type="apache">
<operations>
<op id="apache-monitoring" interval="10s" name="monitor" timeout="30s"/>
</operations>
</primitive>
<primitive id="email_id" class="ocf" provider="heartbeat" type="MailTo">
<instance_attributes id="ia-email_id">
<attributes>
<nvpair id="ia-email-1_id" name="subject" value="DRBD takeover"/>
<nvpair id="ia-email-2_id" name="email" value="root"/>
</attributes>
</instance_attributes>
</primitive>
</resources>
<constraints>
<rsc_colocation id="fs_on_drbd" from="fs_id" to="ms-drbd_id" to_role="master"
score="infinity"/>
<rsc_colocation id="ip_on_fs" from="ip_id" to="fs_id" score="infinity"/>
<rsc_colocation id="mysql_on_ip" from="mysql_id" to="ip_id" score="infinity"/>
<rsc_colocation id="apache_on_mysql" from="apache_id" to="mysql_id"
score="infinity"/>
<rsc_colocation id="email_on_apache" from="email_id" to="apache_id"
score="infinity"/>
<rsc_order id="drbd_before_fs" from="fs_id" to="ms-drbd_id" action="start"
to_action="promote"/>
<rsc_order id="ip_start" from="ip_id" to="fs_id" type="after"/>
<rsc_order id="mysql_start" from="mysql_id" to="ip_id" type="after"/>
<rsc_order id="apache_start" from="apache_id" to="mysql_id" type="after"/>
<rsc_order id="email_start" from="email_id" to="apache_id" type="after"/>
</constraints>
/etc/heartbeat/ha.cf
mcast eth0 239.0.0.1 694 1 0
bcast eth1
ping 132.206.178.1
baud 19200
serial /dev/ttyS0
node feeble-0 feeble-1
auto_failback off
use_logd on
respawn hacluster /usr/lib/heartbeat/dopd
apiauth dopd gid=haclient uid=hacluster
respawn root /usr/lib/heartbeat/pingd -m 100 -d 5s
TIA
jf
--
Jean-François Malouin, <Jean-Francois.Malouin @ bic.mni.mcgill.ca>
System/Network Administrator <http://www.bic.mni.mcgill.ca/~malin>
McConnell Brain Imaging Centre Voice: (514) 398-8924
Montréal Neurological Institute Fax: (514) 398-8948
3801 University Street, Room WB219 Montréal Québec H3A 2B4 Canada
<° ><
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems