On 13/03/2008, Schmidt, Florian <[EMAIL PROTECTED]> wrote: > Here's something to read about dopd: > > http://blogs.linbit.com/florian/2007/10/01/an-underrated-cluster-admins-companion-dopd/
I read that and then used the instructions from http://www.drbd.org/users-guide/s-heartbeat-dopd.html. > These are 2 different things: > dopd is for outdating the secondaries DRBD-resources in case of DRBD > split-brain, which is not the same as Heartbeat split-brain. It only works, > if there are Heartbeat-lines which are different from DRBD-connection. > > Pingd is for checking connectivity to other ping nodes (such as routers or > switches) and maybe to switch the resources in case that other cluster-nodes > have better connectivity. I'm using the heartbeat outdater at the moment. My setup is using two NICs: one to the local network and one with a crossover cable to the other node (the production machines will have 3 NICs: 1 external/internet, 1 internal network and one with crossover). When I take out the network cable from node1, it fences the secondary partition (/dev/drbd1) on node1 correctly, but the primary partition (/dev/drbd0) on node1 becomes split-brain. Node2, in the meantime, keeps its primary partition (/dev/drbd1) and changes /dev/drbd0 to primary just fine. So I still end up having to manually intervene to fix the split brain of /dev/drbd0 on node1 before I can bring it back in. After the manual intervention, node1 takes back control of /dev/drbd0, and NFS etc. switch nodes successfully. I need node1 to outdate both partitions, since it has lost connectivity, so that they both sync from node2 when node1 returns. Then I can check node1 and let it take back its primary drbd partition. (I'm still not totally sure how to do this without just restarting heartbeat on node1.) I've attached my ha.cf, cib.xml and drbd.conf. I'd be grateful if someone has a little time to skim them and point out any errors. Besides the split brain when the network is lost, the setup seems to be working nicely. Thanks for all the help and patience from some of you guys so far. Thanks for any further help. Guy -- Don't just do something...sit there!
ha.cf
Description: Binary data
drbd.conf
Description: Binary data
<cib admin_epoch="0" epoch="5" have_quorum="false" ignore_dtd="false" num_peers="0" cib_feature_revision="1.3" generated="false" num_updates="583" cib-last-written="Thu Mar 13 10:55:12 2008">
<configuration>
<crm_config>
  <!-- Cluster-wide CRM options, all held in the single bootstrap property set. -->
  <cluster_property_set id="cib-bootstrap-options">
    <attributes>
      <!-- Placement: symmetric cluster, stickiness 200, failure stickiness -200. -->
      <nvpair name="symmetric-cluster" value="true" id="cib-bootstrap-options-symmetric-cluster"/>
      <nvpair name="no-quorum-policy" value="stop" id="cib-bootstrap-options-no-quorum-policy"/>
      <nvpair name="default-resource-stickiness" value="200" id="cib-bootstrap-options-default-resource-stickiness"/>
      <nvpair name="default-resource-failure-stickiness" value="-200" id="cib-bootstrap-options-default-resource-failure-stickiness"/>

      <!-- Fencing: STONITH is switched off here, although a stonith-action of "reboot" is still configured. -->
      <nvpair name="stonith-enabled" value="false" id="cib-bootstrap-options-stonith-enabled"/>
      <nvpair name="stonith-action" value="reboot" id="cib-bootstrap-options-stonith-action"/>

      <!-- Orphan handling and clean-up after stop. -->
      <nvpair name="stop-orphan-resources" value="true" id="cib-bootstrap-options-stop-orphan-resources"/>
      <nvpair name="stop-orphan-actions" value="true" id="cib-bootstrap-options-stop-orphan-actions"/>
      <nvpair name="remove-after-stop" value="false" id="cib-bootstrap-options-remove-after-stop"/>

      <!-- Naming, timeouts (5min transition idle, 15s default action) and management default. -->
      <nvpair name="short-resource-names" value="true" id="cib-bootstrap-options-short-resource-names"/>
      <nvpair name="transition-idle-timeout" value="5min" id="cib-bootstrap-options-transition-idle-timeout"/>
      <nvpair name="default-action-timeout" value="15s" id="cib-bootstrap-options-default-action-timeout"/>
      <nvpair name="is-managed-default" value="true" id="cib-bootstrap-options-is-managed-default"/>
    </attributes>
  </cluster_property_set>
</crm_config>
<nodes>
  <!-- The two cluster members, drbd2 and drbd1, both declared with type "normal". -->
  <node uname="drbd2" type="normal" id="38f67b9a-1831-49d0-9d93-afb1071f6b8e"/>
  <node uname="drbd1" type="normal" id="b776756f-dc29-4b1d-83e7-1980b4aca7e0"/>
</nodes>
<resources>
<clone id="pingd-clone">
  <!-- Connectivity monitor: a clone of the OCF pingd agent; instances are not globally unique. -->
  <meta_attributes id="pingd-clone-ma">
    <attributes>
      <nvpair id="pingd-clone-1" name="globally_unique" value="false"/>
    </attributes>
  </meta_attributes>
  <primitive id="pingd-child" provider="heartbeat" class="ocf" type="pingd">
    <operations>
      <op id="pingd-child-monitor" name="monitor" interval="20s" timeout="60s" prereq="nothing"/>
      <op id="pingd-child-start" name="start" prereq="nothing"/>
    </operations>
    <instance_attributes id="pingd_inst_attr">
      <attributes>
        <nvpair id="pingd-1" name="dampen" value="5s"/>
        <nvpair id="pingd-2" name="multiplier" value="100"/>
        <!-- The location rules for ms-drbd0 and ms-drbd1 in this CIB score on a node
             attribute called "pingd-clone". The attribute name is therefore set
             explicitly; without a "name" parameter the agent is documented to publish
             its result as "pingd", and those rules would never match. -->
        <nvpair id="pingd-3" name="name" value="pingd-clone"/>
      </attributes>
    </instance_attributes>
  </primitive>
</clone>
<master_slave id="ms-drbd0">
  <!-- Master/slave set for DRBD resource r0: 2 clone instances, at most 1 per node,
       at most 1 master overall and per node, notifications enabled. -->
  <meta_attributes id="ma-ms-drbd0">
    <attributes>
      <nvpair name="clone_max" value="2" id="ma-ms-drbd0-1"/>
      <nvpair name="clone_node_max" value="1" id="ma-ms-drbd0-2"/>
      <nvpair name="master_max" value="1" id="ma-ms-drbd0-3"/>
      <nvpair name="master_node_max" value="1" id="ma-ms-drbd0-4"/>
      <nvpair name="notify" value="yes" id="ma-ms-drbd0-5"/>
      <nvpair name="globally_unique" value="false" id="ma-ms-drbd0-6"/>
      <nvpair name="target_role" value="#default" id="ma-ms-drbd0-7"/>
    </attributes>
  </meta_attributes>
  <!-- NOTE(review): no <operations> are declared for this primitive, so no monitor
       operation is configured for it here. -->
  <primitive id="drbd0" type="drbd" class="ocf" provider="heartbeat">
    <instance_attributes id="ia-drbd0">
      <attributes>
        <nvpair name="drbd_resource" value="r0" id="ia-drbd0-1"/>
      </attributes>
    </instance_attributes>
  </primitive>
</master_slave>
<master_slave id="ms-drbd1">
  <!-- Master/slave set for DRBD resource r1: 2 clone instances, at most 1 per node,
       at most 1 master overall and per node, notifications enabled. -->
  <meta_attributes id="ma-ms-drbd1">
    <attributes>
      <nvpair name="clone_max" value="2" id="ma-ms-drbd1-1"/>
      <nvpair name="clone_node_max" value="1" id="ma-ms-drbd1-2"/>
      <nvpair name="master_max" value="1" id="ma-ms-drbd1-3"/>
      <nvpair name="master_node_max" value="1" id="ma-ms-drbd1-4"/>
      <nvpair name="notify" value="yes" id="ma-ms-drbd1-5"/>
      <nvpair name="globally_unique" value="false" id="ma-ms-drbd1-6"/>
      <nvpair name="target_role" value="#default" id="ma-ms-drbd1-7"/>
    </attributes>
  </meta_attributes>
  <!-- NOTE(review): no <operations> are declared for this primitive, so no monitor
       operation is configured for it here. -->
  <primitive id="drbd1" type="drbd" class="ocf" provider="heartbeat">
    <instance_attributes id="ia-drbd1">
      <attributes>
        <nvpair name="drbd_resource" value="r1" id="ia-drbd1-1"/>
      </attributes>
    </instance_attributes>
  </primitive>
</master_slave>
<group id="group_1">
  <!-- Service stack on /dev/drbd0, in member order: ext3 filesystem at /h.virt/store1,
       the nfs-common and nfs-kernel-server init scripts, then IP 10.172.19.120
       (netmask 24) on eth0.
       NOTE(review): group_11 in this CIB manages the same two NFS init scripts;
       confirm how stopping one group behaves when both groups run on the same node. -->

  <!-- Filesystem mount, monitored every 120s. -->
  <primitive id="Filesystem_2" type="Filesystem" class="ocf" provider="heartbeat">
    <operations>
      <op id="Filesystem_2_mon" name="monitor" interval="120s" timeout="60s"/>
    </operations>
    <instance_attributes id="Filesystem_2_inst_attr">
      <attributes>
        <nvpair name="device" value="/dev/drbd0" id="Filesystem_2_attr_0"/>
        <nvpair name="directory" value="/h.virt/store1" id="Filesystem_2_attr_1"/>
        <nvpair name="fstype" value="ext3" id="Filesystem_2_attr_2"/>
      </attributes>
    </instance_attributes>
  </primitive>

  <!-- LSB init scripts for NFS, each monitored every 120s. -->
  <primitive id="nfs-common_4" type="nfs-common" class="lsb" provider="heartbeat">
    <operations>
      <op id="nfs-common_4_mon" name="monitor" interval="120s" timeout="60s"/>
    </operations>
  </primitive>
  <primitive id="nfs-kernel-server_5" type="nfs-kernel-server" class="lsb" provider="heartbeat">
    <operations>
      <op id="nfs-kernel-server_5_mon" name="monitor" interval="120s" timeout="60s"/>
    </operations>
  </primitive>

  <!-- Service address, monitored every 5s with a 5s timeout. -->
  <primitive id="IPaddr_10_172_19_120" type="IPaddr" class="ocf" provider="heartbeat">
    <operations>
      <op id="IPaddr_10_172_19_120_mon" name="monitor" interval="5s" timeout="5s"/>
    </operations>
    <instance_attributes id="IPaddr_10_172_19_120_inst_attr">
      <attributes>
        <nvpair name="ip" value="10.172.19.120" id="IPaddr_10_172_19_120_attr_0"/>
        <nvpair name="netmask" value="24" id="IPaddr_10_172_19_120_attr_1"/>
        <nvpair name="nic" value="eth0" id="IPaddr_10_172_19_120_attr_2"/>
      </attributes>
    </instance_attributes>
  </primitive>
</group>
<group id="group_11">
  <!-- Service stack on /dev/drbd1, in member order: ext3 filesystem at /h.virt/store2,
       the nfs-common and nfs-kernel-server init scripts, then IP 10.172.19.123
       (netmask 24) on eth0.
       NOTE(review): group_1 in this CIB manages the same two NFS init scripts;
       confirm how stopping one group behaves when both groups run on the same node. -->

  <!-- Filesystem mount, monitored every 120s. -->
  <primitive id="Filesystem_12" type="Filesystem" class="ocf" provider="heartbeat">
    <operations>
      <op id="Filesystem_12_mon" name="monitor" interval="120s" timeout="60s"/>
    </operations>
    <instance_attributes id="Filesystem_12_inst_attr">
      <attributes>
        <nvpair name="device" value="/dev/drbd1" id="Filesystem_12_attr_0"/>
        <nvpair name="directory" value="/h.virt/store2" id="Filesystem_12_attr_1"/>
        <nvpair name="fstype" value="ext3" id="Filesystem_12_attr_2"/>
      </attributes>
    </instance_attributes>
  </primitive>

  <!-- LSB init scripts for NFS, each monitored every 120s. -->
  <primitive id="nfs-common_14" type="nfs-common" class="lsb" provider="heartbeat">
    <operations>
      <op id="nfs-common_14_mon" name="monitor" interval="120s" timeout="60s"/>
    </operations>
  </primitive>
  <primitive id="nfs-kernel-server_15" type="nfs-kernel-server" class="lsb" provider="heartbeat">
    <operations>
      <op id="nfs-kernel-server_15_mon" name="monitor" interval="120s" timeout="60s"/>
    </operations>
  </primitive>

  <!-- Service address, monitored every 5s with a 5s timeout. -->
  <primitive id="IPaddr_10_172_19_123" type="IPaddr" class="ocf" provider="heartbeat">
    <operations>
      <op id="IPaddr_10_172_19_123_mon" name="monitor" interval="5s" timeout="5s"/>
    </operations>
    <instance_attributes id="IPaddr_10_172_19_123_inst_attr">
      <attributes>
        <nvpair name="ip" value="10.172.19.123" id="IPaddr_10_172_19_123_attr_0"/>
        <nvpair name="netmask" value="24" id="IPaddr_10_172_19_123_attr_1"/>
        <nvpair name="nic" value="eth0" id="IPaddr_10_172_19_123_attr_2"/>
      </attributes>
    </instance_attributes>
  </primitive>
</group>
</resources>
<constraints>
  <!-- Master-role preferences: ms-drbd0 scores 100 as master on node drbd1,
       ms-drbd1 scores 100 as master on node drbd2. -->
  <rsc_location rsc="ms-drbd0" id="loc:r0_likes_drbd1">
    <rule role="master" score="100" id="rule:r0_likes_drbd1">
      <expression id="4e72c134-8f56-49bd-b3af-b3fde396d83b" attribute="#uname" operation="eq" value="drbd1"/>
    </rule>
  </rsc_location>
  <rsc_location rsc="ms-drbd1" id="loc:r1_likes_drbd2">
    <rule role="master" score="100" id="rule:r1_likes_drbd2">
      <expression id="5b8cc51c-00cc-4aee-b3a7-3c5598ffa41c" attribute="#uname" operation="eq" value="drbd2"/>
    </rule>
  </rsc_location>

  <!-- Connectivity scoring: where the node attribute "pingd-clone" is defined, its
       value is taken as the location score for each master/slave set.
       NOTE(review): these rules only take effect if the pingd resource publishes its
       result under the attribute name "pingd-clone"; confirm against the parameters
       of the pingd primitive. -->
  <rsc_location rsc="ms-drbd0" id="ms-drbd0:connected">
    <rule score_attribute="pingd-clone" id="ms-drbd0:connected:rule">
      <expression attribute="pingd-clone" operation="defined" id="ms-drbd0:connected:expr:defined"/>
    </rule>
  </rsc_location>
  <rsc_location rsc="ms-drbd1" id="ms-drbd1:connected">
    <rule score_attribute="pingd-clone" id="ms-drbd1:connected:rule">
      <expression attribute="pingd-clone" operation="defined" id="ms-drbd1:connected:expr:defined"/>
    </rule>
  </rsc_location>

  <!-- group_1 versus ms-drbd0: score -infinity against the stopped and slave roles,
       infinity with the master role, plus an ordering between the start of group_1
       and the promote of ms-drbd0 (presumably start after promote; confirm the
       default ordering type). -->
  <rsc_colocation id="group_1_on_r0_stopped" from="group_1" to="ms-drbd0" to_role="stopped" score="-infinity"/>
  <rsc_colocation id="group_1_on_r0_slave" from="group_1" to="ms-drbd0" to_role="slave" score="-infinity"/>
  <rsc_colocation id="group_1_on_r0_master" from="group_1" to="ms-drbd0" to_role="master" score="infinity"/>
  <rsc_order id="r0_before_group_1" from="group_1" action="start" to="ms-drbd0" to_action="promote"/>

  <!-- group_11 versus ms-drbd1: the same pattern as above.
       NOTE(review): the ids of these three colocations say "r0" although they
       target ms-drbd1; the ids are kept as they are. -->
  <rsc_colocation id="group_11_on_r0_stopped" from="group_11" to="ms-drbd1" to_role="stopped" score="-infinity"/>
  <rsc_colocation id="group_11_on_r0_slave" from="group_11" to="ms-drbd1" to_role="slave" score="-infinity"/>
  <rsc_colocation id="group_11_on_r0_master" from="group_11" to="ms-drbd1" to_role="master" score="infinity"/>
  <rsc_order id="r1_before_group_11" from="group_11" action="start" to="ms-drbd1" to_action="promote"/>
</constraints>
</configuration>
</cib>
_______________________________________________ Linux-HA mailing list [email protected] http://lists.linux-ha.org/mailman/listinfo/linux-ha See also: http://linux-ha.org/ReportingProblems
