On 13/03/2008, Schmidt, Florian <[EMAIL PROTECTED]> wrote:
> Here's something to read about dopd:
>  
> http://blogs.linbit.com/florian/2007/10/01/an-underrated-cluster-admins-companion-dopd/

I read that and then used the instructions from
http://www.drbd.org/users-guide/s-heartbeat-dopd.html.

> These are 2 different things:
>  dopd is for outdating the secondaries DRBD-resources in case of DRBD 
> split-brain, which is not the same as Heartbeat split-brain. It only works, 
> if there are Heartbeat-lines which are different from DRBD-connection.
>
>  Pingd is for checking connectivity to other ping nodes (such as routers or 
> switches) and maybe to switch the resources in case that other cluster-nodes 
> have better connectivity

I'm using the heartbeat outdater at the moment. My setup is using two
nics. One to the local network and one with crossover cable to the
other node (the production machines will have 3 nics, 1
external/internet, 1 internal network and one with crossover). When I
take out the network cable from node1 it fences the secondary
partition (/dev/drbd1) on node1 correctly, but the primary partition
(/dev/drbd0) on node1 becomes split-brain. Node2 in the meantime
keeps its primary partition (/dev/drbd1) and changes /dev/drbd0 to
primary just fine. So I still land up having to manually intervene to
fix the split brain of /dev/drbd0 on node1 before I can bring it back
in. After the manual intervention, node1 takes back control of
/dev/drbd0 and nfs etc. switch nodes successfully.

I need node1 to outdate both partitions since it has lost connectivity
so that they both sync from node2 when node1 returns. Then I can check
node1 and let it take back its primary drbd partition. (I'm still not
totally sure how to do this without just restarting heartbeat on
node1.)

I've attached my ha.cf, cib.xml and drbd.conf, in case someone has a little
time to skim them and point out any errors. Besides the split brain
when the network is lost, the setup seems to be working nicely.

Thanks for all the help and patience from some of you guys so far.

Thanks for any further help.
Guy

-- 
Don't just do something...sit there!

Attachment: ha.cf
Description: Binary data

Attachment: drbd.conf
Description: Binary data

 <cib admin_epoch="0" epoch="5" have_quorum="false" ignore_dtd="false" num_peers="0" cib_feature_revision="1.3" generated="false" num_updates="583" cib-last-written="Thu Mar 13 10:55:12 2008">
   <configuration>
     <!-- Cluster-wide options, stored as name/value nvpairs in a single property set. -->
     <crm_config>
       <cluster_property_set id="cib-bootstrap-options">
         <attributes>
           <!-- Placement defaults: symmetric cluster, stickiness 200, failure stickiness -200. -->
           <nvpair id="cib-bootstrap-options-symmetric-cluster" name="symmetric-cluster" value="true"/>
           <nvpair id="cib-bootstrap-options-no-quorum-policy" name="no-quorum-policy" value="stop"/>
           <nvpair id="cib-bootstrap-options-default-resource-stickiness" name="default-resource-stickiness" value="200"/>
           <nvpair id="cib-bootstrap-options-default-resource-failure-stickiness" name="default-resource-failure-stickiness" value="-200"/>
           <!-- NOTE(review): stonith-enabled is "false", so the stonith-action value
                below presumably has no effect; confirm this is intended. -->
           <nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="false"/>
           <nvpair id="cib-bootstrap-options-stonith-action" name="stonith-action" value="reboot"/>
           <nvpair id="cib-bootstrap-options-stop-orphan-resources" name="stop-orphan-resources" value="true"/>
           <nvpair id="cib-bootstrap-options-stop-orphan-actions" name="stop-orphan-actions" value="true"/>
           <nvpair id="cib-bootstrap-options-remove-after-stop" name="remove-after-stop" value="false"/>
           <nvpair id="cib-bootstrap-options-short-resource-names" name="short-resource-names" value="true"/>
           <!-- Timeouts: 5min transition idle timeout, 15s default per-action timeout. -->
           <nvpair id="cib-bootstrap-options-transition-idle-timeout" name="transition-idle-timeout" value="5min"/>
           <nvpair id="cib-bootstrap-options-default-action-timeout" name="default-action-timeout" value="15s"/>
           <nvpair id="cib-bootstrap-options-is-managed-default" name="is-managed-default" value="true"/>
         </attributes>
       </cluster_property_set>
     </crm_config>
     <!-- The two cluster members, drbd2 and drbd1, both declared with type "normal". -->
     <nodes>
       <node id="38f67b9a-1831-49d0-9d93-afb1071f6b8e" uname="drbd2" type="normal"/>
       <node id="b776756f-dc29-4b1d-83e7-1980b4aca7e0" uname="drbd1" type="normal"/>
     </nodes>
     <resources>
       <!-- Clone wrapping the ocf/heartbeat "pingd" primitive (globally_unique=false). -->
       <clone id="pingd-clone">
         <meta_attributes id="pingd-clone-ma">
           <attributes>
             <nvpair id="pingd-clone-1" name="globally_unique" value="false"/>
           </attributes>
         </meta_attributes>
         <primitive id="pingd-child" provider="heartbeat" class="ocf" type="pingd">
           <operations>
             <!-- Monitor every 20s with a 60s timeout; neither operation has a prerequisite. -->
             <op id="pingd-child-monitor" name="monitor" interval="20s" timeout="60s" prereq="nothing"/>
             <op id="pingd-child-start" name="start" prereq="nothing"/>
           </operations>
           <instance_attributes id="pingd_inst_attr">
             <attributes>
               <!-- Only "dampen" and "multiplier" are set here.
                    NOTE(review): the location rules in the constraints section read a node
                    attribute called "pingd-clone", but nothing in this primitive names the
                    attribute it publishes; confirm the agent really writes "pingd-clone",
                    otherwise those rules never match. -->
               <nvpair id="pingd-1" name="dampen" value="5s"/>
               <nvpair id="pingd-2" name="multiplier" value="100"/>
             </attributes>
           </instance_attributes>
         </primitive>
       </clone>
       <!-- Master/slave set for DRBD resource "r0": two instances in total, at most one
            per node, at most one master, with notifications turned on. -->
       <master_slave id="ms-drbd0">
         <meta_attributes id="ma-ms-drbd0">
           <attributes>
             <nvpair id="ma-ms-drbd0-1" name="clone_max" value="2"/>
             <nvpair id="ma-ms-drbd0-2" name="clone_node_max" value="1"/>
             <nvpair id="ma-ms-drbd0-3" name="master_max" value="1"/>
             <nvpair id="ma-ms-drbd0-4" name="master_node_max" value="1"/>
             <nvpair id="ma-ms-drbd0-5" name="notify" value="yes"/>
             <nvpair id="ma-ms-drbd0-6" name="globally_unique" value="false"/>
             <nvpair id="ma-ms-drbd0-7" name="target_role" value="#default"/>
           </attributes>
         </meta_attributes>
         <!-- The wrapped primitive defines no operations element, so no monitor is configured here. -->
         <primitive id="drbd0" class="ocf" provider="heartbeat" type="drbd">
           <instance_attributes id="ia-drbd0">
             <attributes>
               <nvpair id="ia-drbd0-1" name="drbd_resource" value="r0"/>
             </attributes>
           </instance_attributes>
         </primitive>
       </master_slave>
       <!-- Master/slave set for DRBD resource "r1": two instances in total, at most one
            per node, at most one master, with notifications turned on. -->
       <master_slave id="ms-drbd1">
         <meta_attributes id="ma-ms-drbd1">
           <attributes>
             <nvpair id="ma-ms-drbd1-1" name="clone_max" value="2"/>
             <nvpair id="ma-ms-drbd1-2" name="clone_node_max" value="1"/>
             <nvpair id="ma-ms-drbd1-3" name="master_max" value="1"/>
             <nvpair id="ma-ms-drbd1-4" name="master_node_max" value="1"/>
             <nvpair id="ma-ms-drbd1-5" name="notify" value="yes"/>
             <nvpair id="ma-ms-drbd1-6" name="globally_unique" value="false"/>
             <nvpair id="ma-ms-drbd1-7" name="target_role" value="#default"/>
           </attributes>
         </meta_attributes>
         <!-- The wrapped primitive defines no operations element, so no monitor is configured here. -->
         <primitive id="drbd1" class="ocf" provider="heartbeat" type="drbd">
           <instance_attributes id="ia-drbd1">
             <attributes>
               <nvpair id="ia-drbd1-1" name="drbd_resource" value="r1"/>
             </attributes>
           </instance_attributes>
         </primitive>
       </master_slave>
       <!-- Service group on /dev/drbd0: ext3 filesystem at /h.virt/store1, the nfs-common
            and nfs-kernel-server init scripts, and the address 10.172.19.120/24 on eth0.
            NOTE(review): group_11 declares its own resources for the same two NFS init
            scripts; when both groups sit on one node, stopping either group presumably
            stops NFS for the other as well. Confirm, and consider managing NFS once. -->
       <group id="group_1">
         <primitive class="ocf" id="Filesystem_2" provider="heartbeat" type="Filesystem">
           <operations>
             <op id="Filesystem_2_mon" interval="120s" name="monitor" timeout="60s"/>
           </operations>
           <instance_attributes id="Filesystem_2_inst_attr">
             <attributes>
               <nvpair id="Filesystem_2_attr_0" name="device" value="/dev/drbd0"/>
               <nvpair id="Filesystem_2_attr_1" name="directory" value="/h.virt/store1"/>
               <nvpair id="Filesystem_2_attr_2" name="fstype" value="ext3"/>
             </attributes>
           </instance_attributes>
         </primitive>
         <!-- NOTE(review): the two class="lsb" primitives below carry provider="heartbeat";
              the provider is presumably only meaningful for OCF agents, so confirm it is ignored. -->
         <primitive class="lsb" id="nfs-common_4" provider="heartbeat" type="nfs-common">
           <operations>
             <op id="nfs-common_4_mon" interval="120s" name="monitor" timeout="60s"/>
           </operations>
         </primitive>
         <primitive class="lsb" id="nfs-kernel-server_5" provider="heartbeat" type="nfs-kernel-server">
           <operations>
             <op id="nfs-kernel-server_5_mon" interval="120s" name="monitor" timeout="60s"/>
           </operations>
         </primitive>
         <!-- Service address; monitored every 5s with a 5s timeout. -->
         <primitive class="ocf" id="IPaddr_10_172_19_120" provider="heartbeat" type="IPaddr">
           <operations>
             <op id="IPaddr_10_172_19_120_mon" interval="5s" name="monitor" timeout="5s"/>
           </operations>
           <instance_attributes id="IPaddr_10_172_19_120_inst_attr">
             <attributes>
               <nvpair id="IPaddr_10_172_19_120_attr_0" name="ip" value="10.172.19.120"/>
               <nvpair id="IPaddr_10_172_19_120_attr_1" name="netmask" value="24"/>
               <nvpair id="IPaddr_10_172_19_120_attr_2" name="nic" value="eth0"/>
             </attributes>
           </instance_attributes>
         </primitive>
       </group>
       <!-- Service group on /dev/drbd1: ext3 filesystem at /h.virt/store2, the nfs-common
            and nfs-kernel-server init scripts, and the address 10.172.19.123/24 on eth0.
            NOTE(review): group_1 declares its own resources for the same two NFS init
            scripts; when both groups sit on one node, stopping either group presumably
            stops NFS for the other as well. Confirm, and consider managing NFS once. -->
       <group id="group_11">
         <primitive class="ocf" id="Filesystem_12" provider="heartbeat" type="Filesystem">
           <operations>
             <op id="Filesystem_12_mon" interval="120s" name="monitor" timeout="60s"/>
           </operations>
           <instance_attributes id="Filesystem_12_inst_attr">
             <attributes>
               <nvpair id="Filesystem_12_attr_0" name="device" value="/dev/drbd1"/>
               <nvpair id="Filesystem_12_attr_1" name="directory" value="/h.virt/store2"/>
               <nvpair id="Filesystem_12_attr_2" name="fstype" value="ext3"/>
             </attributes>
           </instance_attributes>
         </primitive>
         <!-- NOTE(review): the two class="lsb" primitives below carry provider="heartbeat";
              the provider is presumably only meaningful for OCF agents, so confirm it is ignored. -->
         <primitive class="lsb" id="nfs-common_14" provider="heartbeat" type="nfs-common">
           <operations>
             <op id="nfs-common_14_mon" interval="120s" name="monitor" timeout="60s"/>
           </operations>
         </primitive>
         <primitive class="lsb" id="nfs-kernel-server_15" provider="heartbeat" type="nfs-kernel-server">
           <operations>
             <op id="nfs-kernel-server_15_mon" interval="120s" name="monitor" timeout="60s"/>
           </operations>
         </primitive>
         <!-- Service address; monitored every 5s with a 5s timeout. -->
         <primitive class="ocf" id="IPaddr_10_172_19_123" provider="heartbeat" type="IPaddr">
           <operations>
             <op id="IPaddr_10_172_19_123_mon" interval="5s" name="monitor" timeout="5s"/>
           </operations>
           <instance_attributes id="IPaddr_10_172_19_123_inst_attr">
             <attributes>
               <nvpair id="IPaddr_10_172_19_123_attr_0" name="ip" value="10.172.19.123"/>
               <nvpair id="IPaddr_10_172_19_123_attr_1" name="netmask" value="24"/>
               <nvpair id="IPaddr_10_172_19_123_attr_2" name="nic" value="eth0"/>
             </attributes>
           </instance_attributes>
         </primitive>
       </group>
     </resources>
     <!-- Placement, colocation and ordering rules tying each service group to the
          master instance of its DRBD master/slave set. -->
     <constraints>
       <!-- Preferred masters: score 100 for the master role of ms-drbd0 on node drbd1
            and of ms-drbd1 on node drbd2. -->
       <rsc_location id="loc:r0_likes_drbd1" rsc="ms-drbd0">
         <rule id="rule:r0_likes_drbd1" role="master" score="100">
           <expression attribute="#uname" operation="eq" value="drbd1" id="4e72c134-8f56-49bd-b3af-b3fde396d83b"/>
         </rule>
       </rsc_location>
       <rsc_location id="loc:r1_likes_drbd2" rsc="ms-drbd1">
         <rule id="rule:r1_likes_drbd2" role="master" score="100">
           <expression attribute="#uname" operation="eq" value="drbd2" id="5b8cc51c-00cc-4aee-b3a7-3c5598ffa41c"/>
         </rule>
       </rsc_location>
       <!-- Connectivity rules: where the node attribute "pingd-clone" is defined, its
            value is used as the location score for the DRBD sets.
            NOTE(review): the pingd primitive in this file sets only "dampen" and
            "multiplier"; confirm the agent publishes its result under the attribute
            name "pingd-clone", otherwise these two rules never apply. -->
       <rsc_location id="ms-drbd0:connected" rsc="ms-drbd0">
         <rule id="ms-drbd0:connected:rule" score_attribute="pingd-clone">
           <expression id="ms-drbd0:connected:expr:defined" attribute="pingd-clone" operation="defined"/>
         </rule>
       </rsc_location>
       <rsc_location id="ms-drbd1:connected" rsc="ms-drbd1">
         <rule id="ms-drbd1:connected:rule" score_attribute="pingd-clone">
           <expression id="ms-drbd1:connected:expr:defined" attribute="pingd-clone" operation="defined"/>
         </rule>
       </rsc_location>
       <!-- group_1 and ms-drbd0 (DRBD resource r0): score -infinity against the stopped
            and slave roles, +infinity with the master role; the order constraint pairs
            the group's start with the set's promote. -->
       <rsc_colocation id="group_1_on_r0_stopped" to="ms-drbd0" to_role="stopped" from="group_1" score="-infinity"/>
       <rsc_colocation id="group_1_on_r0_slave" to="ms-drbd0" to_role="slave" from="group_1" score="-infinity"/>
       <rsc_colocation id="group_1_on_r0_master" to="ms-drbd0" to_role="master" from="group_1" score="infinity"/>
       <rsc_order id="r0_before_group_1" from="group_1" action="start" to="ms-drbd0" to_action="promote"/>
       <!-- group_11 and ms-drbd1 (DRBD resource r1): same pattern as above. The ids name
            r1 so that they match the resource they actually reference. -->
       <rsc_colocation id="group_11_on_r1_stopped" to="ms-drbd1" to_role="stopped" from="group_11" score="-infinity"/>
       <rsc_colocation id="group_11_on_r1_slave" to="ms-drbd1" to_role="slave" from="group_11" score="-infinity"/>
       <rsc_colocation id="group_11_on_r1_master" to="ms-drbd1" to_role="master" from="group_11" score="infinity"/>
       <rsc_order id="r1_before_group_11" from="group_11" action="start" to="ms-drbd1" to_action="promote"/>
     </constraints>
   </configuration>
 </cib>
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems

Reply via email to