Le Fri, 26 Jun 2009 14:17:36 -0500,
David Hoskinson <[email protected]> a écrit :

> On the primary it shows standalone
> Primary/Unknown, and on the secondary it shows standalone and
> Secondary/Unknown

Does the DRBD log mention a split brain?

With recent DRBD versions (8.2.x), you can specify policies in the DRBD
configuration file to resolve a split brain automatically.
 
> First of all how do I get these to link back up? 

It really looks like a DRBD split brain; you can resolve it manually
(see the drbd.org documentation about it).

> Last test I had to
> redo make the md which took 4 hours and I would prefer not to do that.

Huh?
 
> Second why is this happening in the first place and what can I do
> about it. I am so close.  Let me know what I can provide to help.
 
> <cib validate-with="pacemaker-1.0" crm_feature_set="3.0.1"
> have-quorum="1" admin_epoch="0" epoch="167" num_updates="0"
> cib-last-written="Fri Jun 26 14:03:04 2009" dc-uuid="mail1">
>   <configuration>
>     <crm_config>
>       <cluster_property_set id="cib-bootstrap-options">
>         <nvpair id="cib-bootstrap-options-dc-version"
> name="dc-version"
> value="1.0.4-6dede86d6105786af3a5321ccf66b44b6914f0aa"/> <nvpair
> id="cib-bootstrap-options-cluster-infrastructure"
> name="cluster-infrastructure" value="openais"/> <nvpair
> id="cib-bootstrap-options-expected-quorum-votes"
> name="expected-quorum-votes" value="2"/> <nvpair
> id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh"
> value="1245863799"/> <nvpair
> id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled"
> value="false"/> 

You should really configure a fencing device for STONITH if you care
about your data.

><nvpair id="cib-bootstrap-options-no-quorom-policy"
> name="no-quorom-policy" value="stop"/>
>         <nvpair id="cib-bootstrap-options-start-failure-is-fatal"
> name="start-failure-is-fatal" value="false"/>
>         <nvpair id="cib-bootstrap-options-stonith-action"
> name="stonith-action" value="reboot"/>
>         <nvpair id="cib-bootstrap-options-no-quorum-policy"
> name="no-quorum-policy" value="ignore"/>
>       </cluster_property_set>
>     </crm_config>
>     <nodes>
>       <node id="mail1" uname="mail1" type="normal"/>
>       <node id="mail2" uname="" type="normal"/>

There is a problem with the mail2 hostname: its uname is empty.

>     </nodes>
>     <resources>
>       <master id="ms-drbd0">
>         <meta_attributes id="ms-drbd0-meta_attributes">
>           <nvpair id="ms-drbd0-meta_attributes-clone-max"
> name="clone-max" value="2"/>
>           <nvpair id="ms-drbd0-meta_attributes-notify" name="notify"
> value="true"/>
>           <nvpair id="ms-drbd0-meta_attributes-globally-unique"
> name="globally-unique" value="false"/>
>           <nvpair id="ms-drbd0-meta_attributes-target-role"
> name="target-role" value="Started"/>
>         </meta_attributes>
>         <primitive class="ocf" id="drbd0" provider="heartbeat"
> type="drbd"> <instance_attributes id="drbd0-instance_attributes">
>             <nvpair id="drbd0-instance_attributes-drbd_resource"
> name="drbd_resource" value="r0"/>
>           </instance_attributes>
>           <operations>
>             <op id="drbd0-monitor-59s" interval="59s" name="monitor"
> role="Master" timeout="30s"/>
>             <op id="drbd0-monitor-60s" interval="60s" name="monitor"
> role="Slave" timeout="30s"/>
>           </operations>
>           <meta_attributes id="drbd0-meta_attributes">
>             <nvpair id="drbd0-meta_attributes-target-role"
> name="target-role" value="Started"/>
>           </meta_attributes>
>         </primitive>
>       </master>
>       <group id="mail-group">
>         <primitive class="ocf" id="fs0" provider="heartbeat"
> type="Filesystem">
>           <instance_attributes id="fs0-instance_attributes">
>             <nvpair id="fs0-instance_attributes-fstype" name="fstype"
> value="ext3"/>
>             <nvpair id="fs0-instance_attributes-directory"
> name="directory" value="/shared"/>
>             <nvpair id="fs0-instance_attributes-device" name="device"
> value="/dev/drbd0"/>
>           </instance_attributes>
>           <meta_attributes id="fs0-meta_attributes">
>             <nvpair id="fs0-meta_attributes-target-role"
> name="target-role" value="Started"/>
>           </meta_attributes>
>         </primitive>
>         <primitive class="ocf" id="virtual-ip" provider="heartbeat"
> type="IPaddr2">
>           <instance_attributes id="virtual-ip-instance_attributes">
>             <nvpair id="virtual-ip-instance_attributes-ip" name="ip"
> value="128.255.22.17"/>
>             <nvpair id="virtual-ip-instance_attributes-broadcast"
> name="broadcast" value="128.255.23.255"/>
>             <nvpair id="virtual-ip-instance_attributes-nic" name="nic"
> value="eth1:0"/>

No need to use a NIC alias ... 

>             <nvpair id="virtual-ip-instance_attributes-cidr_netmask"
> name="cidr_netmask" value="23"/>
>           </instance_attributes>
>           <operations>
>             <op id="virtual-ip-monitor-21s" interval="21s"
> name="monitor" timeout="5s"/>
>           </operations>
>         </primitive>
>         <primitive class="lsb" id="postfix" type="postfix">
>           <operations>
>             <op id="postfix-monitor-30s" interval="30s" name="monitor"
> timeout="30s"/>
>           </operations>
>         </primitive>
>         <primitive class="lsb" id="spamassassin" type="spamassassin">
>           <operations>
>             <op id="spamassassin-monitor-30s" interval="30s"
> name="monitor" timeout="30s"/>
>           </operations>
>         </primitive>
>         <primitive class="lsb" id="dovecot" type="dovecot">
>           <operations>
>             <op id="dovecot-monitor-30s" interval="30s" name="monitor"
> timeout="30s"/>
>           </operations>
>         </primitive>
>         <primitive class="lsb" id="amavisd" type="amavisd">
>           <operations>
>             <op id="amavisd-start-0" interval="0" name="start"
> timeout="45s"/>
>           </operations>
>         </primitive>
>         <primitive class="lsb" id="clamd" type="clamd">
>           <operations>
>             <op id="clamd-monitor-30s" interval="30s" name="monitor"
> timeout="30s"/>
>           </operations>
>         </primitive>
>         <primitive class="ocf" id="webserver" provider="heartbeat"
> type="apache">
>           <instance_attributes id="webserver-instance_attributes">
>             <nvpair id="webserver-instance_attributes-configfile"
> name="configfile" value="/etc/httpd/conf/httpd.conf"/>
>             <nvpair id="webserver-instance_attributes-httpd"
> name="httpd" value="/usr/sbin/httpd"/>
>             <nvpair id="webserver-instance_attributes-port"
> name="port" value="80"/>
>           </instance_attributes>
>           <operations>
>             <op id="webserver-monitor-30s" interval="30s"
> name="monitor" timeout="30s"/>
>           </operations>
>         </primitive>
>       </group>
>       <clone id="stonith-clone">
>         <primitive class="stonith" id="ssh-stonith" type="ssh">
>           <instance_attributes id="ssh-stonith-instance_attributes">
>             <nvpair id="ssh-stonith-instance_attributes-hostlist"
> name="hostlist" value="mail1 mail2"/>
>           </instance_attributes>
>           <operations>
>             <op id="ssh-stonith-monitor-1h" interval="1h"
> name="monitor"/> </operations>

Are these the nodes' hostnames on the crossover link between them?
Anyway, STONITH is disabled.

>         </primitive>
>       </clone>
>     </resources>
>     <constraints>
>       <rsc_location id="ms-drbd0-master-on-mail1" rsc="ms-drbd0">
>         <rule id="ms-drbd0-master-on-mail1-rule" role="master"
> score="100"> <expression attribute="#uname"
> id="ms-drbd0-master-on-mail1-expression" operation="eq"
> value="mail1"/> </rule>
>       </rsc_location>
>       <rsc_colocation id="mail-group-on-ms-drbd0" rsc="mail-group"
> score="INFINITY" with-rsc="ms-drbd0" with-rsc-role="Master"/>
>       <rsc_order first="ms-drbd0" first-action="promote"
> id="ms-drbd0-before-mail-group" score="INFINITY" then="mail-group"
> then-action="start"/>
>     </constraints>
>     <rsc_defaults/>
>     <op_defaults/>
>   </configuration>
> </cib>
> 



-- 
Jérôme Benoit aka fraggle
La Météo du Net - http://grenouille.com
OpenPGP Key ID : 9FE9161D
Key fingerprint : 9CA4 0249 AF57 A35B 34B3 AC15 FAA0 CB50 9FE9 161D

Attachment: pgpew6OMwOwpO.pgp
Description: PGP signature

_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems

Reply via email to