I have the system working well at this point.  The master starts up drbd and
the services just fine, and the slave sits and waits patiently.  I can shut off
the slave and the cluster notices it's offline, and when the slave restarts it
shows as being online again.  I can shut down the master, and drbd and the
services transfer to the slave, which becomes the master.  However, when the
original master comes back online and resumes its duties, the drbd device is
not linked.  On the primary it shows StandAlone Primary/Unknown, and on the
secondary it shows StandAlone Secondary/Unknown.

First of all, how do I get these to link back up?  After the last test I had to
recreate the DRBD metadata (md), which took 4 hours, and I would prefer not to
do that again.

Second, why is this happening in the first place, and what can I do about it?
I am so close.  Let me know what I can provide to help.


<cib validate-with="pacemaker-1.0" crm_feature_set="3.0.1" have-quorum="1"
admin_epoch="0" epoch="167" num_updates="0" cib-last-written="Fri Jun 26
14:03:04 2009" dc-uuid="mail1">
  <configuration>
    <crm_config>
      <!--
        Cluster-wide options.

        The misspelled "no-quorom-policy" entry (value "stop") has been
        removed: the option name is "no-quorum-policy", which is set below to
        "ignore", so the misspelled entry only contradicted it.

        NOTE(review): stonith-enabled is "false" although a stonith clone is
        defined under resources; confirm whether fencing is meant to be active.
      -->
      <cluster_property_set id="cib-bootstrap-options">
        <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="1.0.4-6dede86d6105786af3a5321ccf66b44b6914f0aa"/>
        <nvpair id="cib-bootstrap-options-cluster-infrastructure" name="cluster-infrastructure" value="openais"/>
        <nvpair id="cib-bootstrap-options-expected-quorum-votes" name="expected-quorum-votes" value="2"/>
        <nvpair id="cib-bootstrap-options-last-lrm-refresh" name="last-lrm-refresh" value="1245863799"/>
        <nvpair id="cib-bootstrap-options-stonith-enabled" name="stonith-enabled" value="false"/>
        <nvpair id="cib-bootstrap-options-start-failure-is-fatal" name="start-failure-is-fatal" value="false"/>
        <nvpair id="cib-bootstrap-options-stonith-action" name="stonith-action" value="reboot"/>
        <nvpair id="cib-bootstrap-options-no-quorum-policy" name="no-quorum-policy" value="ignore"/>
      </cluster_property_set>
    </crm_config>
    <nodes>
      <!--
        The two cluster members. The uname of mail2 was empty in the posted
        dump; it is restored to "mail2" to match its id, the pattern used by
        mail1, and the stonith hostlist.
        NOTE(review): confirm against the live CIB (cibadmin -Q).
      -->
      <node id="mail1" uname="mail1" type="normal"/>
      <node id="mail2" uname="mail2" type="normal"/>
    </nodes>
    <resources>
      <!--
        Master/slave set wrapping the DRBD resource "r0": two clone instances
        (clone-max=2), with notifications enabled.
      -->
      <master id="ms-drbd0">
        <meta_attributes id="ms-drbd0-meta_attributes">
          <nvpair id="ms-drbd0-meta_attributes-clone-max" name="clone-max" value="2"/>
          <nvpair id="ms-drbd0-meta_attributes-notify" name="notify" value="true"/>
          <nvpair id="ms-drbd0-meta_attributes-globally-unique" name="globally-unique" value="false"/>
          <nvpair id="ms-drbd0-meta_attributes-target-role" name="target-role" value="Started"/>
        </meta_attributes>
        <primitive class="ocf" id="drbd0" provider="heartbeat" type="drbd">
          <instance_attributes id="drbd0-instance_attributes">
            <nvpair id="drbd0-instance_attributes-drbd_resource" name="drbd_resource" value="r0"/>
          </instance_attributes>
          <!--
            The posted dump opened this operations element twice but closed it
            once, leaving the document not well-formed; the stray start tag
            has been dropped.
          -->
          <operations>
            <!-- Separate monitor operations per role, with different intervals. -->
            <op id="drbd0-monitor-59s" interval="59s" name="monitor" role="Master" timeout="30s"/>
            <op id="drbd0-monitor-60s" interval="60s" name="monitor" role="Slave" timeout="30s"/>
          </operations>
          <meta_attributes id="drbd0-meta_attributes">
            <nvpair id="drbd0-meta_attributes-target-role" name="target-role" value="Started"/>
          </meta_attributes>
        </primitive>
      </master>
      <!--
        Resource group for the mail stack: the filesystem on /dev/drbd0, the
        service IP address, the mail-related LSB services and the Apache web
        server.
      -->
      <group id="mail-group">

        <!-- ext3 filesystem on the DRBD device, mounted at /shared. -->
        <primitive id="fs0" class="ocf" provider="heartbeat" type="Filesystem">
          <instance_attributes id="fs0-instance_attributes">
            <nvpair id="fs0-instance_attributes-fstype" name="fstype" value="ext3"/>
            <nvpair id="fs0-instance_attributes-directory" name="directory" value="/shared"/>
            <nvpair id="fs0-instance_attributes-device" name="device" value="/dev/drbd0"/>
          </instance_attributes>
          <meta_attributes id="fs0-meta_attributes">
            <nvpair id="fs0-meta_attributes-target-role" name="target-role" value="Started"/>
          </meta_attributes>
        </primitive>

        <!-- Service IP address 128.255.22.17/23, monitored every 21s. -->
        <primitive id="virtual-ip" class="ocf" provider="heartbeat" type="IPaddr2">
          <instance_attributes id="virtual-ip-instance_attributes">
            <nvpair id="virtual-ip-instance_attributes-ip" name="ip" value="128.255.22.17"/>
            <nvpair id="virtual-ip-instance_attributes-broadcast" name="broadcast" value="128.255.23.255"/>
            <nvpair id="virtual-ip-instance_attributes-nic" name="nic" value="eth1:0"/>
            <nvpair id="virtual-ip-instance_attributes-cidr_netmask" name="cidr_netmask" value="23"/>
          </instance_attributes>
          <operations>
            <op id="virtual-ip-monitor-21s" name="monitor" interval="21s" timeout="5s"/>
          </operations>
        </primitive>

        <!-- LSB init-script services; each has a 30s monitor unless noted. -->
        <primitive id="postfix" class="lsb" type="postfix">
          <operations>
            <op id="postfix-monitor-30s" name="monitor" interval="30s" timeout="30s"/>
          </operations>
        </primitive>

        <primitive id="spamassassin" class="lsb" type="spamassassin">
          <operations>
            <op id="spamassassin-monitor-30s" name="monitor" interval="30s" timeout="30s"/>
          </operations>
        </primitive>

        <primitive id="dovecot" class="lsb" type="dovecot">
          <operations>
            <op id="dovecot-monitor-30s" name="monitor" interval="30s" timeout="30s"/>
          </operations>
        </primitive>

        <!-- amavisd defines only a start operation (45s timeout), no monitor. -->
        <primitive id="amavisd" class="lsb" type="amavisd">
          <operations>
            <op id="amavisd-start-0" name="start" interval="0" timeout="45s"/>
          </operations>
        </primitive>

        <primitive id="clamd" class="lsb" type="clamd">
          <operations>
            <op id="clamd-monitor-30s" name="monitor" interval="30s" timeout="30s"/>
          </operations>
        </primitive>

        <!-- Apache, using the stock httpd binary and configuration file on port 80. -->
        <primitive id="webserver" class="ocf" provider="heartbeat" type="apache">
          <instance_attributes id="webserver-instance_attributes">
            <nvpair id="webserver-instance_attributes-configfile" name="configfile" value="/etc/httpd/conf/httpd.conf"/>
            <nvpair id="webserver-instance_attributes-httpd" name="httpd" value="/usr/sbin/httpd"/>
            <nvpair id="webserver-instance_attributes-port" name="port" value="80"/>
          </instance_attributes>
          <operations>
            <op id="webserver-monitor-30s" name="monitor" interval="30s" timeout="30s"/>
          </operations>
        </primitive>

      </group>
      <!-- Cloned ssh-based stonith resource covering mail1 and mail2, monitored hourly. -->
      <clone id="stonith-clone">
        <primitive id="ssh-stonith" class="stonith" type="ssh">
          <instance_attributes id="ssh-stonith-instance_attributes">
            <nvpair id="ssh-stonith-instance_attributes-hostlist" name="hostlist" value="mail1 mail2"/>
          </instance_attributes>
          <operations>
            <op id="ssh-stonith-monitor-1h" name="monitor" interval="1h"/>
          </operations>
        </primitive>
      </clone>
    </resources>
    <constraints>
      <!-- Location rule: score 100 for the master role of ms-drbd0 on the node whose #uname is mail1. -->
      <rsc_location id="ms-drbd0-master-on-mail1" rsc="ms-drbd0">
        <rule id="ms-drbd0-master-on-mail1-rule" role="master" score="100">
          <expression id="ms-drbd0-master-on-mail1-expression"
                      attribute="#uname"
                      operation="eq"
                      value="mail1"/>
        </rule>
      </rsc_location>

      <!-- Colocation: mail-group with the Master instance of ms-drbd0 (score INFINITY). -->
      <rsc_colocation id="mail-group-on-ms-drbd0"
                      rsc="mail-group"
                      with-rsc="ms-drbd0"
                      with-rsc-role="Master"
                      score="INFINITY"/>

      <!-- Ordering: promote ms-drbd0 first, then start mail-group. -->
      <rsc_order id="ms-drbd0-before-mail-group"
                 first="ms-drbd0"
                 first-action="promote"
                 then="mail-group"
                 then-action="start"
                 score="INFINITY"/>
    </constraints>
    <rsc_defaults/>
    <op_defaults/>
  </configuration>
</cib>

Would I be better off not switching back to the original machine?

Thanks to everybody for their help...


_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems

Reply via email to