-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Hi,

I'm using the ocf/heartbeat resource (Master/Slave OCF Resource Agent
for DRBD) with a ocf/heartbeat Filesystem  resource to mount the
filesystem which is ext3.

This can be mounted and unmounted on each node without heartbeat.

However, when I fail over a node, the new (acquiring) node appears to be
unable to mount the filesystem, with a message as follows:

crmd[2184]: 2009/03/27_14:08:02 info: do_lrm_rsc_op: Performing
op=FS_drbd0_start_0 key=41:15:2b5cddee-44fb-4857-93ff-69172171e2da)
lrmd[2181]: 2009/03/27_14:08:02 info: rsc:FS_drbd0: start
lrmd[2181]: 2009/03/27_14:08:03 info: RA output:
(DRBD_drbd0:1:notify:stdout) No set matching
id=master-9ed2c6ec-f548-4505-b332-e2e715083ac0 in status

crmd[2184]: 2009/03/27_14:08:03 info: process_lrm_event: LRM operation
DRBD_drbd0:1_notify_0 (call=19, rc=0) complete
Filesystem[4442]:       2009/03/27_14:08:03 INFO: Running start for /dev/drbd0
on /var/lib/pgsql
lrmd[2181]: 2009/03/27_14:08:03 info: RA output: (FS_drbd0:start:stderr)
/dev/drbd0: Wrong medium type

lrmd[2181]: 2009/03/27_14:08:03 info: RA output: (FS_drbd0:start:stderr)
mount: block device /dev/drbd0 is write-protected, mounting read-only

lrmd[2181]: 2009/03/27_14:08:03 info: RA output: (FS_drbd0:start:stderr)
mount: Wrong medium type

Filesystem[4442]:       2009/03/27_14:08:04 ERROR: Couldn't mount filesystem
/dev/drbd0 on /var/lib/pgsql
crmd[2184]: 2009/03/27_14:08:04 ERROR: process_lrm_event: LRM operation
FS_drbd0_start_0 (call=21, rc=1) Error unknown error
crmd[2184]: 2009/03/27_14:08:04 info: do_lrm_rsc_op: Performing
op=DRBD_drbd0:1_notify_0 key=54:15:2b5cddee-44fb-4857-93ff-69172171e2da)
lrmd[2181]: 2009/03/27_14:08:04 info: rsc:DRBD_drbd0:1: notify

If I perform a cleanup on the filesystem resource, then the service
comes up correctly.  It appears the drbd resource may not be demoting the
old node correctly.  When mounting the device manually, I received an
error message saying that both nodes could not be primary.

Any ideas on how to solve this?

/etc/drbd.conf
global {
  usage-count yes;
}

resource drbd0 {
  protocol C;

  startup { wfc-timeout 0; # infinitely wait on resources during init
            degr-wfc-timeout 120; }
  disk    { on-io-error detach; }
  net     { timeout 60; connect-int 10; ping-int 10;
            max-buffers 2048; # data block buffers used before writing
to disk
            max-epoch-size 2048;
            shared-secret "C45hgtreeu!3z";
  ko-count 4; # Peer is dead if this count is exceeded
           }
  syncer  { rate 40M; } # sync

  on pgcluster1.weir.lan {
    device     /dev/drbd0;
    disk       /dev/hdb1;
    address    192.168.1.101:7788;
    meta-disk  internal;
  }
  on pgcluster2.weir.lan {
    device     /dev/drbd0;
    disk       /dev/hdb1;
    address    192.168.1.102:7788;
    meta-disk  internal;
  }
}

/var/lib/heartbeat/crm/cib.xml

 <cib generated="true" admin_epoch="0" have_quorum="true"
ignore_dtd="false" num_peers="2" cib_feature_revision="2.0"
ccm_transition="34" dc_uuid="9ed2c6ec-f548-4505-b332-e2e715083ac0"
epoch="161" num_updates="1" cib-last-written="Fri Mar 27 14:11:10 2009">
   <configuration>
     <crm_config>
       <cluster_property_set id="cib-bootstrap-options">
         <attributes>
           <nvpair id="cib-bootstrap-options-dc-version"
name="dc-version" value="2.1.3-node:
552305612591183b1628baa5bc6e903e0f1e26a3"/>
           <nvpair name="last-lrm-refresh"
id="cib-bootstrap-options-last-lrm-refresh" value="1238163065"/>
         </attributes>
       </cluster_property_set>
     </crm_config>
     <nodes>
       <node uname="pgcluster2.weir.lan" type="normal"
id="9ed2c6ec-f548-4505-b332-e2e715083ac0">
         <instance_attributes
id="nodes-9ed2c6ec-f548-4505-b332-e2e715083ac0">
           <attributes>
             <nvpair name="standby"
id="standby-9ed2c6ec-f548-4505-b332-e2e715083ac0" value="off"/>
           </attributes>
         </instance_attributes>
       </node>
       <node id="a28b1a3d-b34b-4791-b91c-ad77c7ff428c"
uname="pgcluster1.weir.lan" type="normal">
         <instance_attributes
id="nodes-a28b1a3d-b34b-4791-b91c-ad77c7ff428c">
           <attributes>
             <nvpair id="standby-a28b1a3d-b34b-4791-b91c-ad77c7ff428c"
name="standby" value="off"/>
           </attributes>
         </instance_attributes>
       </node>
     </nodes>
     <resources>
       <master_slave id="ms-drbd0">
         <meta_attributes id="ms-drbd0_meta_attrs">
           <attributes>
             <nvpair id="ms-drbd0_metaattr_target_role"
name="target_role" value="started"/>
             <nvpair id="ms-drbd0_metaattr_clone_max" name="clone_max"
value="2"/>
             <nvpair id="ms-drbd0_metaattr_clone_node_max"
name="clone_node_max" value="1"/>
             <nvpair id="ms-drbd0_metaattr_master_max" name="master_max"
value="1"/>
             <nvpair id="ms-drbd0_metaattr_master_node_max"
name="master_node_max" value="1"/>
             <nvpair id="ms-drbd0_metaattr_notify" name="notify"
value="true"/>
             <nvpair id="ms-drbd0_metaattr_globally_unique"
name="globally_unique" value="false"/>
           </attributes>
         </meta_attributes>
         <primitive id="DRBD_drbd0" class="ocf" type="drbd"
provider="heartbeat">
           <instance_attributes id="DRBD_drbd0_instance_attrs">
             <attributes>
               <nvpair id="f0deba6f-9513-4e34-9f8d-b949fe898b2b"
name="drbd_resource" value="drbd0"/>
             </attributes>
           </instance_attributes>
           <operations>
             <op id="0ec050a8-2fc0-4f91-8ac6-52e8f12fd341"
name="monitor" description="op-drbd0-1" interval="59s" timeout="10s"
start_delay="1m" role="Master" disabled="false"/>
             <op id="2a7499c3-5078-41ce-bff2-afdfae78a584"
name="monitor" description="op-drbd0-2" interval="60s" timeout="10s"
start_delay="1m" role="Slave" disabled="false"/>
           </operations>
           <meta_attributes id="DRBD_drbd0:1_meta_attrs">
             <attributes>
               <nvpair id="DRBD_drbd0:0_metaattr_target_role"
name="target_role" value="started"/>
             </attributes>
           </meta_attributes>
         </primitive>
       </master_slave>
       <group id="PG_Group">
         <meta_attributes id="PG_Group_meta_attrs">
           <attributes>
             <nvpair name="target_role"
id="PG_Group_metaattr_target_role" value="started"/>
             <nvpair id="PG_Group_metaattr_ordered" name="ordered"
value="true"/>
             <nvpair id="PG_Group_metaattr_collocated" name="collocated"
value="true"/>
           </attributes>
         </meta_attributes>
         <primitive id="IP_100" class="ocf" type="IPaddr2"
provider="heartbeat">
           <instance_attributes id="IP_100_instance_attrs">
             <attributes>
               <nvpair id="9e6fbfde-bc59-48ff-b188-28c8f66f6bdd"
name="ip" value="192.168.1.100"/>
               <nvpair id="0a3c6f65-0572-480e-9d27-16c8be4ec018"
name="nic" value="eth0:1"/>
             </attributes>
           </instance_attributes>
           <meta_attributes id="IP_100_meta_attrs">
             <attributes>
               <nvpair id="IP_100_metaattr_target_role"
name="target_role" value="started"/>
             </attributes>
           </meta_attributes>
         </primitive>
         <primitive id="FS_drbd0" class="ocf" type="Filesystem"
provider="heartbeat">
           <instance_attributes id="FS_drbd0_instance_attrs">
             <attributes>
               <nvpair id="a13a908f-25b9-4e49-9446-368d4485ac59"
name="device" value="/dev/drbd0"/>
               <nvpair id="da3c0716-6678-48ce-a544-41dda7e8734e"
name="directory" value="/var/lib/pgsql"/>
               <nvpair id="95b5966c-cc3d-42e4-a884-3c6dec64c3ec"
name="fstype" value="ext3"/>
             </attributes>
           </instance_attributes>
           <meta_attributes id="FS_drbd0_meta_attrs">
             <attributes>
               <nvpair id="FS_drbd0_metaattr_target_role"
name="target_role" value="started"/>
             </attributes>
           </meta_attributes>
         </primitive>
         <primitive id="PG_service" class="lsb" type="postgresql"
provider="heartbeat">
           <meta_attributes id="PG_service_meta_attrs">
             <attributes>
               <nvpair id="PG_service_metaattr_target_role"
name="target_role" value="started"/>
             </attributes>
           </meta_attributes>
         </primitive>
       </group>
     </resources>
     <constraints>
       <rsc_location id="PG_location" rsc="PG_Group">
         <rule id="prefered_PG_location" score="100">
           <expression attribute="#uname"
id="3f381e7f-9d78-4744-b872-53974241612e" operation="eq"
value="pgcluster1.weir.lan"/>
         </rule>
       </rsc_location>
     </constraints>
   </configuration>
 </cib>



-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.9 (GNU/Linux)
Comment: Using GnuPG with Fedora - http://enigmail.mozdev.org

iEYEARECAAYFAknM35YACgkQMOHG1y4QtQIPSACgg2i9nfJN7+wlNU7nZEAY4xi4
PFEAnj481nI0WgndER20+Zr9bCqPJiVm
=st9O
-----END PGP SIGNATURE-----

Attachment: smime.p7s
Description: S/MIME Cryptographic Signature

_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems

Reply via email to