Hi list-denizens.

I think I've finally gotten my head around CRM/CIB. The whole deal of cluster resources, then ordering and co-location all makes sense now. What I'm having trouble with is trying to figure out what it is in my CIB that I've gotten wrong, such that my cluster fails to come up. Let me lay out what I have:

The cluster has two nodes, nfs1 and nfs2. Both are running Ubuntu Dapper Drake, Heartbeat 2.0.2 and DRBD 0.7. nfs1 is the preference for being primary, but in the event of a failover, we require that a human intervene manually to fail back over from nfs2 to nfs1. I'll attach my ha.cf and cib.xml files to the end of this email, for reference.

When I try to bring heartbeat up, everything appears to be going fine. Except that nfs1 (which I start first) doesn't take up the drbd resource, and so it doesn't mount the file system, so nfs-kernel-server doesn't start, so the IPaddr resource isn't initialised. I hope I'm explaining this clearly. But the logs (syslog, ha-log and ha-debug) show nothing that indicates what's actually going wrong with drbd and so causing the rest of the resources (which are order-dependent on drbd) to fail to start. Maybe someone out there can help me. This is the first heartbeat cluster I've ever created, and I'm trying to wrap my head around cib at the same time.

Hoping you can help,

Adrian Overbury
Inomial Pty Ltd
[EMAIL PROTECTED]

--

ha.cf:

use_logd on
udpport 694
keepalive 1
deadtime 10
initdead 80
bcast eth2
node nfs1 nfs2
crm yes
auto_failback no

--

cib.xml:

<cib admin_epoch="0" have_quorum="true" num_peers="1" origin="nfs1" last_written="Fri Jun 29 15:37:58 2007" debug_source="finalize_join" cib_feature_revision="1" generated="true" dc_uuid="81fdf3a0-b341-4927-b995-0b5cff5cbdb3" epoch="5" num_updates="96" ccm_transition="1">
   <configuration>
     <crm_config>
       <cluster_property_set id="cib-bootstrap-options">
         <attributes>
<nvpair id="cib-bootstrap-options-symmetric_cluster" name="symmetric_cluster" value="true"/> <nvpair id="cib-bootstrap-options-no_quorum_policy" name="no_quorum_policy" value="ignore"/> <nvpair id="cib-bootstrap-options-default_resource_stickiness" name="default_resource_stickiness" value="INFINITY"/> <nvpair id="cib-bootstrap-options-default_resource_failure_stickiness" name="default_resource_failure_stickiness" value="0"/> <nvpair id="cib-bootstrap-options-stonith_enabled" name="stonith_enabled" value="false"/> <nvpair id="cib-bootstrap-options-stonith_action" name="stonith_action" value="reboot"/> <nvpair id="cib-bootstrap-options-stop_orphan_resources" name="stop_orphan_resources" value="true"/> <nvpair id="cib-bootstrap-options-stop_orphan_actions" name="stop_orphan_actions" value="true"/> <nvpair id="cib-bootstrap-options-remove_after_stop" name="remove_after_stop" value="false"/> <nvpair id="cib-bootstrap-options-short_resource_names" name="short_resource_names" value="true"/> <nvpair id="cib-bootstrap-options-transition_idle_timeout" name="transition_idle_timeout" value="5min"/> <nvpair id="cib-bootstrap-options-default_action_timeout" name="default_action_timeout" value="5s"/> <nvpair id="cib-bootstrap-options-is_managed_default" name="is_managed_default" value="true"/>
         </attributes>
       </cluster_property_set>
     </crm_config>
     <nodes>
<node id="81fdf3a0-b341-4927-b995-0b5cff5cbdb3" uname="nfs1" type="member"/> <node id="f528af1c-ac48-49f1-9b1c-3812732be79c" uname="nfs2" type="member"/>
     </nodes>
     <resources>
<primitive class="ocf" id="drbd_r0" provider="heartbeat" type="drbd">
         <operations>
<op id="drbd_r0_mon" interval="5s" name="monitor" timeout="5s"/>
         </operations>
         <meta_attributes id="drbd_r0_ma">
           <attributes>
             <nvpair id="ma-ms-drbd0-1" name="clone_max" value="2"/>
<nvpair id="ma-ms-drbd0-2" name="clone_node_max" value="1"/>
             <nvpair id="ma-ms-drbd0-3" name="master_max" value="1"/>
<nvpair id="ma-ms-drbd0-4" name="master_node_max" value="1"/>
             <nvpair id="ma-ms-drbd0-5" name="notify" value="yes"/>
<nvpair id="ma-ms-drbd0-6" name="globally_unique" value="false"/> <nvpair id="ma-ms-drbd0-7" name="target_role" value="started"/>
           </attributes>
         </meta_attributes>
         <instance_attributes id="drbd_r0_inst_attr">
           <attributes>
<nvpair id="drbd_r0_attr_0" name="drbd_resource" value="r0"/> <nvpair id="drbd_r0_attr_1" name="drbdconf" value="/etc/drbd.conf"/>
           </attributes>
         </instance_attributes>
       </primitive>
<primitive class="ocf" id="Filesystem_nfs" provider="heartbeat" type="Filesystem">
         <operations>
<op id="Filesystem_nfs_mon" interval="5s" name="monitor" timeout="5s"/>
         </operations>
         <meta_attributes id="Filesystem_nfs_ma">
           <nvpair name="target_role" id="ma-fs0-1" value="started"/>
         </meta_attributes>
         <instance_attributes id="Filesystem_nfs_inst_attr">
           <attributes>
<nvpair id="Filesystem_nfs_attr_0" name="device" value="/dev/drbd0"/> <nvpair id="Filesystem_nfs_attr_1" name="directory" value="/nfs"/> <nvpair id="Filesystem_nfs_attr_2" name="fstype" value="ext3"/>
           </attributes>
         </instance_attributes>
       </primitive>
       <primitive class="lsb" id="drbdlinks" name="drbdlinks">
         <operations>
<op id="drbdlinks_mon" interval="30s" name="monitor" timeout="60s"/>
         </operations>
       </primitive>
<primitive class="lsb" id="nfs_kernel_server" name="nfs-kernel-server">
         <operations>
<op id="nfs_kernel_server_mon" interval="30s" name="monitor" timeout="60s"/>
         </operations>
       </primitive>
<primitive class="ocf" id="IPaddr_10_0_0_254" provider="heartbeat" type="IPaddr">
         <operations>
<op id="IPaddr_10_0_0_254_mon" interval="5s" name="monitor" timeout="5s"/>
         </operations>
         <instance_attributes id="IPaddr_10_0_0_254_inst_attr">
           <attributes>
<nvpair id="IPaddr_10_0_0_254_attr_0" name="ip" value="10.0.0.254"/> <nvpair id="IPaddr_10_0_0_254_attr_1" name="netmask" value="8"/> <nvpair id="IPaddr_10_0_0_254_attr_2" name="nic" value="eth1"/>
           </attributes>
         </instance_attributes>
       </primitive>
     </resources>
     <constraints>
<rsc_order id="drbd-Filesystem" from="Filesystem_nfs" type="after" to="drbd_r0"/> <rsc_order id="Filesystem-drbdlinks" from="drbdlinks" type="after" to="Filesystem_nfs"/> <rsc_order id="Filesystem-nfs_kernel_server" from="nfs_kernel_server" type="after" to="Filesystem_nfs"/> <rsc_order id="nfs_kernel_server-IPaddr_10_0_0_254" from="IPaddr_10_0_0_254" type="after" to="nfs_kernel_server"/> <rsc_colocation id="colo_Filesystem" from="Filesystem_nfs" to="drbd_r0" score="INFINITY"/> <rsc_colocation id="colo_drbdlinks" from="drbdlinks" to="drbd_r0" score="INFINITY"/> <rsc_colocation id="colo_nfs_server" from="nfs_kernel_server" to="drbd_r0" score="INFINITY"/> <rsc_colocation id="colo_IPaddr" from="IPaddr_10_0_0_254" to="drbd_r0" score="INFINITY"/>
     </constraints>
   </configuration>
   <status>
<node_state id="81fdf3a0-b341-4927-b995-0b5cff5cbdb3" uname="nfs1" in_ccm="true" join="member" origin="do_lrm_query" crmd="online" ha="active" expected="member">
       <lrm>
         <lrm_resources>
<lrm_resource id="drbd_r0" rsc_state="running" op_status="0" rc_code="0" last_op="monitor"> <lrm_rsc_op id="drbd_r0_start_0" operation="start" op_status="0" call_id="2" rc_code="0" origin="do_update_resource" transition_key="0:e9e6fc20-15e3-4cfb-9798-a67a8c92fc2e" transition_magic="0:0:e9e6fc20-15e3-4cfb-9798-a67a8c92fc2e" rsc_state="running"/> <lrm_rsc_op id="drbd_r0_monitor_5000" operation="monitor" op_status="0" call_id="3" rc_code="0" origin="do_update_resource" transition_key="0:e9e6fc20-15e3-4cfb-9798-a67a8c92fc2e" transition_magic="0:0:e9e6fc20-15e3-4cfb-9798-a67a8c92fc2e" rsc_state="running"/>
           </lrm_resource>
<lrm_resource id="Filesystem_nfs" rsc_state="running" op_status="0" rc_code="0" last_op="monitor"> <lrm_rsc_op id="Filesystem_nfs_start_0" operation="start" op_status="0" call_id="4" rc_code="0" origin="do_update_resource" transition_key="0:e9e6fc20-15e3-4cfb-9798-a67a8c92fc2e" transition_magic="0:0:e9e6fc20-15e3-4cfb-9798-a67a8c92fc2e" rsc_state="running"/> <lrm_rsc_op id="Filesystem_nfs_monitor_5000" operation="monitor" op_status="0" call_id="5" rc_code="0" origin="do_update_resource" transition_key="0:e9e6fc20-15e3-4cfb-9798-a67a8c92fc2e" transition_magic="0:0:e9e6fc20-15e3-4cfb-9798-a67a8c92fc2e" rsc_state="running"/>
           </lrm_resource>
<lrm_resource id="drbdlinks" rsc_state="starting" op_status="-1" rc_code="-1" last_op="start"> <lrm_rsc_op id="drbdlinks_start_0" operation="start" op_status="-1" call_id="-1" rc_code="-1" origin="cib_action_update" transition_key="4:e9e6fc20-15e3-4cfb-9798-a67a8c92fc2e" transition_magic="-1:4:e9e6fc20-15e3-4cfb-9798-a67a8c92fc2e"/>
           </lrm_resource>
<lrm_resource id="nfs_kernel_server" rsc_state="starting" op_status="-1" rc_code="-1" last_op="start"> <lrm_rsc_op id="nfs_kernel_server_start_0" operation="start" op_status="-1" call_id="-1" rc_code="-1" origin="cib_action_update" transition_key="4:e9e6fc20-15e3-4cfb-9798-a67a8c92fc2e" transition_magic="-1:4:e9e6fc20-15e3-4cfb-9798-a67a8c92fc2e"/>
           </lrm_resource>
         </lrm_resources>
       </lrm>
     </node_state>
   </status>
</cib>
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems

Reply via email to