Hi all,
I've had some trouble setting up Heartbeat V2 on an x86_64 Fedora Core 5 system.
I've
searched the mailing lists and linux-ha site but haven't been able to find a
solution
to my problem.
The problem I'm having is when both nodes are active, and I try shutting down
either
to test failover. The remaining node takes over the resources, but then
immediately
stops them again. I haven't seen any obvious errors in the logs that I haven't
seen
on other working i386 nodes in other clusters.
The only differences between the two versions of ha.cf are the ping and ucast
parameters
(ping = the locally attached switch, and peer = the other node).
ha.cf:
keepalive 1
deadtime 10
warntime 5
initdead 60
udpport 694
ping 192.168.42.XXX # network switch IP to identify local state
ucast eth1 192.168.42.BBB # peer IP
auto_failback off
watchdog /dev/watchdog
node nfsAAA # identify each node using output of 'uname -n'
node nfsBBB # identify each node using output of 'uname -n'
use_logd yes
crm yes
apiauth cibmon uid=hacluster
respawn hacluster /usr/lib64/heartbeat/cibmon -d
The cib.xml file was generated using the
/usr/lib64/heartbeat/haresources2cib.py script
based on the following:
nfsAAA drbddisk::drbd-resource-0 \
Filesystem::/dev/drbd0::/serve/spool0::ext3 \
portmap \
nfslock \
nfs \
IPaddr::10.20.30.AAA
Unfortunately, the cib.xml file below does not include the secondary <node
id=..> tag,
since the systems I'm trying to set up are active production systems and I
can't afford
to have both running in a clustered mode, as this will introduce two single
points of
failure if either one goes offline.
The only difference I was able to see between the version below and the version
generated
when both nodes were up and part of the same cluster (as verified with
crm_mon), was an
extra <node id...> tag.
When either of the nodes is taken down to test failover, the have_quorum
attribute is "false"
and resources are shut down on the remaining online node.
<cib admin_epoch="0" epoch="1" num_updates="26" have_quorum="true"
generated="true" ccm_transition="1" num_peers="1" cib_feature_revision="1.3"
dc_uuid="1dc5b2d3-7950-4ead-88f4-5fc026ce4bf3" cib-last-written="Mon Dec 10
02:24:40 2007">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<attributes>
<nvpair id="cib-bootstrap-options-symmetric_cluster"
name="symmetric_cluster" value="true"/>
<nvpair id="cib-bootstrap-options-no_quorum_policy"
name="no_quorum_policy" value="stop"/>
<nvpair id="cib-bootstrap-options-default_resource_stickiness"
name="default_resource_stickiness" value="0"/>
<nvpair
id="cib-bootstrap-options-default_resource_failure_stickiness"
name="default_resource_failure_stickiness" value="0"/>
<nvpair id="cib-bootstrap-options-stonith_enabled"
name="stonith_enabled" value="false"/>
<nvpair id="cib-bootstrap-options-stonith_action"
name="stonith_action" value="reboot"/>
<nvpair id="cib-bootstrap-options-stop_orphan_resources"
name="stop_orphan_resources" value="true"/>
<nvpair id="cib-bootstrap-options-stop_orphan_actions"
name="stop_orphan_actions" value="true"/>
<nvpair id="cib-bootstrap-options-remove_after_stop"
name="remove_after_stop" value="false"/>
<nvpair id="cib-bootstrap-options-short_resource_names"
name="short_resource_names" value="true"/>
<nvpair id="cib-bootstrap-options-transition_idle_timeout"
name="transition_idle_timeout" value="5min"/>
<nvpair id="cib-bootstrap-options-default_action_timeout"
name="default_action_timeout" value="5s"/>
<nvpair id="cib-bootstrap-options-is_managed_default"
name="is_managed_default" value="true"/>
</attributes>
</cluster_property_set>
</crm_config>
<nodes>
<node id="1dc5b2d3-7950-4ead-88f4-5fc026ce4bf3" uname="nfsAAA"
type="normal"/>
</nodes>
<resources>
<group id="group_1">
<primitive class="heartbeat" id="drbddisk_1" provider="heartbeat"
type="drbddisk">
<operations>
<op id="drbddisk_1_mon" interval="120s" name="monitor"
timeout="60s"/>
</operations>
<instance_attributes id="drbddisk_1_inst_attr">
<attributes>
<nvpair id="drbddisk_1_attr_1" name="1" value="drbd-resource-0"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="ocf" id="Filesystem_2" provider="heartbeat"
type="Filesystem">
<operations>
<op id="Filesystem_2_mon" interval="120s" name="monitor"
timeout="60s"/>
</operations>
<instance_attributes id="Filesystem_2_inst_attr">
<attributes>
<nvpair id="Filesystem_2_attr_0" name="device"
value="/dev/drbd0"/>
<nvpair id="Filesystem_2_attr_1" name="directory"
value="/serve/spool0"/>
<nvpair id="Filesystem_2_attr_2" name="fstype" value="ext3"/>
</attributes>
</instance_attributes>
</primitive>
<primitive class="lsb" id="portmap_3" provider="heartbeat"
type="portmap">
<operations>
<op id="portmap_3_mon" interval="120s" name="monitor"
timeout="60s"/>
</operations>
</primitive>
<primitive class="lsb" id="nfslock_4" provider="heartbeat"
type="nfslock">
<operations>
<op id="nfslock_4_mon" interval="120s" name="monitor"
timeout="60s"/>
</operations>
</primitive>
<primitive class="lsb" id="nfs_5" provider="heartbeat" type="nfs">
<operations>
<op id="nfs_5_mon" interval="120s" name="monitor" timeout="60s"/>
</operations>
</primitive>
<primitive class="ocf" id="IPaddr_10_20_30_AAA" provider="heartbeat"
type="IPaddr">
<operations>
<op id="IPaddr_10_20_30_AAA_mon" interval="5s" name="monitor"
timeout="5s"/>
</operations>
<instance_attributes id="IPaddr_10_20_30_AAA_inst_attr">
<attributes>
<nvpair id="IPaddr_10_20_30_AAA_attr_0" name="ip"
value="10.20.30.AAA"/>
</attributes>
</instance_attributes>
</primitive>
</group>
</resources>
<constraints>
<rsc_location id="rsc_location_group_1" rsc="group_1">
<rule id="prefered_location_group_1" score="100">
<expression attribute="#uname" id="prefered_location_group_1_expr"
operation="eq" value="nfsAAA"/>
</rule>
</rsc_location>
</constraints>
</configuration>
</cib>
Note, the configuration generated from the haresources example above works fine
on other
i386 systems, just not on this x86_64 system.
The heartbeat version used is heartbeat-2.0.7-3.fc5 for x86_64.
Any help would be greatly appreciated.
Thanks in advance,
-Rolando
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems