Hello,
I have a 3-node cluster with 2 active nodes and 1 passive node.
I have configured 2 resource groups, each containing the necessary file system,
IP address and vz script.
My cluster works perfectly if the active nodes fail over their resources to
the passive node one at a time. The issue I am facing appears when both
active nodes fail one after the other: the 2nd active node to fail stops the
resources of the 1st active node (which are at that point running on the
passive node) and starts its own resources on the passive node, which should
not happen.
I think the resources of the 2nd active node should be stopped instead,
because the passive node is already occupied by the resources of the 1st
active node.
Is this normal? And how can I prevent the resources that are already running
on the passive node from being stopped by the last active node that fails?
I explain my scenario below, which I hope gives a better idea of what the
exact issue is, and I am also pasting the cib.xml file of my cluster
below.
The 3 nodes are:
1) node_master ---> Active node
2) node_slave ---> Passive node
3) node3 ---------> Active node
Problem:
If node_master fails over its resources to node_slave, there is no problem.
But if node3 then also fails over its resources, node_master's resources are
stopped on node_slave and node3's resources are started on node_slave
instead. (Problematic scenario)
Whereas:
If node3 fails over its resources to node_slave, there is no problem, and
if node_master then also fails over its resources to node_slave,
node3 finds that node_slave is already occupied with resources and stops
node3 resources. (Perfect scenario)
Any help will be highly appreciated.
The details of my scenario are:
1) I am running CentOS-5.3
2) Kernel version is 2.6.18-028stab059.6 (Virtuozzo kernel)
3) Heartbeat version- 2.1.3-3.
4) Installation through RPM.
5) Using Heartbeat version 2.
6) cat /etc/ha.d/ha.cf
___________________________________________________________________________________________
# Heartbeat ha.cf as posted (stated to be identical on all three nodes).
# NOTE(review): the unit of deadtime is presumably seconds; confirm against the ha.cf documentation.
deadtime 10
# Cluster communication uses broadcast on interface eth1.
bcast eth1
# Enable the CRM, i.e. the cib.xml-based configuration shown further down.
crm yes
# The three cluster members.
node node_master
node node_slave
node node3
# Debug log file, regular log file and syslog facility.
debugfile /var/log/ha-debug
logfile /var/log/ha-log
logfacility local0
_____________________________________________________________________________________________
This file is the same on all 3 nodes.
7) cib.xml
-------------------------------------------------------------------------------------------------------
<cib admin_epoch="0" have_quorum="true" ignore_dtd="false" num_peers="3"
cib_feature_revision="2.0" crm_feature_set="2.0" ccm_transition="3"
generated="true" dc_uuid="d90b1ed2-0000-44ac-9a4d-b435a6befd36" epoch="291"
num_updates="2" cib-last-written="Mon Jan 4 19:52:33 2010">
<configuration>
<!-- Cluster-wide CRM options, held in the property set "cib-bootstrap-options". -->
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<attributes>
<!-- Version string recorded under dc-version (2.1.3 plus a build hash). -->
<nvpair id="cib-bootstrap-options-dc-version" name="dc-version"
value="2.1.3-node: 552305612591183b1628baa5bc6e903e0f1e26a3"/>
<!-- NOTE(review): symmetric-cluster=false presumably means resources may only run on nodes that receive a positive location score in the constraints section; confirm against the CRM documentation. -->
<nvpair id="cib-bootstrap-options-symmetric-cluster"
name="symmetric-cluster" value="false"/>
<!-- NOTE(review): last-lrm-refresh looks like a Unix timestamp written by the cluster tooling; confirm. -->
<nvpair name="last-lrm-refresh"
id="cib-bootstrap-options-last-lrm-refresh" value="1262555178"/>
</attributes>
</cluster_property_set>
</crm_config>
<!-- The three cluster nodes and their per-node "standby" attribute.
     As posted, node3 and node_master have standby="on" and node_slave has standby="off".
     NOTE(review): standby is presumably how the failovers in the report were triggered; confirm with the poster. -->
<nodes>
<!-- node3: standby="on". -->
<node uname="node3" type="normal" id="7e5fdac9-80dc-41a7-bd8f-a5591a1b69a0">
<instance_attributes
id="nodes-7e5fdac9-80dc-41a7-bd8f-a5591a1b69a0">
<attributes>
<nvpair name="standby"
id="standby-7e5fdac9-80dc-41a7-bd8f-a5591a1b69a0" value="on"/>
</attributes>
</instance_attributes>
</node>
<!-- node_slave: standby="off". Its id matches the dc_uuid attribute on the cib element. -->
<node uname="node_slave" type="normal"
id="d90b1ed2-0000-44ac-9a4d-b435a6befd36">
<instance_attributes
id="nodes-d90b1ed2-0000-44ac-9a4d-b435a6befd36">
<attributes>
<nvpair name="standby"
id="standby-d90b1ed2-0000-44ac-9a4d-b435a6befd36" value="off"/>
</attributes>
</instance_attributes>
</node>
<!-- node_master: standby="on". -->
<node uname="node_master" type="normal"
id="075961d1-4492-4ba9-b4ad-e8c27b9e3f4b">
<instance_attributes
id="nodes-075961d1-4492-4ba9-b4ad-e8c27b9e3f4b">
<attributes>
<nvpair name="standby"
id="standby-075961d1-4492-4ba9-b4ad-e8c27b9e3f4b" value="on"/>
</attributes>
</instance_attributes>
</node>
</nodes>
<resources>
<!-- Resource group group_vz_1: an IP address, a file system mounted on /vz and the "vz" init script.
     The location constraints give this group score 600 on node_master and 200 on node_slave. -->
<group id="group_vz_1">
<!-- Group meta attributes: target_role=started, ordered=true, collocated=true, resource_stickiness=900. -->
<meta_attributes id="group_vz_1_meta_attrs">
<attributes>
<nvpair name="target_role" id="group_vz_1_metaattr_target_role"
value="started"/>
<nvpair id="group_vz_1_metaattr_ordered" name="ordered"
value="true"/>
<nvpair id="group_vz_1_metaattr_collocated" name="collocated"
value="true"/>
<nvpair id="group_vz_1_metaattr_resource_stickiness"
name="resource_stickiness" value="900"/>
</attributes>
</meta_attributes>
<!-- Service IP 66.199.245.207 on eth0 (ocf:heartbeat:IPaddr); the netmask is given in dotted-quad form. -->
<primitive id="resource_ipaddr" class="ocf" type="IPaddr"
provider="heartbeat">
<instance_attributes id="resource_ipaddr_instance_attrs">
<attributes>
<nvpair id="69fd1897-4ec1-4bdb-a33f-1d2bf1eda0da" name="ip"
value="66.199.245.207"/>
<nvpair id="177a2461-1b7a-4875-a866-916ed730b396" name="nic" value="eth0"/>
<nvpair id="347bf250-d25f-41df-bb7f-36f273c8029c"
name="cidr_netmask" value="255.255.255.224"/>
</attributes>
</instance_attributes>
<!-- No operations are defined for this primitive. -->
<operations/>
</primitive>
<!-- ext3 file system, selected by UUID, mounted on /vz with options _netdev,noatime (ocf:heartbeat:Filesystem). -->
<primitive id="resource_filesystem" class="ocf" type="Filesystem"
provider="heartbeat">
<instance_attributes id="resource_filesystem_instance_attrs">
<attributes>
<nvpair id="11aaf508-8f1f-4a9d-adde-2ff7e6a82740"
name="device"
value="/dev/disk/by-uuid/f5feb406-685a-41f8-a4f7-170ae0925901"/>
<nvpair id="517a54cf-70cd-46bf-8ae6-0953d3617599"
name="directory" value="/vz"/>
<nvpair id="b0a7cb9d-d0be-45fe-afcb-2860745bc5d5"
name="fstype" value="ext3"/>
<nvpair id="06344316-a2c5-4ced-930f-5e151dfbe1e2"
name="options" value="_netdev,noatime"/>
</attributes>
</instance_attributes>
<!-- No operations are defined for this primitive. -->
<operations/>
</primitive>
<!-- The "vz" LSB init script: monitored every 15s (timeout 30s, on_fail=restart); stop timeout is 200s.
     NOTE(review): provider="heartbeat" is set on a class="lsb" primitive; a provider presumably only applies to class="ocf" and is ignored here. Confirm. -->
<primitive id="resource_vz_script_1" class="lsb" type="vz"
provider="heartbeat">
<operations>
<op id="88f60be0-19ac-4dd1-bbf3-0471a4c7bd03" name="monitor"
interval="15s" timeout="30s" start_delay="0s" on_fail="restart"/>
<op id="387958aa-3ede-46c0-b77c-495e6cd44192" name="stop"
timeout="200s"/>
</operations>
</primitive>
</group>
<!-- Resource group group_vz_2: an IP address, a file system mounted on /vz and the "vz" init script.
     The location constraints give this group score 600 on node3 and 200 on node_slave.
     It uses the same mount point (/vz) and the same init script type as group_vz_1. -->
<group id="group_vz_2">
<!-- Group meta attributes: target_role=started, ordered=true, collocated=true, resource_stickiness=900. -->
<meta_attributes id="group_vz_2_meta_attrs">
<attributes>
<nvpair name="target_role" id="group_vz_2_metaattr_target_role"
value="started"/>
<nvpair id="group_vz_2_metaattr_ordered" name="ordered"
value="true"/>
<nvpair id="group_vz_2_metaattr_collocated" name="collocated"
value="true"/>
<nvpair name="resource_stickiness"
id="group_vz_2_metaattr_resource_stickiness" value="900"/>
</attributes>
</meta_attributes>
<!-- Service IP 66.199.245.204 on eth0 (ocf:heartbeat:IPaddr); the netmask is given in dotted-quad form. -->
<primitive id="resource_ipaddr_2" class="ocf" type="IPaddr"
provider="heartbeat">
<instance_attributes id="resource_ipaddr_2_instance_attrs">
<attributes>
<nvpair id="d7945234-539b-4f86-9fd7-93501d1ff590" name="ip"
value="66.199.245.204"/>
<nvpair id="c18f6b41-2798-42be-a5cb-470963ad3559" name="nic"
value="eth0"/>
<nvpair id="3112ab9f-d0e3-413e-84cd-76ecd39f51e7"
name="cidr_netmask" value="255.255.255.224"/>
</attributes>
</instance_attributes>
</primitive>
<!-- ext3 file system, selected by UUID, mounted on /vz with options _netdev,noatime (ocf:heartbeat:Filesystem). -->
<primitive id="resource_filesystem_2" class="ocf" type="Filesystem"
provider="heartbeat">
<instance_attributes id="resource_filesystem_2_instance_attrs">
<attributes>
<nvpair id="db553cf1-1419-42d0-9a0b-caf30d2862a5"
name="device"
value="/dev/disk/by-uuid/81c3845e-c2f6-4cb0-a0cd-e00c074942fb"/>
<nvpair id="4f375334-b48f-4537-8351-ab5ae02eb351"
name="directory" value="/vz"/>
<nvpair id="b1b5608e-0248-4a85-935d-b9957cdc044e"
name="fstype" value="ext3"/>
<nvpair id="8a147356-405d-46dd-81ac-2cfa1401a988"
name="options" value="_netdev,noatime"/>
</attributes>
</instance_attributes>
</primitive>
<!-- The "vz" LSB init script: monitored every 15s (timeout 30s, on_fail=restart); stop timeout is 200s.
     NOTE(review): provider="heartbeat" is set on a class="lsb" primitive; a provider presumably only applies to class="ocf" and is ignored here. Confirm. -->
<primitive id="resource_vz_script_2" class="lsb" type="vz"
provider="heartbeat">
<operations>
<op id="be25b095-6032-4890-b665-24b1860e3ab9" name="monitor"
interval="15s" timeout="30s" start_delay="0s" on_fail="restart"/>
<op id="d715bd3a-851e-41cf-a836-93a3cfaa6a78" name="stop"
timeout="200s"/>
</operations>
</primitive>
</group>
</resources>
<!-- Placement constraints.
     Location scores as configured:
       group_vz_1: node_master 600, node_slave 200
       group_vz_2: node3 600,       node_slave 200
     One colocation rule with score -INFINITY is declared from group_vz_1 to group_vz_2. -->
<constraints>
<!-- group_vz_1 may run on node_slave (score 200). -->
<rsc_location id="location_slave_1" rsc="group_vz_1">
<rule id="prefered_location_slave_1" score="200">
<expression attribute="#uname"
id="fd0fb0b6-3795-426d-966e-9da5fd24ff1a" operation="eq"
value="node_slave"/>
</rule>
</rsc_location>
<!-- group_vz_2 prefers node3 (score 600). -->
<rsc_location id="location_node3" rsc="group_vz_2">
<rule id="prefered_location_node3" score="600">
<expression attribute="#uname"
id="72aaa5f0-c2f0-4038-afe3-4ce7cacb4acf" operation="eq" value="node3"/>
</rule>
</rsc_location>
<!-- group_vz_2 may run on node_slave (score 200). -->
<rsc_location id="location_slave_2" rsc="group_vz_2">
<rule id="prefered_location_slave_2" score="200">
<expression attribute="#uname"
id="6cc8bf2f-9f52-4099-8b9e-85483973124c" operation="eq"
value="node_slave"/>
</rule>
</rsc_location>
<!-- group_vz_1 prefers node_master (score 600). -->
<rsc_location id="location_master" rsc="group_vz_1">
<rule id="prefered_location_master" score="600">
<expression attribute="#uname"
id="2f8bfb3f-a6f7-4791-97d8-96d673e42ba6" operation="eq"
value="node_master"/>
</rule>
</rsc_location>
<!-- The two groups must never share a node (score -INFINITY).
     NOTE(review): the rule is declared in one direction only (from group_vz_1 to group_vz_2).
     Presumably group_vz_2 is therefore placed first and group_vz_1 is the group that has to yield
     whenever both want node_slave. That would explain why group_vz_1 is stopped on node_slave
     when group_vz_2 arrives, but not the other way round. Confirm against the CRM colocation
     documentation before changing anything. -->
<rsc_colocation id="colocation_vz" from="group_vz_1" to="group_vz_2"
score="-INFINITY"/>
</constraints>
</configuration>
</cib>
Cheers,
Jaspal
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems