Thomas Glanzmann wrote:
Tim,
put them in a resource group. A resource group does ordering, colocation,
and stopping of all its members implicitly.
<group id="whatever">
...
</group>
The resource group is what's not working. If the second resource in a
resource group can't start, the first one is stopped, then restarted,
and you end up with one resource running, one not. I've attached a CIB
that demonstrates this problem, along with a fake RA to simulate the
resource that can't start (put this in /usr/lib/ocf/resource.d/heartbeat).
Here's what I've got, in summary:
group-test-a (resource group)
    group-test-a-res-1 (Dummy RA)
    group-test-a-res-2 (Dummy-that-cant-start RA)
group-test-b-res-1 (Dummy RA)
group-test-b-res-2 (Dummy-that-cant-start RA)
group-test-a ends up with group-test-a-res-1 started, group-test-a-res-2
stopped.
group-test-b-res-1 and group-test-b-res-2 are bound together with
colocation and ordering constraints and behave correctly, i.e. they both
end up stopped.
Tim
<cib admin_epoch="0"
     have_quorum="true"
     ignore_dtd="false"
     num_peers="2"
     cib_feature_revision="2.0"
     generated="true"
     ccm_transition="2"
     dc_uuid="2f7536f4-2d86-49e6-97a8-422e627e8a67"
     epoch="1198"
     num_updates="1"
     cib-last-written="Mon May 12 17:55:38 2008">
  <!--
    Test CIB comparing two ways of tying a startable resource (type "Dummy")
    to one that cannot start (type "Dummy-that-cant-start"):
      * group-test-a: both primitives are members of one resource group.
      * group-test-b: two standalone primitives joined by explicit
        colocation and ordering constraints.
  -->
  <configuration>
    <crm_config>
      <cluster_property_set id="cib-bootstrap-options">
        <attributes>
          <nvpair id="cib-bootstrap-options-dc-version"
                  name="dc-version"
                  value="2.1.3-node: 552305612591183b1628baa5bc6e903e0f1e26a3"/>
          <nvpair name="last-lrm-refresh"
                  id="cib-bootstrap-options-last-lrm-refresh"
                  value="1210578903"/>
        </attributes>
      </cluster_property_set>
    </crm_config>

    <!-- Two-node cluster: node-1 and node-0. -->
    <nodes>
      <node id="2f7536f4-2d86-49e6-97a8-422e627e8a67" uname="node-1" type="normal">
        <instance_attributes id="nodes-2f7536f4-2d86-49e6-97a8-422e627e8a67">
          <attributes/>
        </instance_attributes>
      </node>
      <node id="3ec6cfbd-8e1e-4a70-abdf-c3dc660f83a8" uname="node-0" type="normal">
        <instance_attributes id="nodes-3ec6cfbd-8e1e-4a70-abdf-c3dc660f83a8">
          <attributes/>
        </instance_attributes>
      </node>
    </nodes>

    <resources>
      <!-- Case A: resource group holding res-1 (Dummy) and res-2 (cannot start). -->
      <group id="group-test-a">
        <primitive id="group-test-a-res-1" class="ocf" provider="heartbeat" type="Dummy">
          <operations>
            <op name="start"
                timeout="60s"
                prereq="fencing"
                on_fail="restart"
                id="27369cbb-95cc-413f-a9c6-460a8e7cd2d7"/>
            <op name="stop"
                timeout="30s"
                on_fail="fence"
                id="f1f638a5-fa60-4472-9667-279fbac84822"/>
            <op name="monitor"
                timeout="10s"
                interval="20s"
                on_fail="restart"
                id="1df1b7a7-d3f5-4eba-9902-56d40cb83a02"/>
          </operations>
        </primitive>
        <primitive id="group-test-a-res-2" class="ocf" provider="heartbeat" type="Dummy-that-cant-start">
          <operations>
            <op name="start"
                timeout="60s"
                prereq="fencing"
                on_fail="restart"
                id="f124611e-bf74-4509-a0d5-dfd31380d898"/>
            <op name="stop"
                timeout="30s"
                on_fail="fence"
                id="5f169e2d-dfc0-4a36-9edd-c0365f02dfef"/>
            <op name="monitor"
                timeout="10s"
                interval="20s"
                on_fail="restart"
                id="b957979e-f1de-43cf-8046-5038a08d04a3"/>
          </operations>
        </primitive>
      </group>

      <!-- Case B: the same pair of resource types as standalone primitives. -->
      <primitive id="group-test-b-res-1" class="ocf" provider="heartbeat" type="Dummy">
        <operations>
          <op name="start"
              timeout="60s"
              prereq="fencing"
              on_fail="restart"
              id="a891fde4-d189-45a8-89b6-124a55777104"/>
          <op name="stop"
              timeout="30s"
              on_fail="fence"
              id="ca59505c-30fc-4d30-9cf5-11b93727f4df"/>
          <op name="monitor"
              timeout="10s"
              interval="20s"
              on_fail="restart"
              id="81e4b8d3-8d1f-45c0-a538-824d1c64e4fc"/>
        </operations>
      </primitive>
      <primitive id="group-test-b-res-2" class="ocf" provider="heartbeat" type="Dummy-that-cant-start">
        <operations>
          <op name="start"
              timeout="60s"
              prereq="fencing"
              on_fail="restart"
              id="1c286997-8a21-46ab-8f76-0015526f3c89"/>
          <op name="stop"
              timeout="30s"
              on_fail="fence"
              id="cacc40c1-49aa-4aa7-a0c5-df3bcbded17a"/>
          <op name="monitor"
              timeout="10s"
              interval="20s"
              on_fail="restart"
              id="6a642e97-92a6-4541-b385-489023ef8e5c"/>
        </operations>
      </primitive>
    </resources>

    <constraints>
      <!-- Case A placement: score 200 on node-0, score 190 on node-1. -->
      <rsc_location id="group-test-a-cl" rsc="group-test-a">
        <rule score="200" id="8553a995-61a2-4864-a77b-e8fc72329c5e">
          <expression attribute="#uname"
                      operation="eq"
                      value="node-0"
                      id="cb9ad918-720a-49df-a585-1d8c34230c08"/>
        </rule>
        <rule score="190" id="871f9f91-84ae-4b65-a54d-b01371cd8915">
          <expression attribute="#uname"
                      operation="eq"
                      value="node-1"
                      id="79b26f69-183e-401c-81fc-a4ef09a1bb12"/>
        </rule>
      </rsc_location>

      <!-- Case B: explicit colocation (score INFINITY) and start ordering
           between group-test-b-res-1 and group-test-b-res-2. -->
      <rsc_colocation id="group-test-b-cc1"
                      from="group-test-b-res-1"
                      to="group-test-b-res-2"
                      score="INFINITY"/>
      <rsc_order id="group-test-b-co1"
                 from="group-test-b-res-1"
                 action="start"
                 type="before"
                 to="group-test-b-res-2"/>

      <!-- Case B placement: each primitive gets score 1 on both nodes. -->
      <rsc_location id="group-test-b-cl-1" rsc="group-test-b-res-1">
        <rule score="1" id="ecd91c37-5f9d-4ef8-bb26-42d5c20da484">
          <expression attribute="#uname"
                      operation="eq"
                      value="node-0"
                      id="d2f5b233-9a81-497c-bfa0-77d6efc46434"/>
        </rule>
        <rule score="1" id="ad5a93cd-1674-4222-80c2-b2a36faa85cd">
          <expression attribute="#uname"
                      operation="eq"
                      value="node-1"
                      id="12d310fe-9c9c-4858-ae2b-79de65ccdd6c"/>
        </rule>
      </rsc_location>
      <rsc_location id="group-test-b-cl-2" rsc="group-test-b-res-2">
        <rule score="1" id="f4d5d56e-a323-4326-9f50-ff77c8ab66b6">
          <expression attribute="#uname"
                      operation="eq"
                      value="node-0"
                      id="dd837ee7-dbbf-43b8-b684-83b46d038a69"/>
        </rule>
        <rule score="1" id="2bc6a523-a679-4087-9324-0b4972d371d7">
          <expression attribute="#uname"
                      operation="eq"
                      value="node-1"
                      id="accc5b8e-bdda-47fc-9e7d-f5a718a25104"/>
        </rule>
      </rsc_location>
    </constraints>
  </configuration>
</cib>
#!/bin/bash
#
# Fake resource agent that simulates a resource which can never be started.
#
# Usage: <script> {start|stop|monitor}
#
# Exit status by action (OCF return-code names given for reference):
#   start    -> 1  (OCF_ERR_GENERIC: the start always fails)
#   stop     -> 0  (OCF_SUCCESS: stopping always succeeds)
#   monitor  -> 7  (OCF_NOT_RUNNING: the resource is never running)
#   other    -> 1  (any other or missing action is reported as an error)

requested_action="$1"

case "$requested_action" in
    stop)
        rc=0
        ;;
    monitor)
        rc=7
        ;;
    start)
        rc=1
        ;;
    *)
        # Unknown or missing action: same generic failure code as start.
        rc=1
        ;;
esac

exit "$rc"
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems