Thomas Glanzmann wrote:
Tim,
put them in a resource group. A resource group handles ordering and
colocation, and stops all of its members implicitly.

        <group id="whatever">
        ...
        </group>

The resource group is what's not working. If the second resource in a resource group can't start, the first one is stopped, then restarted, and you end up with one resource running, one not. I've attached a CIB that demonstrates this problem, along with a fake RA to simulate the resource that can't start (put this in /usr/lib/ocf/resource.d/heartbeat).

Here's what I've got, in summary:

  group-test-a (resource group)
    group-test-a-res-1 (Dummy RA)
    group-test-a-res-2 (Dummy-that-cant-start RA)

  group-test-b-res-1 (Dummy RA)
  group-test-b-res-2 (Dummy-that-cant-start RA)


group-test-a ends up with group-test-a-res-1 started, group-test-a-res-2 stopped.

group-test-b-res-1 and group-test-b-res-2 are bound together with colocation and ordering constraints and behave correctly, i.e. they both end up stopped.

Tim
 <cib admin_epoch="0" have_quorum="true" ignore_dtd="false" num_peers="2" cib_feature_revision="2.0" generated="true" ccm_transition="2" dc_uuid="2f7536f4-2d86-49e6-97a8-422e627e8a67" epoch="1198" num_updates="1" cib-last-written="Mon May 12 17:55:38 2008">
   <!-- Test CIB for the report above: it compares a resource group (group-test-a)
        with two standalone primitives (group-test-b-res-1 / group-test-b-res-2)
        that are tied together by explicit colocation and ordering constraints. -->
   <configuration>
     <crm_config>
       <cluster_property_set id="cib-bootstrap-options">
         <attributes>
           <nvpair id="cib-bootstrap-options-dc-version" name="dc-version" value="2.1.3-node: 552305612591183b1628baa5bc6e903e0f1e26a3"/>
           <nvpair name="last-lrm-refresh" id="cib-bootstrap-options-last-lrm-refresh" value="1210578903"/>
         </attributes>
       </cluster_property_set>
     </crm_config>
     <!-- Two cluster nodes, node-1 and node-0; neither carries any node attributes. -->
     <nodes>
       <node id="2f7536f4-2d86-49e6-97a8-422e627e8a67" uname="node-1" type="normal">
         <instance_attributes id="nodes-2f7536f4-2d86-49e6-97a8-422e627e8a67">
           <attributes/>
         </instance_attributes>
       </node>
       <node id="3ec6cfbd-8e1e-4a70-abdf-c3dc660f83a8" uname="node-0" type="normal">
         <instance_attributes id="nodes-3ec6cfbd-8e1e-4a70-abdf-c3dc660f83a8">
           <attributes/>
         </instance_attributes>
       </node>
     </nodes>
     <resources>
       <!-- Case A: both primitives are members of a single group. -->
       <group id="group-test-a">
         <primitive id="group-test-a-res-1" class="ocf" provider="heartbeat" type="Dummy">
           <operations>
             <op name="start" timeout="60s" prereq="fencing" on_fail="restart" id="27369cbb-95cc-413f-a9c6-460a8e7cd2d7"/>
             <op name="stop" timeout="30s" on_fail="fence" id="f1f638a5-fa60-4472-9667-279fbac84822"/>
             <op name="monitor" timeout="10s" interval="20s" on_fail="restart" id="1df1b7a7-d3f5-4eba-9902-56d40cb83a02"/>
           </operations>
         </primitive>
         <!-- Second group member uses the Dummy-that-cant-start agent, which the
              report describes as a fake RA simulating a resource that cannot start.
              Its operations are configured identically to those of the first member. -->
         <primitive id="group-test-a-res-2" class="ocf" provider="heartbeat" type="Dummy-that-cant-start">
           <operations>
             <op name="start" timeout="60s" prereq="fencing" on_fail="restart" id="f124611e-bf74-4509-a0d5-dfd31380d898"/>
             <op name="stop" timeout="30s" on_fail="fence" id="5f169e2d-dfc0-4a36-9edd-c0365f02dfef"/>
             <op name="monitor" timeout="10s" interval="20s" on_fail="restart" id="b957979e-f1de-43cf-8046-5038a08d04a3"/>
           </operations>
         </primitive>
       </group>
       <!-- Case B: the same two agent types as standalone primitives. Their
            relationship is expressed only by the rsc_colocation and rsc_order
            entries in the constraints section. -->
       <primitive id="group-test-b-res-1" class="ocf" provider="heartbeat" type="Dummy">
         <operations>
           <op name="start" timeout="60s" prereq="fencing" on_fail="restart" id="a891fde4-d189-45a8-89b6-124a55777104"/>
           <op name="stop" timeout="30s" on_fail="fence" id="ca59505c-30fc-4d30-9cf5-11b93727f4df"/>
           <op name="monitor" timeout="10s" interval="20s" on_fail="restart" id="81e4b8d3-8d1f-45c0-a538-824d1c64e4fc"/>
         </operations>
       </primitive>
       <primitive id="group-test-b-res-2" class="ocf" provider="heartbeat" type="Dummy-that-cant-start">
         <operations>
           <op name="start" timeout="60s" prereq="fencing" on_fail="restart" id="1c286997-8a21-46ab-8f76-0015526f3c89"/>
           <op name="stop" timeout="30s" on_fail="fence" id="cacc40c1-49aa-4aa7-a0c5-df3bcbded17a"/>
           <op name="monitor" timeout="10s" interval="20s" on_fail="restart" id="6a642e97-92a6-4541-b385-489023ef8e5c"/>
         </operations>
       </primitive>
     </resources>
     <constraints>
       <!-- Location scores for the group: 200 on node-0, 190 on node-1. -->
       <rsc_location id="group-test-a-cl" rsc="group-test-a">
         <rule score="200" id="8553a995-61a2-4864-a77b-e8fc72329c5e">
           <expression attribute="#uname" operation="eq" value="node-0" id="cb9ad918-720a-49df-a585-1d8c34230c08"/>
         </rule>
         <rule score="190" id="871f9f91-84ae-4b65-a54d-b01371cd8915">
           <expression attribute="#uname" operation="eq" value="node-1" id="79b26f69-183e-401c-81fc-a4ef09a1bb12"/>
         </rule>
       </rsc_location>
       <!-- Explicit colocation (score INFINITY) and start ordering between the two
            group-test-b primitives. NOTE(review): the exact direction implied by
            from/to/type should be confirmed against the CRM documentation for this
            Heartbeat version. -->
       <rsc_colocation id="group-test-b-cc1" from="group-test-b-res-1" to="group-test-b-res-2" score="INFINITY"/>
       <rsc_order id="group-test-b-co1" from="group-test-b-res-1" action="start" type="before" to="group-test-b-res-2"/>
       <!-- Each group-test-b primitive gets the same location score (1) on both nodes. -->
       <rsc_location id="group-test-b-cl-1" rsc="group-test-b-res-1">
         <rule score="1" id="ecd91c37-5f9d-4ef8-bb26-42d5c20da484">
           <expression attribute="#uname" operation="eq" value="node-0" id="d2f5b233-9a81-497c-bfa0-77d6efc46434"/>
         </rule>
         <rule score="1" id="ad5a93cd-1674-4222-80c2-b2a36faa85cd">
           <expression attribute="#uname" operation="eq" value="node-1" id="12d310fe-9c9c-4858-ae2b-79de65ccdd6c"/>
         </rule>
       </rsc_location>
       <rsc_location id="group-test-b-cl-2" rsc="group-test-b-res-2">
         <rule score="1" id="f4d5d56e-a323-4326-9f50-ff77c8ab66b6">
           <expression attribute="#uname" operation="eq" value="node-0" id="dd837ee7-dbbf-43b8-b684-83b46d038a69"/>
         </rule>
         <rule score="1" id="2bc6a523-a679-4087-9324-0b4972d371d7">
           <expression attribute="#uname" operation="eq" value="node-1" id="accc5b8e-bdda-47fc-9e7d-f5a718a25104"/>
         </rule>
       </rsc_location>
     </constraints>
   </configuration>
 </cib>
#!/bin/bash
#
# Fake resource agent that can never be started.
#
# Usage: <script> start|stop|monitor
#
# Exit status by action:
#   start    1  (always fails)
#   stop     0  (always succeeds)
#   monitor  7  (in the OCF exit-code convention: "not running")
#   other    1  (includes a missing argument)

readonly rc_ok=0
readonly rc_failed=1
readonly rc_not_running=7

requested_action=$1

# Stopping always succeeds.
if [ "$requested_action" = "stop" ]; then
        exit "$rc_ok"
fi

# Monitoring always reports the not-running status.
if [ "$requested_action" = "monitor" ]; then
        exit "$rc_not_running"
fi

# "start" and every other (or absent) action fail with the same status.
exit "$rc_failed"

_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems

Reply via email to