Hello all,
I've been playing with heartbeat 2 for a few weeks and have a good
handle on most of the basics, but I am having trouble with some of the
subtleties. I'm trying to prevent a resource from returning to its
preferred node until resources that are co-located with it have
SUCCESSFULLY started - it appears to be returning during the process of
starting these resources.
I have a two-node cluster with a clone filesystem resource (OCFS2) and
/two/ ip addresses. The two ips weakly prefer to run on separate nodes
on the cluster (for DNS round-robin purposes). I have set a colocation
constraint requiring that the ip resource only be allowed to run where
an instance of the ocfs2 clone is (eventually this will be a web cluster).
When, say, node2 goes down, the ip address correctly fails over to
node1. However, when I bring node2 back up OCFS often fails to mount
without rebooting (a separate issue). The problem is that the ip address
will temporarily failback to its preferred location for about 10-15
seconds while ocfs is trying to mount, then will return to its less
preferred node when ocfs actually fails.
Is there some way to prevent this behavior without setting stickiness
such that I have to manually failback the ip? I want the ip to return to
node2, but only AFTER any colocated resources have successfully started.
I tried using an order constraint, but that just had the effect of
stopping the ip resource while ocfs was attempting to mount.
Thanks in advance!
Here is my cib:
<cib generated="true" admin_epoch="0" have_quorum="true"
ignore_dtd="false" num_peers="2" cib_feature_revision="1.3"
crm_feature_set="2.0" ccm_transition="83"
dc_uuid="b2bef128-5b51-4a72-a807-48297385a8a6" epoch="383" num_updates="39">
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<attributes>
<nvpair id="cib-bootstrap-options-dc-version"
name="dc-version" value="2.1.3-node:
a3184d5240c6e7032aef9cce6e5b7752ded544b3"/>
<nvpair
id="cib-bootstrap-options-default-resource-stickiness"
name="default-resource-stickiness" value="1"/>
</attributes>
</cluster_property_set>
<cluster_property_set id="cibbootstrap">
<attributes>
<nvpair id="cibbootstrap-01" name="transition_idle_timeout"
value="60"/>
<nvpair id="cibbootstrap-02"
name="default_resource_stickiness" value="1"/>
<nvpair id="cibbootstrap-03"
name="default_resource_failure_stickiness" value="-500"/>
<nvpair id="cibbootstrap-04" name="stonith_enabled"
value="false"/>
<nvpair id="cibbootstrap-05" name="stonith_action"
value="reboot"/>
<nvpair id="cibbootstrap-06" name="symmetric_cluster"
value="true"/>
<nvpair id="cibbootstrap-07" name="no_quorum_policy"
value="stop"/>
<nvpair id="cibbootstrap-08" name="stop_orphan_resources"
value="true"/>
<nvpair id="cibbootstrap-09" name="stop_orphan_actions"
value="true"/>
<nvpair id="cibbootstrap-10" name="is_managed_default"
value="true"/>
</attributes>
</cluster_property_set>
</crm_config>
<nodes>
<node id="c5583448-853e-419c-bca9-eb52e8b64c79" uname="atweb01v"
type="normal"/>
<node id="b2bef128-5b51-4a72-a807-48297385a8a6" uname="atweb02v"
type="normal"/>
</nodes>
<resources>
<clone id="ClusterStorage" notify="true" globally_unique="false">
<instance_attributes id="ClusterStorage-ia">
<attributes>
<nvpair id="ClusterStorage-01" name="clone_node_max"
value="1"/>
<nvpair id="ClusterStorage-02" name="target_role"
value="started"/>
</attributes>
</instance_attributes>
<primitive id="OCFS2" class="ocf" type="Filesystem"
provider="heartbeat">
<operations>
<op name="monitor" interval="20s" timeout="60s"
prereq="nothing" id="OCFS2-op-01"/>
</operations>
<instance_attributes id="OCFS2-ia">
<attributes>
<nvpair id="OCFS2-01" name="device" value="/dev/sdb"/>
<nvpair id="OCFS2-02" name="directory" value="/mnt/ocfs2"/>
<nvpair id="OCFS2-03" name="fstype" value="ocfs2"/>
<nvpair id="OCFS2:1_target_role" name="target_role"
value="started"/>
</attributes>
</instance_attributes>
</primitive>
</clone>
<primitive id="ip1" class="ocf" type="IPaddr2" provider="heartbeat">
<instance_attributes id="0f3f4fc2-354f-42c4-aca9-adfe2a4392a6">
<attributes>
<nvpair name="ip" value="140.211.89.16"
id="c68ae67b-7860-4309-a478-2c705ab2366b"/>
<nvpair name="nic" value="eth0"
id="f6db2b92-492e-442e-b084-68cc97ab7b46"/>
<nvpair name="cidr_netmask" value="24"
id="cc49f493-8e8c-48f0-bfc5-a24af7071b2c"/>
</attributes>
</instance_attributes>
<operations>
<op id="97f01f09-5322-40c1-a719-bf8b02c312fa" name="monitor"
interval="10s" timeout="20s" start_delay="5s" on_fail="restart"/>
</operations>
</primitive>
<primitive id="ip2" class="ocf" type="IPaddr2" provider="heartbeat">
<instance_attributes id="74652121-2b53-4d3d-901e-28a2d898a7bc">
<attributes>
<nvpair name="ip" value="140.211.89.17"
id="bda42825-7e28-4318-99a0-197d2f5216f0"/>
<nvpair name="nic" value="eth0"
id="55cb0faa-c05f-4a52-b1c2-fdfffb09cb12"/>
<nvpair name="cidr_netmask" value="24"
id="74420629-9086-4967-8a29-63b9dfde94a1"/>
</attributes>
</instance_attributes>
<operations>
<op id="47e5eaa3-9472-42cc-942a-f9f1d80f3443" name="monitor"
interval="10s" timeout="20s" start_delay="5s"/>
</operations>
</primitive>
</resources>
<constraints>
<rsc_colocation id="colo_ip1" from="ip1" to="ClusterStorage"
score="INFINITY"/>
<rsc_colocation id="colo_ip2" from="ip2" to="ClusterStorage"
score="INFINITY"/>
<rsc_location id="loc_ip1" rsc="ip1">
<rule id="pref_ip1" score="100">
<expression attribute="#uname" operation="eq"
value="atweb01v" id="88ce033e-90fc-4e46-959b-15d198ba0273"/>
</rule>
</rsc_location>
<rsc_location id="loc_ip2" rsc="ip2">
<rule id="pref_ip2" score="100">
<expression attribute="#uname" operation="eq"
value="atweb02v" id="876c2018-842a-41c7-8d5e-b16a66c60ea7"/>
</rule>
</rsc_location>
</constraints>
</configuration>
</cib>
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems