On 3/28/07, Mohler, Eric (EMOHLER) <[EMAIL PROTECTED]> wrote:
> Hi there,
>
Sorry for last post - hopefully this better captures the timing issues
I'm trying to get across.

> I have a question regarding failover behavior of 2.0.7.
>
> We're running just 2 nodes on a single LAN and no crossover cable.
>
> We're running with 2.0.7 (R2). Our project has locked our release to
> 2.0.7. There is no possibility to upgrade to 2.0.8 until it is
> scheduled for a subsequent release.
>
>
> I am able to get the following behavior with this (below) config in
> the event of an ***application failure***:
> default_resource_stickiness value="INFINITY"
> default_resource_failure_stickiness value="-INFINITY"
>
>
BOX1 Applications             BOX2 Applications
ON                            OFF
ON                            OFF
ON                            OFF
(App Failure)
ON                            OFF
ON                            OFF
ON                            OFF
(App Failure)
OFF                           ON
OFF                           ON
OFF                           ON
(App Failure)
ON                            OFF
ON                            OFF
ON                            OFF
(App Failure)
ON                            OFF
ON                            OFF
ON                            OFF
(App Failure)
OFF                           ON
OFF                           ON
OFF                           ON
Etc...                          Etc...


> I am able to get the following behavior with this (below) config in
> the event of an ***application failure***:
> default_resource_stickiness value="INFINITY"
> default_resource_failure_stickiness value="0"
>
>
BOX1 Applications             BOX2 Applications
ON                            OFF
ON                            OFF
ON                            OFF
(App Failure)
ON                            OFF
ON                            OFF
ON                            OFF
(App Failure)
ON                            OFF
ON                            OFF
ON                            OFF
(App Failure)
ON                            OFF
ON                            OFF
ON                            OFF
Etc...                        Etc...



> BUT can you tell me how to get this behavior in the event of an
> ***application failure*** ????????????????????
> default_resource_stickiness value               ="???"
> default_resource_failure_stickiness value       ="???"
> Other parameter which I don't know how to tweak ="???"

I assume "ON" and "OFF" refer to the resource state?

try:
rsc_location(your_resource, BOX1, 10000)
rsc_location(your_resource, BOX2, 10000)
default_resource_failure_stickiness = 100
default_resource_stickiness = 10

That should let the resource ping-pong (due to failures) between your
nodes 200 times before we give up on it.

>
>
BOX1 Applications             BOX2 Applications
ON                            OFF
ON                            OFF
ON                            OFF
(App Failure)
OFF                           ON
OFF                           ON
OFF                           ON
(App Failure)
ON                            OFF
ON                            OFF
ON                            OFF
(App Failure)
OFF                           ON
OFF                           ON
OFF                           ON
Etc...                        Etc...




> ****************************************************************
>
> /etc/ha.d/ha.cf:
>
> logfacility   local0
> keepalive 2
> deadtime 20
> warntime 5
> initdead 20
> udpport       694
> bcast eth0            # Linux
> auto_failback off
> ping 172.18.3.2
> respawn hacluster /usr/lib64/heartbeat/ipfail
> apiauth ipfail gid=haclient uid=hacluster
> use_logd yes
> node ha-gmp1 ha-gmp2
> crm yes
> autojoin none
>
> /var/lib/heartbeat/crm/cib.xml:
>
>  <cib have_quorum="true" admin_epoch="0" generated="true"
> num_peers="2" cib_feature_revision="1.3" ccm_transition="2"
> dc_uuid="337eb755-7416-4172-9e2e-9e0d8bfa472d" epoch="25"
> num_updates="623" cib-last-written="Wed Mar 28 14:22:01 2007">
>    <configuration>
>      <crm_config>
>        <cluster_property_set id="cib-bootstrap-options">
>          <attributes>
>            <nvpair id="cib-bootstrap-options-default_action_timeout"
> name="default_action_timeout" value="15s"/>
>            <nvpair id="cib-bootstrap-options-symmetric_cluster"
> name="symmetric_cluster" value="true"/>
>            <nvpair id="cib-bootstrap-options-stonith_action"
> name="stonith_action" value="reboot"/>
>            <nvpair id="cib-bootstrap-options-stop_orphan_resources"
> name="stop_orphan_resources" value="true"/>
>            <nvpair id="cib-bootstrap-options-stop_orphan_actions"
> name="stop_orphan_actions" value="true"/>
>            <nvpair id="cib-bootstrap-options-remove_after_stop"
> name="remove_after_stop" value="true"/>
>            <nvpair id="cib-bootstrap-options-is_managed_default"
> name="is_managed_default" value="true"/>
>            <nvpair id="cib-bootstrap-options-short_resource_names"
> name="short_resource_names" value="true"/>
>            <nvpair
> id="cib-bootstrap-options-default_resource_stickiness"
> name="default_resource_stickiness" value="INFINITY"/>
>            <nvpair
> id="cib-bootstrap-options-default_resource_failure_stickiness"
> name="default_resource_failure_stickiness" value="0"/>
>            <nvpair id="cib-bootstrap-options-startup_fencing"
> name="startup_fencing" value="true"/>
>            <nvpair name="stonith_enabled"
> id="cib-bootstrap-options-stonith_enabled" value="false"/>
>            <nvpair id="cib-bootstrap-options-no_quorum_policy"
> name="no_quorum_policy" value="ignore"/>
>            <nvpair id="cib-bootstrap-options-pe-input-series-max"
> name="pe-input-series-max" value="200"/>
>            <nvpair id="cib-bootstrap-options-pe-error-series-max"
> name="pe-error-series-max" value="-1"/>
>            <nvpair id="cib-bootstrap-options-pe-warn-series-max"
> name="pe-warn-series-max" value="400"/>
>            <nvpair name="last-lrm-refresh"
> id="cib-bootstrap-options-last-lrm-refresh" value="1175091717"/>
>          </attributes>
>        </cluster_property_set>
>      </crm_config>
>      <nodes>
>        <node id="bd0ed1e3-0544-4bab-ae96-954bbeb53331" uname="ha-gmp1"
> type="normal"/>
>        <node id="337eb755-7416-4172-9e2e-9e0d8bfa472d" uname="ha-gmp2"
> type="normal"/>
>      </nodes>
>      <resources>
>        <group id="IpGrp">
>          <primitive id="HaIp" class="ocf" type="IPaddr"
> provider="heartbeat">
>            <instance_attributes id="HaIpAttr">
>              <attributes>
>                <nvpair name="ip" value="172.18.3.6"
> id="9f63f992-1a98-4936-9bd2-4be8450e229e"/>
>              </attributes>
>            </instance_attributes>
>          </primitive>
>        </group>
>        <group id="SrGrp">
>          <primitive id="SrApp" class="ocf" type="srctrl"
> provider="ARINC">
>            <operations>
>              <op id="SrMon" name="monitor" interval="20s"
> timeout="30s"/>
>            </operations>
>            <instance_attributes id="SrAttr">
>              <attributes>
>                <nvpair id="SrTargetRole" name="target_role"
> value="started"/>
>              </attributes>
>            </instance_attributes>
>          </primitive>
>        </group>
>        <group id="CapGrp">
>          <primitive id="CapApp" class="ocf" type="capctrl"
> provider="ARINC">
>            <operations>
>              <op id="CapMon" name="monitor" interval="20s"
> timeout="30s"/>
>            </operations>
>            <instance_attributes id="CapAttr">
>              <attributes>
>                <nvpair id="CapTargetRole" name="target_role"
> value="started"/>
>              </attributes>
>            </instance_attributes>
>          </primitive>
>        </group>
>        <group id="SfcGrp">
>          <primitive id="SfcApp" class="ocf" type="sfcctrl"
> provider="ARINC">
>            <operations>
>              <op id="SfcMon" name="monitor" interval="20s"
> timeout="30s"/>
>            </operations>
>            <instance_attributes id="SfcAttr">
>              <attributes>
>                <nvpair id="SfcTargetRole" name="target_role"
> value="started"/>
>              </attributes>
>            </instance_attributes>
>          </primitive>
>        </group>
>      </resources>
>      <constraints>
>        <rsc_colocation id="HaGrpColocation" from="IpGrp" to="SrGrp"
> score="INFINITY"/>
>        <rsc_colocation id="App1GrpColocation" from="SrGrp" to="CapGrp"
> score="INFINITY"/>
>        <rsc_colocation id="App2GrpColocation" from="CapGrp"
> to="SfcGrp" score="INFINITY"/>
>      </constraints>
>    </configuration>
>  </cib>
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems

_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems

Reply via email to