I am having some trouble getting resources to fail over in my two-node
Heartbeat 2.1.2 cluster.
I have attached my CIB.
I created a number of resources and colocation rules. All of the resources
need to run on the same node that has the drbd partition mounted.
If I kill the daemon process of one of the dependent resources, it is simply
restarted, which is good. Unfortunately, if I make the failure more severe by
renaming the daemon binary and then killing it, heartbeat simply calls the
stop action and leaves it at that. It doesn't bother migrating the resources
to the other node.
I understand that if all of the resources were in a group, the whole group
would fail over, but that also means that if one resource in the group failed
on both nodes, the whole group would be shut down, and I *definitely* don't
want that.
Also, if a resource fails and I later fix the problem, I can't seem to get
heartbeat to take it over again. I tried "crm_failcount -G -r <resource>" and
starting the process manually, then calling "crm_resource -P -H <node> -r
<resource>", but crm_mon still shows "(unmanaged) FAILED." The only thing
that works is stopping and restarting heartbeat on both nodes.
--
James Oakley
[EMAIL PROTECTED]
<cib admin_epoch="0" ccm_transition="2" cib_feature_revision="1.3" dc_uuid="1f7cf7cc-7d90-43fb-a7bd-f13fb5c203c1" epoch="6" generated="true" have_quorum="true" ignore_dtd="false" num_peers="2" num_updates="288">
<configuration>
<!-- Cluster-wide options, stored as name/value pairs in the
     "cib-bootstrap-options" property set.
     NOTE(review): stonith-enabled is "false" although startup-fencing and
     stonith-action are also configured; confirm this combination is intended. -->
<crm_config>
  <cluster_property_set id="cib-bootstrap-options">
    <attributes>
      <nvpair id="cib-bootstrap-options-short-resource-names"
              name="short-resource-names"
              value="true"/>
      <nvpair id="cib-bootstrap-options-startup-fencing"
              name="startup-fencing"
              value="true"/>
      <nvpair id="cib-bootstrap-options-stonith-enabled"
              name="stonith-enabled"
              value="false"/>
      <nvpair id="cib-bootstrap-options-symmetric-cluster"
              name="symmetric-cluster"
              value="true"/>
      <nvpair id="cib-bootstrap-options-stop-orphan-actions"
              name="stop-orphan-actions"
              value="true"/>
      <nvpair id="cib-bootstrap-options-stonith-action"
              name="stonith-action"
              value="reboot"/>
      <nvpair id="cib-bootstrap-options-is-managed-default"
              name="is-managed-default"
              value="true"/>
      <nvpair id="cib-bootstrap-options-stop-orphan-resources"
              name="stop-orphan-resources"
              value="true"/>
      <nvpair id="cib-bootstrap-options-no-quorum-policy"
              name="no-quorum-policy"
              value="stop"/>
      <nvpair id="cib-bootstrap-options-default-resource-failure-stickiness"
              name="default-resource-failure-stickiness"
              value="-100"/>
      <nvpair id="cib-bootstrap-options-probe_complete"
              name="probe_complete"
              value="true"/>
      <nvpair id="cib-bootstrap-options-transition-idle-timeout"
              name="transition-idle-timeout"
              value="60s"/>
      <nvpair id="cib-bootstrap-options-default-resource-stickiness"
              name="default-resource-stickiness"
              value="0"/>
    </attributes>
  </cluster_property_set>
</crm_config>
<!-- The two cluster members. The id of redun2 is the same value as the
     dc_uuid attribute on the enclosing cib element. -->
<nodes>
  <node id="1f7cf7cc-7d90-43fb-a7bd-f13fb5c203c1"
        uname="redun2"
        type="normal"/>
  <node id="cd34ed93-fa5d-4092-a618-ac6349351d13"
        uname="redun1"
        type="normal"/>
</nodes>
<resources>
<!-- Master/slave resource wrapping an ocf:heartbeat:drbd primitive for the
     DRBD resource "sip-shared". Meta attributes: clone_max 2,
     clone_node_max 1, master_max 1, master_node_max 1, notify enabled. -->
<master_slave id="shared_storage">
  <meta_attributes id="ma_shared_storage">
    <attributes>
      <nvpair id="shared_storage_ma_master_node_max"
              name="master_node_max" value="1"/>
      <nvpair id="shared_storage_ma_master_max"
              name="master_max" value="1"/>
      <nvpair id="shared_storage_ma_notify"
              name="notify" value="yes"/>
      <nvpair id="shared_storage_ma_clone_max"
              name="clone_max" value="2"/>
      <nvpair id="shared_storage_ma_clone_node_max"
              name="clone_node_max" value="1"/>
      <nvpair id="shared_storage_ma_globally_unique"
              name="globally_unique" value="false"/>
    </attributes>
  </meta_attributes>
  <primitive id="prim_shared_storage" class="ocf" provider="heartbeat" type="drbd">
    <operations>
      <!-- Monitor every 120s with a 60s timeout. -->
      <op id="shared_storage_op_monitor" name="monitor" interval="120s" timeout="60s"/>
    </operations>
    <instance_attributes id="ias_prim_shared_storage">
      <attributes>
        <nvpair id="shared_storage_attr_drbd_resource"
                name="drbd_resource" value="sip-shared"/>
      </attributes>
    </instance_attributes>
  </primitive>
</master_slave>
<!-- ocf:heartbeat:Filesystem resource: device /dev/drbd0, directory /shared,
     fstype reiserfs.
     NOTE(review): the start op carries start_delay="10s"; presumably a
     workaround to let the DRBD device become ready first. Confirm. -->
<primitive id="shared_filesystem" class="ocf" provider="heartbeat" type="Filesystem">
  <operations>
    <op id="shared_filesystem_op_monitor" name="monitor" interval="120s" timeout="60s"/>
    <op id="shared_filesystem_op_start" name="start" start_delay="10s" timeout="60s"/>
  </operations>
  <instance_attributes id="ias_shared_filesystem">
    <attributes>
      <nvpair id="shared_filesystem_attr_device"
              name="device" value="/dev/drbd0"/>
      <nvpair id="shared_filesystem_attr_directory"
              name="directory" value="/shared"/>
      <nvpair id="shared_filesystem_attr_fstype"
              name="fstype" value="reiserfs"/>
    </attributes>
  </instance_attributes>
</primitive>
<!-- drbdlinks resource of class "heartbeat", monitored every 30s; no
     instance attributes are set.
     NOTE(review): a provider attribute is given although the class is not
     "ocf"; confirm whether it is needed. -->
<primitive id="drbdlinks" class="heartbeat" provider="heartbeat" type="drbdlinks">
  <operations>
    <op id="drbdlinks_op_monitor" name="monitor" interval="30s" timeout="30s"/>
  </operations>
  <instance_attributes id="ias_drbdlinks">
    <attributes/>
  </instance_attributes>
</primitive>
<!-- LSB resource "postgresql": monitored every 30s (30s timeout), with a
     120s timeout on start; no instance attributes are set. -->
<primitive id="postgresql" class="lsb" type="postgresql">
  <operations>
    <op id="postgresql_op_monitor" name="monitor" interval="30s" timeout="30s"/>
    <op id="postgresql_op_start" name="start" timeout="120s"/>
  </operations>
  <instance_attributes id="ias_postgresql">
    <attributes/>
  </instance_attributes>
</primitive>
<!-- LSB resource "exim": monitored every 30s with a 30s timeout; no
     instance attributes are set. -->
<primitive id="exim" class="lsb" type="exim">
  <operations>
    <op id="exim_op_monitor" name="monitor" interval="30s" timeout="30s"/>
  </operations>
  <instance_attributes id="ias_exim">
    <attributes/>
  </instance_attributes>
</primitive>
<!-- LSB resource "inetd": monitored every 30s with a 30s timeout; no
     instance attributes are set. -->
<primitive id="inetd" class="lsb" type="inetd">
  <operations>
    <op id="inetd_op_monitor" name="monitor" interval="30s" timeout="30s"/>
  </operations>
  <instance_attributes id="ias_inetd">
    <attributes/>
  </instance_attributes>
</primitive>
<!-- LSB resource "dhcpd": monitored every 30s with a 30s timeout; no
     instance attributes are set. -->
<primitive id="dhcpd" class="lsb" type="dhcpd">
  <operations>
    <op id="dhcpd_op_monitor" name="monitor" interval="30s" timeout="30s"/>
  </operations>
  <instance_attributes id="ias_dhcpd">
    <attributes/>
  </instance_attributes>
</primitive>
<!-- LSB resource "atftpd": monitored every 30s with a 30s timeout; no
     instance attributes are set. -->
<primitive id="atftpd" class="lsb" type="atftpd">
  <operations>
    <op id="atftpd_op_monitor" name="monitor" interval="30s" timeout="30s"/>
  </operations>
  <instance_attributes id="ias_atftpd">
    <attributes/>
  </instance_attributes>
</primitive>
<!-- LSB resource "apache": monitored every 30s with a 30s timeout; no
     instance attributes are set. -->
<primitive id="apache" class="lsb" type="apache">
  <operations>
    <op id="apache_op_monitor" name="monitor" interval="30s" timeout="30s"/>
  </operations>
  <instance_attributes id="ias_apache">
    <attributes/>
  </instance_attributes>
</primitive>
<!-- LSB resource "cups": monitored every 30s with a 30s timeout; no
     instance attributes are set. -->
<primitive id="cups" class="lsb" type="cups">
  <operations>
    <op id="cups_op_monitor" name="monitor" interval="30s" timeout="30s"/>
  </operations>
  <instance_attributes id="ias_cups">
    <attributes/>
  </instance_attributes>
</primitive>
</resources>
<!-- Placement (rsc_colocation, every score is INFINITY) and ordering
     (rsc_order) rules. Each rule points from a dependent resource to the
     resource it depends on.
     NOTE(review): inetd, dhcpd and cups have a colocation rule only; no
     rsc_order is defined for them. Confirm that this is intended. -->
<constraints>
  <!-- shared_filesystem: colocated with the master role of shared_storage
       and ordered against its promote action. -->
  <rsc_colocation id="colocation_shared_filesystem_shared_storage:master:master"
                  from="shared_filesystem"
                  to="shared_storage"
                  to_role="master"
                  score="INFINITY"/>
  <rsc_order id="order_shared_filesystem_after_shared_storage:promote"
             from="shared_filesystem"
             to="shared_storage"
             to_action="promote"/>

  <!-- drbdlinks depends on shared_filesystem. -->
  <rsc_colocation id="colocation_drbdlinks_shared_filesystem"
                  from="drbdlinks" to="shared_filesystem" score="INFINITY"/>
  <rsc_order id="order_drbdlinks_after_shared_filesystem"
             from="drbdlinks" to="shared_filesystem"/>

  <!-- postgresql depends on drbdlinks. -->
  <rsc_colocation id="colocation_postgresql_drbdlinks"
                  from="postgresql" to="drbdlinks" score="INFINITY"/>
  <rsc_order id="order_postgresql_after_drbdlinks"
             from="postgresql" to="drbdlinks"/>

  <!-- exim depends on drbdlinks. -->
  <rsc_colocation id="colocation_exim_drbdlinks"
                  from="exim" to="drbdlinks" score="INFINITY"/>
  <rsc_order id="order_exim_after_drbdlinks"
             from="exim" to="drbdlinks"/>

  <!-- inetd and dhcpd: colocated with shared_filesystem (no ordering rule). -->
  <rsc_colocation id="colocation_inetd_shared_filesystem"
                  from="inetd" to="shared_filesystem" score="INFINITY"/>
  <rsc_colocation id="colocation_dhcpd_shared_filesystem"
                  from="dhcpd" to="shared_filesystem" score="INFINITY"/>

  <!-- atftpd depends on drbdlinks. -->
  <rsc_colocation id="colocation_atftpd_drbdlinks"
                  from="atftpd" to="drbdlinks" score="INFINITY"/>
  <rsc_order id="order_atftpd_after_drbdlinks"
             from="atftpd" to="drbdlinks"/>

  <!-- apache depends on drbdlinks. -->
  <rsc_colocation id="colocation_apache_drbdlinks"
                  from="apache" to="drbdlinks" score="INFINITY"/>
  <rsc_order id="order_apache_after_drbdlinks"
             from="apache" to="drbdlinks"/>

  <!-- cups: colocated with shared_filesystem (no ordering rule). -->
  <rsc_colocation id="colocation_cups_shared_filesystem"
                  from="cups" to="shared_filesystem" score="INFINITY"/>
</constraints>
</configuration>
</cib>
_______________________________________________
Linux-HA mailing list
Linux-HA@lists.linux-ha.org
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems