Hello guys,
Well I'm stonithed too ;)
I'm trying to configure SSH STONITH, yes I KNOW it's a bad idea.
It will soon be replaced by hardware.
In the meantime, I can't seem to get it to work.
I'm trying to use STONITH to get the nodes out of a split-brain
situation. I'm missing
a method with which I can test this, because pulling the cable out and
plugging it back in does not
seem to make it shoot. I've tried to make it shoot by setting
on_fail="fence" on the start operation of my
rsc_mis, but this does not seem to work either (I'm always returning 1
from my rsc script, to test this).
What should the logs look like on the shooting node? And on the murdered one?
Which daemon should be executing this (lrmd, pengine, crmd)?
Using """stonith -t ssh -p rnd-dev1,rnd-dev2 -T reset rnd-dev2""" works for me.
So what am I doing wrong? Do I even need the stonith-enabled=true setting?
Thanks for the help,
Maxim.
This is my (stripped down) CIB :
<!-- Stripped-down Heartbeat CIB for a two-node cluster (rnd-dev1, rnd-dev2):
     one resource group (grp_mvap) plus a cloned SSH STONITH resource (DoFencing). -->
<cib admin_epoch="0" have_quorum="true" ignore_dtd="false"
num_peers="1" cib_feature_revision="1.3" generated="true"
ccm_transition="7" dc_uuid="21cd228e-7bd4-496d-981e-674f9a74ebeb"
epoch="129" num_updates="2783" cib-last-written="Mon Aug 13 10:19:22
2007">
<configuration>
<!-- Cluster-wide options: resource stickiness 100, failure stickiness -100,
     and stonith-enabled set to true. -->
<crm_config>
<cluster_property_set id="cluster_properties">
<attributes>
<nvpair id="default-resource-stickiness"
name="default-resource-stickiness" value="100"/>
<nvpair id="default-resource-failure-stickiness"
name="default-resource-failure-stickiness" value="-100"/>
<nvpair id="stonith-enabled" name="stonith-enabled" value="true"/>
</attributes>
</cluster_property_set>
</crm_config>
<!-- The two cluster members; the dc_uuid on the cib element matches rnd-dev1's id. -->
<nodes>
<node id="21cd228e-7bd4-496d-981e-674f9a74ebeb"
uname="rnd-dev1" type="normal"/>
<node id="4b954d07-f1af-4eb4-b998-a569f84c4597"
uname="rnd-dev2" type="normal"/>
</nodes>
<resources>
<!-- Resource group: an IPaddr resource for 192.168.31.99 (rsc_ip) followed by
     the custom ML mis_agent resource (rsc_mis). -->
<group id="grp_mvap">
<primitive id="rsc_ip" class="ocf" type="IPaddr" provider="heartbeat">
<operations>
<op id="rsc_ip_monitor" name="monitor" interval="5s" timeout="3s"/>
</operations>
<instance_attributes id="ia_ip">
<attributes>
<nvpair id="nvpair_ip_address" name="ip" value="192.168.31.99"/>
</attributes>
</instance_attributes>
</primitive>
<primitive id="rsc_mis" class="ocf" type="mis_agent" provider="ML">
<operations>
<!-- The start operation is marked on_fail="fence"; per the message above, this is
     the poster's attempt to trigger a STONITH when start fails. -->
<op id="op_mis_start" name="start" timeout="15s" on_fail="fence"/>
<!-- Monitor every 1000ms with a 3s timeout; carries an operation-level attribute
     monitor_action=CHECK_PID (its meaning is defined by mis_agent, not shown here). -->
<op id="op_mis_monitor_pid_check" name="monitor"
timeout="3s" interval="1000ms">
<instance_attributes id="ia_mis_monitor_pid_check">
<attributes>
<nvpair id="nvpair_mis_monitor_pid_check_action"
name="monitor_action" value="CHECK_PID"/>
</attributes>
</instance_attributes>
</op>
</operations>
</primitive>
</group>
<!-- Cloned STONITH resource: clone_max=2 and clone_node_max=1
     (presumably one fencing instance per node; confirm against the clone documentation). -->
<clone id="DoFencing">
<instance_attributes id="DoFencing_instance_attributes">
<attributes>
<nvpair id="DoFencing_stonith_clone_max" name="clone_max"
value="2"/>
<nvpair id="DoFencing_stonith_clone_node_max"
name="clone_node_max" value="1"/>
</attributes>
</instance_attributes>
<!-- STONITH primitive of type ssh; its hostlist names both cluster nodes.
     Both operations declare prereq="nothing". -->
<primitive id="child_DoFencing" class="stonith" type="ssh"
provider="heartbeat">
<operations>
<op id="op_stonith_ssh_monitor" name="monitor"
interval="5s" timeout="20s" prereq="nothing"/>
<op id="op_stonith_ssh_start" name="start" timeout="20s"
prereq="nothing"/>
</operations>
<instance_attributes id="child_DoFencing_instance_attributes">
<attributes>
<nvpair id="nvpair_stonith_ssh_hostlist"
name="hostlist" value="rnd-dev1,rnd-dev2"/>
</attributes>
</instance_attributes>
</primitive>
</clone>
</resources>
<!-- No location, ordering or colocation constraints are defined. -->
<constraints/>
</configuration>
</cib>
--
Cheers,
Maxim Veksler
"Free as in Freedom" - Do u GNU ?
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems