Hello Lino,
first of all, update to version 2.1.3.
> 1. If I unplug both NICs of the active Node, I get a Splitbrain after
> I reconnect them again. Is there any solution to prevent this using
> heartbeat R1 or which possibilities would I have with R2?
Use a STONITH device so that, in the event of isolation, one node can reset
the other. Also use redundant heartbeat links.
> 2. How can I tell heartbeat to make an automatic failover to my
> passive node if any of my MySQL Process has a hangup or terminates?
> Can you monitor these processes and in case of failure provoke an
> automatic failover? If yes, which tools would I have to use?
With heartbeat version 1 you can't monitor a process; with version 2.1.3
you can. By default, heartbeat tries to restart the process on the same
node it was running on before. If that fails, it switches the service to
another available node.
> I dug around the linux-ha site and other mailing-list articles, but so
> far without success. Has anyone tried this combination yet?
I set up a postgres cluster a few weeks ago. I used ext3 on top of drbd with a
single postgres instance that can run on one of the two nodes available.
ha.cf:
use_logd yes
bcast eth1
mcast eth0.2 239.0.0.2 694 1 0
node postgres-01 postgres-02
respawn hacluster /usr/lib/heartbeat/dopd
apiauth dopd uid=hacluster gid=haclient
watchdog /dev/watchdog
crm on
drbd.conf:
global {
usage-count no;
}
common {
syncer {
rate 100M;
}
handlers {
outdate-peer "/usr/lib/heartbeat/drbd-peer-outdater";
}
}
resource postgres {
protocol C;
startup {
degr-wfc-timeout 120;
}
disk {
on-io-error pass_on;
fencing resource-only;
}
on postgres-01 {
device /dev/drbd0;
disk /dev/sda3;
address 172.17.0.21:7788;
meta-disk internal;
}
on postgres-02 {
device /dev/drbd0;
disk /dev/sda3;
address 172.17.0.22:7788;
meta-disk internal;
}
}
cib.xml:
<configuration>
<crm_config>
<cluster_property_set id="cib-bootstrap-options">
<attributes>
<nvpair name="stonith-enabled" value="true"
id="stonith-enabled"/>
<nvpair name="stonith-action" value="reboot"
id="stonith-action"/>
</attributes>
</cluster_property_set>
</crm_config>
<resources>
<clone id="DoFencing">
<instance_attributes>
<attributes>
<nvpair id="fencing-01"
name="clone_max" value="2"/>
<nvpair id="fencing-02"
name="clone_node_max" value="1"/>
</attributes>
</instance_attributes>
<primitive id="postgres-01-fencing" class="stonith"
type="external/ipmi" provider="heartbeat">
<operations>
<op id="postgres-01-fencing-monitor"
name="monitor" interval="60s" timeout="20s" prereq="nothing"/>
<op id="postgres-01-fencing-start"
name="start" timeout="20s" prereq="nothing"/>
</operations>
<instance_attributes>
<attributes>
<nvpair
id="postgres-01-fencing-hostname" name="hostname" value="postgres-01"/>
<nvpair
id="postgres-01-fencing-ipaddr" name="ipaddr" value="172.18.0.121"/>
<nvpair
id="postgres-01-fencing-userid" name="userid" value="Administrator"/>
<nvpair
id="postgres-01-fencing-passwd" name="passwd" value="evesun"/>
</attributes>
</instance_attributes>
</primitive>
<primitive id="postgres-02-fencing" class="stonith"
type="external/ipmi" provider="heartbeat">
<operations>
<op id="postgres-02-fencing-monitor"
name="monitor" interval="60s" timeout="20s" prereq="nothing"/>
<op id="postgres-02-fencing-start"
name="start" timeout="20s" prereq="nothing"/>
</operations>
<instance_attributes>
<attributes>
<nvpair
id="postgres-02-fencing-hostname" name="hostname" value="postgres-02"/>
<nvpair
id="postgres-02-fencing-ipaddr" name="ipaddr" value="172.18.0.122"/>
<nvpair
id="postgres-02-fencing-userid" name="userid" value="Administrator"/>
<nvpair
id="postgres-02-fencing-passwd" name="passwd" value="evesun"/>
</attributes>
</instance_attributes>
</primitive>
</clone>
<master_slave id="ms-drbd0">
<meta_attributes id="ma-ms-drbd0">
<attributes>
<nvpair id="ma-ms-drbd0-1"
name="clone_max" value="2"/>
<nvpair id="ma-ms-drbd0-2"
name="clone_node_max" value="1"/>
<nvpair id="ma-ms-drbd0-3"
name="master_max" value="1"/>
<nvpair id="ma-ms-drbd0-4"
name="master_node_max" value="1"/>
<nvpair id="ma-ms-drbd0-5"
name="notify" value="yes"/>
<nvpair id="ma-ms-drbd0-6"
name="globally_unique" value="false"/>
</attributes>
</meta_attributes>
<primitive id="drbd0" class="ocf" provider="heartbeat"
type="drbd">
<instance_attributes id="ia-drbd0">
<attributes>
<nvpair id="ia-drbd0-1"
name="drbd_resource" value="postgres"/>
</attributes>
</instance_attributes>
<operations>
<op id="op-ms-drbd2-1" name="monitor"
interval="60s" timeout="60s" start_delay="30s" role="Master"/>
<op id="op-ms-drbd2-2" name="monitor"
interval="61s" timeout="60s" start_delay="30s" role="Slave"/>
</operations>
</primitive>
</master_slave>
<group id="postgres-cluster">
<primitive class="ocf" provider="heartbeat"
type="Filesystem" id="fs0">
<instance_attributes id="ia-fs0">
<attributes>
<nvpair id="ia-fs0-1"
name="fstype" value="ext3"/>
<nvpair name="directory"
id="ia-fs0-2" value="/srv/postgres"/>
<nvpair id="ia-fs0-3"
name="device" value="/dev/drbd0"/>
</attributes>
</instance_attributes>
<operations>
<op id="fs0-monitor0" name="monitor"
interval="60s" timeout="120s" start_delay="1m"/>
</operations>
</primitive>
<primitive class="ocf" provider="heartbeat"
type="IPaddr2" id="ip0">
<instance_attributes id="ia-ip0">
<attributes>
<nvpair id="ia-ip0-1" name="ip"
value="172.17.0.20"/>
<nvpair id="ia-ip0-2"
name="cidr_netmask" value="24"/>
<nvpair id="ia-ip0-3"
name="nic" value="eth0.2"/>
</attributes>
</instance_attributes>
<operations>
<op id="ip0-monitor0" name="monitor"
interval="60s" timeout="120s" start_delay="1m"/>
</operations>
</primitive>
<primitive class="ocf" provider="heartbeat"
type="pgsql" id="pgsql0">
<instance_attributes id="ia-pgsql0">
<attributes>
<nvpair id="ia-pgsql0-1"
name="pgctl" value="/usr/lib/postgresql/8.1/bin/pg_ctl"/>
<nvpair id="ia-pgsql0-2"
name="start_opt" value="--config_file=/srv/postgres/etc/postgresql.conf"/>
<nvpair id="ia-pgsql0-3"
name="pgdata" value="/srv/postgres/data"/>
<nvpair id="ia-pgsql0-4"
name="logfile" value="/srv/postgres/postgresql.log"/>
</attributes>
</instance_attributes>
<operations>
<op id="pgsql0-monitor0" name="monitor"
interval="60s" timeout="120s" start_delay="1m"/>
<op id="pgsql0-start0" name="start"
timeout="120s" prereq="nothing"/>
</operations>
</primitive>
</group>
</resources>
<constraints>
<rsc_location id="drbd0-placement-1" rsc="ms-drbd0">
<rule id="drbd0-rule-1" score="-INFINITY">
<expression id="exp-01" value="postgres-01"
attribute="#uname" operation="ne"/>
<expression id="exp-02" value="postgres-02"
attribute="#uname" operation="ne"/>
</rule>
</rsc_location>
<rsc_order id="postgres_promotes_ms-drbd0"
from="postgres-cluster" action="start" to="ms-drbd0" to_action="promote"/>
<rsc_colocation id="postgres_on_drbd0" to="ms-drbd0"
to_role="master" from="postgres-cluster" score="infinity"/>
</constraints>
</configuration>
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems