Hi,
I had to set up Slony1 + Linux-HA and I needed to
write my own OCF RA scripts for it. My boss told me
I should send them upstream, so here they are.
"Slony1" is responsible for starting and stopping
slon+slon_watchdog. You should run one instance on
each PostgreSQL node. You have to assign the
correct node IDs to the instances; they will check
themselves against _replication_schema.getlocalnodeid().
"slon_start" needs the node ID, anyway.
"Slony1-master" is the one that should be started where
the master node is. The easiest way to achieve this is to
bind it to the master DB's virtual IP.
Instead of custom embedded "slonik" scripts, both scripts
make use of the Slony1 administration scripts. This means
you need to set up your replication with "slonik_build_env"
and modify its output according to the slon_tools.conf
example found in slony1-1.2.x to set up your replication set.
These scripts try to be truly generic and rely on external
configuration files only.
You can do a "controlled failover", in which case no transaction
is lost, as slonik_move_set will be used instead of slonik_failover
if you do "touch /tmp/CONTROLLED_FAILOVER" before
migrating the master to another node.
I couldn't do it with a true multistate master/slave OCF RA;
the documentation on www.linux-ha.org was not really clear
and my half-finished script made my resource unmanaged
all the time while I tested it. Splitting the script into two
simpler resources made it faster for me to implement and test.
Best regards,
Zoltán Böszörményi
--
----------------------------------
Zoltán Böszörményi
Cybertec Schönig & Schönig GmbH
http://www.postgresql.at/
#!/bin/sh
#
# Slony1 OCF RA. Slony1 replication script.
#
# Copyright (c) 2008 Cybertec GmbH, Zoltán Böszörményi
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
#######################################################################
# Print the OCF meta-data XML describing the Slony1 agent: its
# parameters (with the same defaults the script applies itself) and the
# actions it supports.
#
# Parameters for which the script supplies a default are declared
# required="0"; only node_id, which has no default, is required.
# The node_id content type is "integer" (the agent previously declared
# "number").
# The here-document delimiter is quoted so the XML is emitted verbatim.
meta_data() {
	cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Slony1" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This is a Slony1 Resource Agent. It controls the Slony1
replication for PostgreSQL.
</longdesc>
<shortdesc lang="en">Slony1 resource agent</shortdesc>
<parameters>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Config file name for Slony1
</longdesc>
<shortdesc lang="en">Config file</shortdesc>
<content type="string" default="/etc/slon_tools.conf" />
</parameter>
<parameter name="cluster_name" unique="0" required="0">
<longdesc lang="en">
Cluster name in Slony1
</longdesc>
<shortdesc lang="en">Cluster name</shortdesc>
<content type="string" default="replication" />
</parameter>
<parameter name="node_id" unique="0" required="1">
<longdesc lang="en">
Node ID of this DB node inside Slony1
</longdesc>
<shortdesc lang="en">Node ID</shortdesc>
<content type="integer" />
</parameter>
<parameter name="psql" unique="0" required="0">
<longdesc lang="en">
Path to psql command.
</longdesc>
<shortdesc lang="en">psql</shortdesc>
<content type="string" default="/usr/bin/psql" />
</parameter>
<parameter name="pgdba" unique="0" required="0">
<longdesc lang="en">
User that owns PostgreSQL.
</longdesc>
<shortdesc lang="en">pgdba</shortdesc>
<content type="string" default="postgres" />
</parameter>
<parameter name="pgport" unique="0" required="0">
<longdesc lang="en">
Port where PostgreSQL is listening
</longdesc>
<shortdesc lang="en">pgport</shortdesc>
<content type="string" default="5432" />
</parameter>
<parameter name="pgdb" unique="0" required="0">
<longdesc lang="en">
Database that is replicated and monitored.
</longdesc>
<shortdesc lang="en">pgdb</shortdesc>
<content type="string" default="template1" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="100" />
<action name="monitor" timeout="20" interval="10" depth="0"
start-delay="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
# TERM handling: the signal is logged and the agent keeps running.
# Per the original author's note, this is deliberate so that lrmd can be
# tested for making sure the agent really exits.
sigterm_handler () {
	ocf_log info "They use TERM to bring us down. No such luck."
	return
}
trap 'sigterm_handler' TERM
# Print a short usage message on stdout.
# Only the actions the dispatcher at the bottom of this script actually
# handles are listed (migrate_to/migrate_from are not implemented).
slony1_usage() {
	cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
#
# Run the given command line as the PostgreSQL owner ($OCF_RESKEY_pgdba).
#
# All arguments are joined with single spaces into ONE string that is
# handed to "su -c", so the owner's shell parses that string again.
# Callers therefore embed their own escaped quotes around any argument
# that has to survive the second parse.
# The user name is quoted so it is passed to su as exactly one word.
#
runasowner() {
	su "$OCF_RESKEY_pgdba" -c "$*"
}
# Start action: launch slon for this node through slon_start.
#
# Returns:
#   OCF_SUCCESS      slony1_monitor already reports success, or
#                    slon_start exited with status 0
#   OCF_ERR_GENERIC  the node ID reported by the local database differs
#                    from the configured node_id, or slon_start failed
slony1_start() {
	slony1_monitor
	RET="$?"
	if [ "$RET" = "$OCF_SUCCESS" ]; then
		return $OCF_SUCCESS
	fi
	# Don't start slon if it wouldn't run locally
	NODEID="`slony1_getlocalnodeid`"
	if [ "$NODEID" != "$OCF_RESKEY_node_id" ]; then
		ocf_log err "Local Slony1 node ID '$NODEID' does not match configured node_id '$OCF_RESKEY_node_id'"
		return $OCF_ERR_GENERIC
	fi
	# Report a failed launch instead of unconditionally claiming success.
	if ! slon_start --config "$OCF_RESKEY_config" "$NODEID"; then
		ocf_log err "slon_start failed for node $NODEID"
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}
# Stop action: terminate slon through slon_kill.
#
# Returns:
#   OCF_SUCCESS      slony1_monitor reports NOT_RUNNING (nothing to do),
#                    or slon_kill exited with status 0
#   OCF_ERR_GENERIC  slon_kill exited with a non-zero status
slony1_stop() {
	slony1_monitor
	RET="$?"
	# "=" is the portable string comparison for [ under /bin/sh; "==" is
	# a bash extension.
	if [ "$RET" = "$OCF_NOT_RUNNING" ]; then
		return $OCF_SUCCESS
	fi
	# Map the helper's exit status onto an OCF return code rather than
	# leaking an arbitrary value to the caller.
	if ! slon_kill --config "$OCF_RESKEY_config"; then
		ocf_log err "slon_kill failed"
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}
# Print (on stdout) the Slony1 node ID the local database reports, by
# calling getlocalnodeid() in the "_<cluster_name>" schema through psql.
#
# The whole psql invocation is ONE command (joined with backslash
# continuations); previously the "-U ..." part sat on a line of its own
# and was executed as a separate command. The configured psql path
# ($OCF_RESKEY_psql) is used instead of whatever "psql" is on PATH.
# The SQL is wrapped in literal double quotes because runasowner hands
# the joined string to a second shell.
slony1_getlocalnodeid () {
	SQL="SELECT _${OCF_RESKEY_cluster_name}.getlocalnodeid('_${OCF_RESKEY_cluster_name}');"
	runasowner "$OCF_RESKEY_psql" -A -t -c "\"$SQL\"" \
		-h localhost -p "$OCF_RESKEY_pgport" \
		-U "$OCF_RESKEY_pgdba" "$OCF_RESKEY_pgdb"
}
# Print (on stdout) the set_origin value(s) found in the cluster's
# sl_set table, one per distinct origin, as returned by psql.
#
# The psql invocation is a single command joined with backslash
# continuations (the "-U ..." part used to run as a separate command),
# and the configured psql path ($OCF_RESKEY_psql) is used.
# The SQL is passed quoted, so "COUNT(*)" is never subject to pathname
# expansion in this shell.
slony1_getmasternodeid () {
	SQL="SELECT set_origin FROM _${OCF_RESKEY_cluster_name}.sl_set GROUP BY set_origin HAVING COUNT(*) > 0;"
	runasowner "$OCF_RESKEY_psql" -A -t -c "\"$SQL\"" \
		-h localhost -p "$OCF_RESKEY_pgport" \
		-U "$OCF_RESKEY_pgdba" "$OCF_RESKEY_pgdb"
}
# Connectivity probe: run "SELECT 1;" through psql and print the result
# on stdout (callers compare the output against "1").
#
# The psql invocation is a single command joined with backslash
# continuations (the "-U ..." part used to run as a separate command),
# and the configured psql path ($OCF_RESKEY_psql) is used.
slony1_get1 () {
	SQL="SELECT 1;"
	runasowner "$OCF_RESKEY_psql" -A -t -c "\"$SQL\"" \
		-h localhost -p "$OCF_RESKEY_pgport" \
		-U "$OCF_RESKEY_pgdba" "$OCF_RESKEY_pgdb"
}
# Monitor action for the slon daemon of this node.
#
# Returns:
#   OCF_NOT_RUNNING  the node ID reported by the local database differs
#                    from the configured node_id, or neither a slon
#                    process nor exactly one slon_watchdog was found
#   OCF_SUCCESS      a slon process exists, or slon is absent but exactly
#                    one slon_watchdog is present
slony1_monitor() {
	# Monitor _MUST!_ differentiate correctly between running
	# (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
	# That is THREE states, not just yes/no.
	# If the assigned node_id != the local node ID then report accordingly
	NODEID="`slony1_getlocalnodeid`"
	# Compare the value just fetched (a misspelled variable name here
	# used to make this test fail for every configured node_id).
	if [ "$NODEID" != "$OCF_RESKEY_node_id" ]; then
		return $OCF_NOT_RUNNING
	fi
	# Perform checks...
	SLONPIDS="`pidof slon`"
	if [ -z "${SLONPIDS}" ]; then
		# No SLONs running? Maybe it just crashed and is being dealt
		# with by slon_watchdog. See if that is running.
		# The [s] bracket keeps grep from matching its own command line.
		SLONWATCHDOG="`ps uxw | grep -c '[s]lon_watchdog'`"
		if [ "$SLONWATCHDOG" != "1" ]; then
			return $OCF_NOT_RUNNING
		fi
		# Fall through, treat "no slon, slon_watchdog running" as
		# "slon running"; slon daemons will be restarted by the
		# watchdog anyway.
	fi
	return $OCF_SUCCESS
}
# validate-all action: sanity-check the environment and configuration.
#
# Returns:
#   OCF_ERR_INSTALLED   $OCF_RESKEY_psql is not executable, or the
#                       "SELECT 1" probe did not print "1"
#   OCF_ERR_CONFIGURED  node_id is empty or contains a non-digit
#   OCF_SUCCESS         otherwise
slony1_validate() {
	# Is psql installed?
	if [ ! -x "$OCF_RESKEY_psql" ]; then
		return $OCF_ERR_INSTALLED
	fi
	# Is PostgreSQL running?
	ONE="`slony1_get1`"
	if [ "$ONE" != "1" ]; then
		return $OCF_ERR_INSTALLED
	fi
	# node_id is compared against the database's numeric node ID, so it
	# must be a non-empty string of digits. A case pattern avoids the
	# unquoted "[ -z $var ]" test, which misbehaves on values with spaces.
	case "$OCF_RESKEY_node_id" in
	''|*[!0-9]*)
		return $OCF_ERR_CONFIGURED
		;;
	esac
	return $OCF_SUCCESS
}
# Built-in defaults for the optional parameters. "${VAR=value}" assigns
# only when VAR is unset, so a value supplied in the environment (even
# an empty one) is kept.
: "${OCF_RESKEY_config=/etc/slon_tools.conf}"
: "${OCF_RESKEY_cluster_name=replication}"
: "${OCF_RESKEY_psql=/usr/bin/psql}"
: "${OCF_RESKEY_pgdba=postgres}"
: "${OCF_RESKEY_pgport=5432}"
: "${OCF_RESKEY_pgdb=template1}"

# Dispatch on the requested action. The informational actions exit
# right away; the others fall through so their status is logged below.
case "$__OCF_ACTION" in
	meta-data)
		meta_data
		exit $OCF_SUCCESS
		;;
	usage|help)
		slony1_usage
		exit $OCF_SUCCESS
		;;
	start)
		slony1_start
		;;
	stop)
		slony1_stop
		;;
	monitor)
		slony1_monitor
		;;
	validate-all)
		slony1_validate
		;;
	*)
		slony1_usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
# Status of the action function selected above.
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
#!/bin/sh
#
# Slony1-master OCF RA. Controls the master state of Slony1
#
# Copyright (c) 2008 Cybertec GmbH, Zoltán Böszörményi
# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like. Any license provided herein, whether implied or
# otherwise, applies only to this software file. Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
#######################################################################
# Initialization:
. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
#######################################################################
# Print the OCF meta-data XML describing the Slony1-master agent: its
# parameters (with the same defaults the script applies itself) and the
# actions it supports.
#
# The short descriptions name this agent and the "config" parameter
# correctly (the latter used to read "Cluster name"), and parameters for
# which the script supplies a default are declared required="0".
# The here-document delimiter is quoted so the XML is emitted verbatim.
meta_data() {
	cat <<'END'
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="Slony1-master" version="1.0">
<version>1.0</version>
<longdesc lang="en">
This is a Slony1-master Resource Agent.
It controls the Slony1 master node.
</longdesc>
<shortdesc lang="en">Slony1-master resource agent</shortdesc>
<parameters>
<parameter name="config" unique="0" required="0">
<longdesc lang="en">
Config file name for Slony1
</longdesc>
<shortdesc lang="en">Config file</shortdesc>
<content type="string" default="/etc/slon_tools.conf" />
</parameter>
<parameter name="cluster_name" unique="0" required="0">
<longdesc lang="en">
Cluster name in Slony1
</longdesc>
<shortdesc lang="en">Cluster name</shortdesc>
<content type="string" default="replication" />
</parameter>
<parameter name="psql" unique="0" required="0">
<longdesc lang="en">
Path to psql command.
</longdesc>
<shortdesc lang="en">psql</shortdesc>
<content type="string" default="/usr/bin/psql" />
</parameter>
<parameter name="pgdba" unique="0" required="0">
<longdesc lang="en">
User that owns PostgreSQL.
</longdesc>
<shortdesc lang="en">pgdba</shortdesc>
<content type="string" default="postgres" />
</parameter>
<parameter name="pgport" unique="0" required="0">
<longdesc lang="en">
Port where PostgreSQL is listening
</longdesc>
<shortdesc lang="en">pgport</shortdesc>
<content type="string" default="5432" />
</parameter>
<parameter name="pgdb" unique="0" required="0">
<longdesc lang="en">
Database that is replicated and monitored.
</longdesc>
<shortdesc lang="en">pgdb</shortdesc>
<content type="string" default="template1" />
</parameter>
</parameters>
<actions>
<action name="start" timeout="90" />
<action name="stop" timeout="100" />
<action name="monitor" timeout="20" interval="10" depth="0"
start-delay="0" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
END
}
#######################################################################
# TERM handling: the signal is logged and the agent keeps running.
# Per the original author's note, this is deliberate so that lrmd can be
# tested for making sure the agent really exits.
sigterm_handler () {
	ocf_log info "They use TERM to bring us down. No such luck."
	return
}
trap 'sigterm_handler' TERM
# Print a short usage message on stdout.
# Only the actions the dispatcher at the bottom of this script actually
# handles are listed (migrate_to/migrate_from are not implemented).
slony1_usage() {
	cat <<END
usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
}
#
# Run the given command line as the PostgreSQL owner ($OCF_RESKEY_pgdba).
#
# All arguments are joined with single spaces into ONE string that is
# handed to "su -c", so the owner's shell parses that string again.
# Callers therefore embed their own escaped quotes around any argument
# that has to survive the second parse.
# The user name is quoted so it is passed to su as exactly one word.
#
runasowner() {
	su "$OCF_RESKEY_pgdba" -c "$*"
}
# Start action: make the local node the origin ("master").
#
# If slony1_monitor already reports success nothing is done. Otherwise
# the current origin and the local node ID are looked up and either
# slonik_move_set (when /tmp/CONTROLLED_FAILOVER exists) or
# slonik_failover is invoked with both IDs.
#
# Returns:
#   OCF_SUCCESS      already master, or the slonik_* helper exited 0
#   OCF_ERR_GENERIC  a node ID could not be determined, or the helper
#                    exited with a non-zero status
#
# NOTE(review): the Slony-I 1.2 admin scripts are documented as
# generating slonik scripts on stdout; confirm whether this output needs
# to be piped into "slonik" in the target environment.
# NOTE(review): the flag file is not removed here, so it keeps selecting
# the controlled path until someone deletes it -- confirm intent.
slony1_start() {
	slony1_monitor
	RET="$?"
	if [ "$RET" = "$OCF_SUCCESS" ]; then
		return $OCF_SUCCESS
	fi
	OLD_MASTER="`slony1_getmasternodeid`"
	NEW_MASTER="`slony1_getlocalnodeid`"
	# Without both IDs the helper would be called with missing arguments.
	if [ -z "$OLD_MASTER" ] || [ -z "$NEW_MASTER" ]; then
		ocf_log err "Cannot determine Slony1 node IDs (old master '$OLD_MASTER', new master '$NEW_MASTER')"
		return $OCF_ERR_GENERIC
	fi
	# Each helper call is ONE command line; the target node ID used to
	# sit on a line of its own and was executed as a separate command.
	if [ -f /tmp/CONTROLLED_FAILOVER ]; then
		slonik_move_set --config "$OCF_RESKEY_config" 1 \
			"$OLD_MASTER" "$NEW_MASTER"
	else
		slonik_failover --config "$OCF_RESKEY_config" \
			"$OLD_MASTER" "$NEW_MASTER"
	fi
	if [ "$?" -ne 0 ]; then
		ocf_log err "Moving the Slony1 origin from node $OLD_MASTER to node $NEW_MASTER failed"
		return $OCF_ERR_GENERIC
	fi
	return $OCF_SUCCESS
}
# Stop action: performs no work and always reports success.
slony1_stop () {
	# Nothing is undone here; just report success.
	return ${OCF_SUCCESS}
}
# Print (on stdout) the number of replication sets in the cluster's
# sl_set table whose origin is the local node.
#
# The schema name is built from $OCF_RESKEY_cluster_name (the variable
# used before was never set anywhere in this script, which produced the
# schema name "_"). The psql invocation is a single command joined with
# backslash continuations, using the configured psql path.
#
# NOTE(review): slony1_monitor compares this output against "1", so a
# node that is origin of several sets is not reported as master --
# confirm whether that is intended.
slony1_is_master () {
	SQL="SELECT count(*) FROM _${OCF_RESKEY_cluster_name}.sl_set WHERE set_origin=_${OCF_RESKEY_cluster_name}.getlocalnodeid('_${OCF_RESKEY_cluster_name}');"
	runasowner "$OCF_RESKEY_psql" -A -t -c "\"$SQL\"" \
		-h localhost -p "$OCF_RESKEY_pgport" \
		-U "$OCF_RESKEY_pgdba" "$OCF_RESKEY_pgdb"
}
# Print (on stdout) the Slony1 node ID the local database reports, by
# calling getlocalnodeid() in the "_<cluster_name>" schema through psql.
#
# The whole psql invocation is ONE command (joined with backslash
# continuations); previously the "-U ..." part sat on a line of its own
# and was executed as a separate command. The configured psql path
# ($OCF_RESKEY_psql) is used instead of whatever "psql" is on PATH.
# The SQL is wrapped in literal double quotes because runasowner hands
# the joined string to a second shell.
slony1_getlocalnodeid () {
	SQL="SELECT _${OCF_RESKEY_cluster_name}.getlocalnodeid('_${OCF_RESKEY_cluster_name}');"
	runasowner "$OCF_RESKEY_psql" -A -t -c "\"$SQL\"" \
		-h localhost -p "$OCF_RESKEY_pgport" \
		-U "$OCF_RESKEY_pgdba" "$OCF_RESKEY_pgdb"
}
# Print (on stdout) the set_origin value(s) found in the cluster's
# sl_set table, one per distinct origin, as returned by psql.
#
# The psql invocation is a single command joined with backslash
# continuations (the "-U ..." part used to run as a separate command),
# and the configured psql path ($OCF_RESKEY_psql) is used.
# The SQL is passed quoted, so "COUNT(*)" is never subject to pathname
# expansion in this shell.
slony1_getmasternodeid () {
	SQL="SELECT set_origin FROM _${OCF_RESKEY_cluster_name}.sl_set GROUP BY set_origin HAVING COUNT(*) > 0;"
	runasowner "$OCF_RESKEY_psql" -A -t -c "\"$SQL\"" \
		-h localhost -p "$OCF_RESKEY_pgport" \
		-U "$OCF_RESKEY_pgdba" "$OCF_RESKEY_pgdb"
}
# Connectivity probe: run "SELECT 1;" through psql and print the result
# on stdout (callers compare the output against "1").
#
# The psql invocation is a single command joined with backslash
# continuations (the "-U ..." part used to run as a separate command),
# and the configured psql path ($OCF_RESKEY_psql) is used.
slony1_get1 () {
	SQL="SELECT 1;"
	runasowner "$OCF_RESKEY_psql" -A -t -c "\"$SQL\"" \
		-h localhost -p "$OCF_RESKEY_pgport" \
		-U "$OCF_RESKEY_pgdba" "$OCF_RESKEY_pgdb"
}
# Monitor action for the master role.
#
# Returns OCF_SUCCESS when slony1_is_master prints exactly "1",
# OCF_NOT_RUNNING for any other output (including an empty one).
slony1_monitor() {
	# Monitor _MUST!_ differentiate correctly between running
	# (SUCCESS), failed (ERROR) or _cleanly_ stopped (NOT RUNNING).
	# That is THREE states, not just yes/no.
	MASTER="`slony1_is_master`"
	# "=" is the portable string comparison for [ under /bin/sh; "==" is
	# a bash extension that other sh implementations may reject.
	if [ "$MASTER" = "1" ]; then
		return $OCF_SUCCESS
	fi
	return $OCF_NOT_RUNNING
}
# validate-all action: sanity-check the environment.
#
# Returns:
#   OCF_ERR_INSTALLED  $OCF_RESKEY_psql is not executable, or the
#                      "SELECT 1" probe did not print "1"
#   OCF_SUCCESS        otherwise
#
# No node_id check is made: this agent declares no node_id parameter in
# its meta-data and never reads one, so requiring it made validate-all
# fail for every correctly configured resource.
slony1_validate() {
	# Is psql installed?
	if [ ! -x "$OCF_RESKEY_psql" ]; then
		return $OCF_ERR_INSTALLED
	fi
	# Is PostgreSQL running?
	ONE="`slony1_get1`"
	if [ "$ONE" != "1" ]; then
		return $OCF_ERR_INSTALLED
	fi
	return $OCF_SUCCESS
}
# Built-in defaults for the optional parameters. "${VAR=value}" assigns
# only when VAR is unset, so a value supplied in the environment (even
# an empty one) is kept.
: "${OCF_RESKEY_config=/etc/slon_tools.conf}"
: "${OCF_RESKEY_cluster_name=replication}"
: "${OCF_RESKEY_psql=/usr/bin/psql}"
: "${OCF_RESKEY_pgdba=postgres}"
: "${OCF_RESKEY_pgport=5432}"
: "${OCF_RESKEY_pgdb=template1}"

# Dispatch on the requested action. The informational actions exit
# right away; the others fall through so their status is logged below.
case "$__OCF_ACTION" in
	meta-data)
		meta_data
		exit $OCF_SUCCESS
		;;
	usage|help)
		slony1_usage
		exit $OCF_SUCCESS
		;;
	start)
		slony1_start
		;;
	stop)
		slony1_stop
		;;
	monitor)
		slony1_monitor
		;;
	validate-all)
		slony1_validate
		;;
	*)
		slony1_usage
		exit $OCF_ERR_UNIMPLEMENTED
		;;
esac
# Status of the action function selected above.
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
_______________________________________________
Linux-HA mailing list
[email protected]
http://lists.linux-ha.org/mailman/listinfo/linux-ha
See also: http://linux-ha.org/ReportingProblems