Hi,
Sorry for the delay, here is the patch that adds a new configuration
directive, 'follow_master_command'.
Only after a master failover, and only if the directive contains a command
line, PgPool will degenerate all desynchronized SR backends and will
execute the command once the new master is up. This allows a shell
script or other program to be set to force the reconstruction of all secondary
servers.
A typical follow master shell script looks like this:
#!/bin/sh
PGPOOLIP=192.168.1.10
PGUSER=postgres
PGPASS=pcp_pass_phrase
/usr/local/pgpool/bin/pcp_recovery_node 10 $PGPOOLIP 9898 $PGUSER \
    $PGPASS $1
/usr/local/pgpool/bin/pcp_attach_node 10 $PGPOOLIP 9898 $PGUSER \
    $PGPASS $1
and the follow_master_command will be set like this in pgpool.conf:
# Execute command at end of failover.
# special values: %d = node id
# %h = host name
# %p = port number
# %D = database cluster path
# %m = new master node id
# %H = hostname of the new master node
# %M = old master node id
# %P = old primary node id
# %% = '%' character
#
follow_master_command = '/home/postgres/bin/follow_master.sh %d'
This is a first attempt. As I only use PgPool with PostgreSQL Streaming Replication,
I don't know whether it breaks anything in the other modes. What I've
tried to do with this patch, and with the other one that allows promoting any
node to master, is to change the existing code as little as possible. So there
should not be any problem with the other modes, and if there is, it should be
easy to fix. Any feedback will be appreciated.
Thanks for your help,
Regards,
--
Gilles Darold
http://dalibo.com - http://dalibo.org
diff -r -u pgpool-II/main.c pgpool-II-follow/main.c
--- pgpool-II/main.c 2011-01-11 01:43:30.000000000 +0100
+++ pgpool-II-follow/main.c 2011-01-25 15:08:35.000000000 +0100
@@ -106,6 +106,7 @@
static int pool_pause(struct timeval *timeout);
static void kill_all_children(int sig);
static int get_next_master_node(void);
+static pid_t fork_follow_child(void);
static RETSIGTYPE exit_handler(int sig);
static RETSIGTYPE reap_handler(int sig);
@@ -137,6 +138,7 @@
static int unix_fd; /* unix domain socket fd */
static int inet_fd; /* inet domain socket fd */
+static int follow_pid; /* pid for child process handling follow command */
static int pcp_pid; /* pid for child process handling PCP */
static int pcp_unix_fd; /* unix domain socket fd for PCP (not used) */
static int pcp_inet_fd; /* inet domain socket fd for PCP */
@@ -1597,6 +1599,34 @@
Req_info->master_node_id = new_master;
}
+ /*
+ In master/slave streaming replication we start degenerating
+ all backends as they are not replicated anymore
+ */
+ int follow_cnt = 0;
+ /* Only if the failover is against the current master */
+ if ((Req_info->kind == NODE_DOWN_REQUEST) && (node_id == Req_info->master_node_id)) {
+ for (i = 0; i < pool_config->backend_desc->num_backends; i++)
+ {
+ /* do not degenerate the new master */
+ if (i != new_master) {
+ BackendInfo *bkinfo;
+ bkinfo = pool_get_node_info(i);
+ pool_log("starting follow degeneration. shutdown host %s(%d)",
+ bkinfo->backend_hostname,
+ bkinfo->backend_port);
+ bkinfo->backend_status = CON_DOWN; /* set down status */
+ follow_cnt++;
+ }
+ }
+ if (follow_cnt == 0)
+ {
+ pool_log("failover: no follow backends are degenerated");
+ } else {
+ pool_log("failover: %d follow backends have been degenerated", follow_cnt);
+ }
+ }
+
/* no need to wait since it will be done in reap_handler */
#ifdef NOT_USED
while (wait(NULL) > 0)
@@ -1635,9 +1665,16 @@
switching = 0;
/* kick wakeup_handler in pcp_child to notice that
- * faiover/failback done
+ * failover/failback done
*/
kill(pcp_pid, SIGUSR2);
+
+ /* exec follow_master_command */
+ if ( (follow_cnt > 0) && (pool_config->follow_master_command) )
+ {
+ follow_pid = fork_follow_child();
+ }
+
}
/*
@@ -2360,3 +2397,34 @@
pool_log("find_primary_node: primary node id is %d", i);
return i;
}
+
+/*
+* Fork a child that runs pool_config->follow_master_command, through trigger_failover_command(), for every backend whose status is CON_DOWN. The parent gets the child's pid back; if fork() fails the error is logged and myexit(1) is called.
+*/
+pid_t fork_follow_child(void)
+{
+ pid_t pid;
+ int i;
+
+ pid = fork();
+
+ if (pid == 0) /* child process: walk all configured backends */
+ {
+ for (i = 0; i < pool_config->backend_desc->num_backends; i++)
+ {
+ BackendInfo *bkinfo;
+ bkinfo = pool_get_node_info(i);
+ pool_log("start triggering follow command."); /* emitted on every iteration, whatever the node status */
+ if (bkinfo->backend_status == CON_DOWN) /* NOTE(review): would also match nodes already down before this failover -- confirm intended */
+ trigger_failover_command(i, pool_config->follow_master_command);
+ }
+ exit(0); /* the child ends here and never returns to the caller */
+ }
+ else if (pid == -1) /* fork() failed: no child was created */
+ {
+ pool_error("follow fork() failed. reason: %s", strerror(errno));
+ myexit(1);
+ }
+ return pid; /* parent: pid of the forked follow child */
+}
+
diff -r -u pgpool-II/pgpool.conf.sample pgpool-II-follow/pgpool.conf.sample
--- pgpool-II/pgpool.conf.sample 2011-01-16 11:31:37.000000000 +0100
+++ pgpool-II-follow/pgpool.conf.sample 2011-01-25 15:24:27.000000000 +0100
@@ -176,6 +176,19 @@
#
failback_command = ''
+# Execute command at end of master failover.
+# special values: %d = node id
+# %h = host name
+# %p = port number
+# %D = database cluster path
+# %m = new master node id
+# %H = hostname of the new master node
+# %M = old master node id
+# %P = old primary node id
+# %% = '%' character
+#
+follow_master_command = ''
+
# If true, trigger fail over when writing to the backend communication
# socket fails. This is the same behavior of pgpool-II 2.2.x or
# earlier. If set to false, pgpool will report an error and disconnect
diff -r -u pgpool-II/pgpool.conf.sample-master-slave pgpool-II-follow/pgpool.conf.sample-master-slave
--- pgpool-II/pgpool.conf.sample-master-slave 2011-01-16 11:31:38.000000000 +0100
+++ pgpool-II-follow/pgpool.conf.sample-master-slave 2011-01-25 15:24:43.000000000 +0100
@@ -176,6 +176,19 @@
#
failback_command = ''
+# Execute command at end of master failover.
+# special values: %d = node id
+# %h = host name
+# %p = port number
+# %D = database cluster path
+# %m = new master node id
+# %H = hostname of the new master node
+# %M = old master node id
+# %P = old primary node id
+# %% = '%' character
+#
+follow_master_command = ''
+
# If true, trigger fail over when writing to the backend communication
# socket fails. This is the same behavior of pgpool-II 2.2.x or
# earlier. If set to false, pgpool will report an error and disconnect
diff -r -u pgpool-II/pgpool.conf.sample-replication pgpool-II-follow/pgpool.conf.sample-replication
--- pgpool-II/pgpool.conf.sample-replication 2011-01-16 11:31:38.000000000 +0100
+++ pgpool-II-follow/pgpool.conf.sample-replication 2011-01-25 15:24:58.000000000 +0100
@@ -176,6 +176,19 @@
#
failback_command = ''
+# Execute command at end of master failover.
+# special values: %d = node id
+# %h = host name
+# %p = port number
+# %D = database cluster path
+# %m = new master node id
+# %H = hostname of the new master node
+# %M = old master node id
+# %P = old primary node id
+# %% = '%' character
+#
+follow_master_command = ''
+
# If true, trigger fail over when writing to the backend communication
# socket fails. This is the same behavior of pgpool-II 2.2.x or
# earlier. If set to false, pgpool will report an error and disconnect
diff -r -u pgpool-II/pgpool.conf.sample-stream pgpool-II-follow/pgpool.conf.sample-stream
--- pgpool-II/pgpool.conf.sample-stream 2011-01-16 11:31:38.000000000 +0100
+++ pgpool-II-follow/pgpool.conf.sample-stream 2011-01-25 15:25:20.000000000 +0100
@@ -176,6 +176,19 @@
#
failback_command = ''
+# Execute command at end of master failover.
+# special values: %d = node id
+# %h = host name
+# %p = port number
+# %D = database cluster path
+# %m = new master node id
+# %H = hostname of the new master node
+# %M = old master node id
+# %P = old primary node id
+# %% = '%' character
+#
+follow_master_command = ''
+
# If true, trigger fail over when writing to the backend communication
# socket fails. This is the same behavior of pgpool-II 2.2.x or
# earlier. If set to false, pgpool will report an error and disconnect
diff -r -u pgpool-II/pool_config.c pgpool-II-follow/pool_config.c
--- pgpool-II/pool_config.c 2011-01-16 11:31:38.000000000 +0100
+++ pgpool-II-follow/pool_config.c 2011-01-25 15:14:18.000000000 +0100
@@ -1904,6 +1904,7 @@
pool_config->health_check_period = 0;
pool_config->health_check_user = "nobody";
pool_config->failover_command = "";
+ pool_config->follow_master_command = "";
pool_config->failback_command = "";
pool_config->fail_over_on_backend_error = 1;
pool_config->insert_lock = 1;
@@ -2809,6 +2810,26 @@
pool_config->failover_command = str;
}
+ else if (!strcmp(key, "follow_master_command") &&
+ CHECK_CONTEXT(INIT_CONFIG|RELOAD_CONFIG, context))
+ {
+ char *str;
+
+ if (token != POOL_STRING && token != POOL_UNQUOTED_STRING && token != POOL_KEY)
+ {
+ PARSE_ERROR();
+ fclose(fd);
+ return(-1);
+ }
+ str = extract_string(yytext, token);
+ if (str == NULL)
+ {
+ fclose(fd);
+ return(-1);
+ }
+ pool_config->follow_master_command = str;
+ }
+
else if (!strcmp(key, "failback_command") &&
CHECK_CONTEXT(INIT_CONFIG|RELOAD_CONFIG, context))
{
diff -r -u pgpool-II/pool_config.h pgpool-II-follow/pool_config.h
--- pgpool-II/pool_config.h 2011-01-16 11:31:38.000000000 +0100
+++ pgpool-II-follow/pool_config.h 2011-01-25 15:18:20.000000000 +0100
@@ -108,6 +108,7 @@
int health_check_period; /* health check period */
char *health_check_user; /* PostgreSQL user name for health check */
char *failover_command; /* execute command when failover happens */
+ char *follow_master_command; /* execute command when failover is ended */
char *failback_command; /* execute command when failback happens */
/*
diff -r -u pgpool-II/pool_config.l pgpool-II-follow/pool_config.l
--- pgpool-II/pool_config.l 2011-01-16 11:31:38.000000000 +0100
+++ pgpool-II-follow/pool_config.l 2011-01-25 15:19:10.000000000 +0100
@@ -179,6 +179,7 @@
pool_config->health_check_period = 0;
pool_config->health_check_user = "nobody";
pool_config->failover_command = "";
+ pool_config->follow_master_command = "";
pool_config->failback_command = "";
pool_config->fail_over_on_backend_error = 1;
pool_config->insert_lock = 1;
@@ -1084,6 +1085,26 @@
pool_config->failover_command = str;
}
+ else if (!strcmp(key, "follow_master_command") &&
+ CHECK_CONTEXT(INIT_CONFIG|RELOAD_CONFIG, context))
+ {
+ char *str;
+
+ if (token != POOL_STRING && token != POOL_UNQUOTED_STRING && token != POOL_KEY)
+ {
+ PARSE_ERROR();
+ fclose(fd);
+ return(-1);
+ }
+ str = extract_string(yytext, token);
+ if (str == NULL)
+ {
+ fclose(fd);
+ return(-1);
+ }
+ pool_config->follow_master_command = str;
+ }
+
else if (!strcmp(key, "failback_command") &&
CHECK_CONTEXT(INIT_CONFIG|RELOAD_CONFIG, context))
{
diff -r -u pgpool-II/pool_process_reporting.c pgpool-II-follow/pool_process_reporting.c
--- pgpool-II/pool_process_reporting.c 2011-01-16 11:31:38.000000000 +0100
+++ pgpool-II-follow/pool_process_reporting.c 2011-01-25 15:21:05.000000000 +0100
@@ -318,6 +318,11 @@
strncpy(status[i].desc, "failover command", POOLCONFIG_MAXDESCLEN);
i++;
+ strncpy(status[i].name, "follow_master_command", POOLCONFIG_MAXNAMELEN);
+ snprintf(status[i].value, POOLCONFIG_MAXVALLEN, "%s", pool_config->follow_master_command);
+ strncpy(status[i].desc, "follow master command", POOLCONFIG_MAXDESCLEN);
+ i++;
+
strncpy(status[i].name, "failback_command", POOLCONFIG_MAXNAMELEN);
snprintf(status[i].value, POOLCONFIG_MAXVALLEN, "%s", pool_config->failback_command);
strncpy(status[i].desc, "failback command", POOLCONFIG_MAXDESCLEN);
_______________________________________________
Pgpool-hackers mailing list
[email protected]
http://pgfoundry.org/mailman/listinfo/pgpool-hackers