The branch, master has been updated
       via  1a7ff4577d33f0dd470f7465c7d0e875c962f54e (commit)
       via  403c68f96e1380dd07217c688de2730464f77ea0 (commit)
      from  59a879626a6a55fb6a43cadf5338c1aa6afe96d1 (commit)

http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit 1a7ff4577d33f0dd470f7465c7d0e875c962f54e Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Wed Oct 22 11:06:18 2008 +1100 new version 1.0.64 commit 403c68f96e1380dd07217c688de2730464f77ea0 Author: Ronnie Sahlberg <[EMAIL PROTECTED]> Date: Wed Oct 22 11:04:41 2008 +1100 add a context and a timed event so that once we have been in recovery mode for too long we drop all public ip addresses ----------------------------------------------------------------------- Summary of changes: include/ctdb_private.h | 2 ++ packaging/RPM/ctdb.spec | 5 ++++- server/ctdb_control.c | 8 +------- server/ctdb_recover.c | 40 ++++++++++++++++++++++++++++++++++++++++ server/ctdb_takeover.c | 3 +++ server/eventscript.c | 6 +++--- tools/ctdb.c | 3 +-- 7 files changed, 54 insertions(+), 13 deletions(-) Changeset truncated at 500 lines: diff --git a/include/ctdb_private.h b/include/ctdb_private.h index 756c62a..9c06409 100644 --- a/include/ctdb_private.h +++ b/include/ctdb_private.h @@ -420,6 +420,7 @@ struct ctdb_context { uint32_t event_script_timeouts; /* counting how many consecutive times an eventscript has timedout */ TALLOC_CTX *eventscripts_ctx; /* a context to hold data for the RUN_EVENTSCRIPTS control */ uint32_t *recd_ping_count; + TALLOC_CTX *release_ips_ctx; /* a context used to automatically drop all IPs if we fail to recover the node */ }; struct ctdb_db_context { @@ -1422,6 +1423,7 @@ char *ctdb_addr_to_str(ctdb_sock_addr *addr); void ctdb_canonicalize_ip(const ctdb_sock_addr *ip, ctdb_sock_addr *cip); int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb); +int32_t ctdb_control_set_recmaster(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata); extern int script_log_level; diff --git a/packaging/RPM/ctdb.spec b/packaging/RPM/ctdb.spec index a052910..2fed220 100644 --- a/packaging/RPM/ctdb.spec +++ b/packaging/RPM/ctdb.spec @@ 
-5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team <[EMAIL PROTECTED]> Name: ctdb Version: 1.0 -Release: 63 +Release: 64 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -120,6 +120,9 @@ fi %{_includedir}/ctdb_private.h %changelog +* Mon Oct 22 2008 : Version 1.0.64 + - Add a context and a timed event so that once we have been in recovery for + too long we drop all public addresses. * Mon Oct 20 2008 : Version 1.0.63 - Remove logging of "periodic cleanup ..." in 50.samba - When we reload a nodes file, we must detect this and reload the file also diff --git a/server/ctdb_control.c b/server/ctdb_control.c index fa38fea..5f65547 100644 --- a/server/ctdb_control.c +++ b/server/ctdb_control.c @@ -160,13 +160,7 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, } case CTDB_CONTROL_SET_RECMASTER: { - CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t)); - if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) { - DEBUG(DEBUG_NOTICE,("Attempt to set recmaster when not frozen\n")); - return -1; - } - ctdb->recovery_master = ((uint32_t *)(&indata.dptr[0]))[0]; - return 0; + return ctdb_control_set_recmaster(ctdb, opcode, indata); } case CTDB_CONTROL_GET_RECMASTER: diff --git a/server/ctdb_recover.c b/server/ctdb_recover.c index 8d61704..c8b0ba0 100644 --- a/server/ctdb_recover.c +++ b/server/ctdb_recover.c @@ -529,6 +529,19 @@ static void set_recmode_handler(struct event_context *ev, struct fd_event *fde, return; } +static void +ctdb_drop_all_ips_event(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private_data) +{ + struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context); + + DEBUG(DEBUG_INFO,(__location__ " Been in recovery mode for too long. 
Dropping all IPS\n")); + talloc_free(ctdb->release_ips_ctx); + ctdb->release_ips_ctx = NULL; + + ctdb_release_all_ips(ctdb); +} + /* set the recovery mode */ @@ -542,6 +555,21 @@ int32_t ctdb_control_set_recmode(struct ctdb_context *ctdb, struct ctdb_set_recmode_state *state; pid_t parent = getpid(); + /* if we enter recovery but stay in recovery for too long + we will eventually drop all our ip addresses + */ + if (recmode == CTDB_RECOVERY_NORMAL) { + talloc_free(ctdb->release_ips_ctx); + ctdb->release_ips_ctx = NULL; + } else { + talloc_free(ctdb->release_ips_ctx); + ctdb->release_ips_ctx = talloc_new(ctdb); + CTDB_NO_MEMORY(ctdb, ctdb->release_ips_ctx); + + event_add_timed(ctdb->ev, ctdb->release_ips_ctx, timeval_current_ofs(5,0), ctdb_drop_all_ips_event, ctdb); + } + + if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) { DEBUG(DEBUG_ERR,("Attempt to change recovery mode to %u when not frozen\n", recmode)); @@ -1045,3 +1073,15 @@ int32_t ctdb_control_recd_ping(struct ctdb_context *ctdb) return 0; } + + +int32_t ctdb_control_set_recmaster(struct ctdb_context *ctdb, uint32_t opcode, TDB_DATA indata) +{ + CHECK_CONTROL_DATA_SIZE(sizeof(uint32_t)); + if (ctdb->freeze_mode != CTDB_FREEZE_FROZEN) { + DEBUG(DEBUG_NOTICE,("Attempt to set recmaster when not frozen\n")); + return -1; + } + ctdb->recovery_master = ((uint32_t *)(&indata.dptr[0]))[0]; + return 0; +} diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c index 6533d71..c14afb3 100644 --- a/server/ctdb_takeover.c +++ b/server/ctdb_takeover.c @@ -1291,6 +1291,9 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) if (!ctdb_sys_have_ip(&vnn->public_address)) { continue; } + if (vnn->pnn == ctdb->pnn) { + vnn->pnn = -1; + } ctdb_event_script(ctdb, "releaseip %s %s %u", vnn->iface, talloc_strdup(ctdb, ctdb_addr_to_str(&vnn->public_address)), diff --git a/server/eventscript.c b/server/eventscript.c index deaf750..6edd1a4 100644 --- a/server/eventscript.c +++ b/server/eventscript.c @@ -72,13 +72,13 @@ static int 
ctdb_event_script_v(struct ctdb_context *ctdb, const char *options) if (ctdb->recovery_mode != CTDB_RECOVERY_NORMAL) { /* we guarantee that only some specifically allowed event scripts are run while in recovery */ - const char *allowed_scripts[] = {"startrecovery", "shutdown" }; + const char *allowed_scripts[] = {"startrecovery", "shutdown", "releaseip" }; int i; for (i=0;i<ARRAY_SIZE(allowed_scripts);i++) { - if (strcmp(options, allowed_scripts[i]) == 0) break; + if (strncmp(options, allowed_scripts[i], strlen(allowed_scripts[i])) == 0) break; } if (i == ARRAY_SIZE(allowed_scripts)) { - DEBUG(0,("Refusing to run event scripts with option '%s' while in recovery\n", + DEBUG(DEBUG_ERR,("Refusing to run event scripts with option '%s' while in recovery\n", options)); return -1; } diff --git a/tools/ctdb.c b/tools/ctdb.c index 5055c26..5c553e6 100644 --- a/tools/ctdb.c +++ b/tools/ctdb.c @@ -441,8 +441,7 @@ static int control_status(struct ctdb_context *ctdb, int argc, const char **argv static int control_recmaster(struct ctdb_context *ctdb, int argc, const char **argv) { int ret; - uint32_t recmode, recmaster; - int mypnn; + uint32_t recmaster; ret = ctdb_ctrl_getrecmaster(ctdb, ctdb, TIMELIMIT(), options.pnn, &recmaster); if (ret != 0) { -- CTDB repository