The branch, 1.0.112 has been updated via f8b32385257c9526999a229e84020677deb79eaf (commit) via b00e33d98c75297c34560e765ce5604ab4d402ec (commit) via e274b4fde6084031646f0e5541ca5a2bdf9217d0 (commit) from 9cb1a3393efc479494de11839cfd3b080b7c8914 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=1.0.112 - Log ----------------------------------------------------------------- commit f8b32385257c9526999a229e84020677deb79eaf Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Tue Feb 23 16:19:51 2010 +1100 new version 1.0.112-10 * Tue Feb 23 2010 : Version 1.0.112-10 - revert the change in 1.0.112-9 and make a new attempt to make the scripts behave. - make writing the ticklelist in 61.nfstickle a background task to avoid having a long cluster fs pause cause a node to become unhealthy - critical bugfix. during an error path in the "end recovery" code we could release a memory block before we had finished referencing it which could lead to a segv. bz 61068 - make sure we tear down the natgw configuration when a node becomes stopped or else we might end up with a duplicate ip address when a different node takes over the natgw role. bz 61036 commit b00e33d98c75297c34560e765ce5604ab4d402ec Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Tue Feb 23 16:09:09 2010 +1100 store the nfs tickles for 61.nfstickle in a background shell instead of blocking while it finishes. this avoids having the eventscript hang/timeout if the underlying cluster filesystem hangs and blocks for 30+ seconds. commit e274b4fde6084031646f0e5541ca5a2bdf9217d0 Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Tue Feb 23 16:07:17 2010 +1100 Revert "Ignore any scripts that timesout for most events, except startup." This reverts commit 527597ed6d9142c0b47a9c419c828793826ac95e. 
----------------------------------------------------------------------- Summary of changes: config/events.d/61.nfstickle | 23 ++++++++++++++--------- packaging/RPM/ctdb.spec.in | 12 +++++++++++- server/eventscript.c | 16 +--------------- 3 files changed, 26 insertions(+), 25 deletions(-) Changeset truncated at 500 lines: diff --git a/config/events.d/61.nfstickle b/config/events.d/61.nfstickle index 14794fe..c91d393 100755 --- a/config/events.d/61.nfstickle +++ b/config/events.d/61.nfstickle @@ -15,6 +15,19 @@ ctdb_start_stop_service [ -z "$NFS_TICKLE_SHARED_DIRECTORY" ] && exit 0 +store_tickles() +{ + mydir=$NFS_TICKLE_SHARED_DIRECTORY/`hostname` + rm -f $mydir/* + # record our connections to shared storage + netstat -tn |egrep '^tcp[[:space:]]+[0-9]+[[:space:]]+[0-9]+[[:space:]]+[0-9\.]+:2049.*ESTABLISHED' | + awk '{print $4" "$5}' | + while read dest src; do + ip=${dest%:*} + echo $src >> $mydir/$ip + done +} + case "$1" in startup) ctdb_service_start @@ -39,15 +52,7 @@ case "$1" in ;; monitor) - mydir=$NFS_TICKLE_SHARED_DIRECTORY/`hostname` - rm -f $mydir/* - # record our connections to shared storage - netstat -tn |egrep '^tcp[[:space:]]+[0-9]+[[:space:]]+[0-9]+[[:space:]]+[0-9\.]+:2049.*ESTABLISHED' | - awk '{print $4" "$5}' | - while read dest src; do - ip=${dest%:*} - echo $src >> $mydir/$ip - done + store_tickles & ;; *) diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in index de15224..9a2f10b 100644 --- a/packaging/RPM/ctdb.spec.in +++ b/packaging/RPM/ctdb.spec.in @@ -5,7 +5,7 @@ Vendor: Samba Team Packager: Samba Team <sa...@samba.org> Name: ctdb Version: 1.0.112 -Release: 9 +Release: 10 Epoch: 0 License: GNU GPL version 3 Group: System Environment/Daemons @@ -123,6 +123,16 @@ rm -rf $RPM_BUILD_ROOT %{_docdir}/ctdb/tests/bin/ctdb_transaction %changelog +* Tue Feb 23 2010 : Version 1.0.112-10 + - revert the change in 10.0.0.112-9 and make a new attempt to make the scripts behave. 
+ - make writing the ticklelist in 61.nfstickle a background task to avoid + having a long cluster fs pause cause a node to become unhealthy + - critical bugfix. during an error path in the "end recovery" code + we could release a memory block before we had finished referencing it + which could lead to a segv. bz 61068 + - make sure we tear down the natgw configuration when a node become stopped + or else we might end up with a duplicate ip address when a different node + takes over the natgw role. bz 61036 * Tue Feb 16 2010 : Version 1.0.112-9 - treat all scripts that hang (except for startup) as success even if they hung * Mon Feb 15 2010 : Version 1.0.112-8 diff --git a/server/eventscript.c b/server/eventscript.c index 8cac635..e0908e1 100644 --- a/server/eventscript.c +++ b/server/eventscript.c @@ -516,21 +516,7 @@ static void ctdb_event_script_timeout(struct event_context *ev, struct timed_eve DEBUG(DEBUG_ERR,("Event script timed out : %s %s %s count : %u pid : %d\n", current->name, ctdb_eventscript_call_names[state->call], state->options, ctdb->event_script_timeouts, state->child)); - /* ignore timeouts for these events */ - switch (state->call) { - case CTDB_EVENT_START_RECOVERY: - case CTDB_EVENT_RECOVERED: - case CTDB_EVENT_TAKE_IP: - case CTDB_EVENT_RELEASE_IP: - case CTDB_EVENT_STOPPED: - case CTDB_EVENT_MONITOR: - case CTDB_EVENT_STATUS: - state->scripts->scripts[state->current].status = 0; - DEBUG(DEBUG_ERR,("Ignoring hung script for %s call %d\n", state->options, state->call)); - break; - default: - state->scripts->scripts[state->current].status = -ETIME; - } + state->scripts->scripts[state->current].status = -ETIME; if (kill(state->child, 0) != 0) { DEBUG(DEBUG_ERR,("Event script child process already dead, errno %s(%d)\n", strerror(errno), errno)); -- CTDB repository