The branch, 2.5, has been updated via b0ac45fcb7097d9db965a7c3858de872e16387b6 (commit) via 714b2189a91f8ced21c2dc1dd8e792a90e769fa0 (commit) via abcf24832161d841ee9840efc40d128e1f9edd3d (commit) via 0656c1969693b69a826ee184bb1a8b0e8c7ea9fc (commit) via af7c066630ebaf1398b6970acccb28d3a4bd42e7 (commit) from ed3c1f234ea76d32c35361f5c92a9dc2ead86121 (commit)
https://git.samba.org/?p=ctdb.git;a=shortlog;h=2.5 - Log ----------------------------------------------------------------- commit b0ac45fcb7097d9db965a7c3858de872e16387b6 Author: Martin Schwenke <mar...@meltin.net> Date: Fri Jul 24 15:32:42 2015 +1000 daemon: Check if updates are in flight when releasing all IPs Some code involved in releasing IPs is not re-entrant. Memory corruption can occur if, for example, overlapping attempts are made to ban a node. We haven't been able to recreate the corruption but this should protect against it. Signed-off-by: Martin Schwenke <mar...@meltin.net> Reviewed-by: Amitay Isaacs <ami...@gmail.com> (Imported from commit 952a50485f68b3cffdf57da84aa9bb9fde630b7e) commit 714b2189a91f8ced21c2dc1dd8e792a90e769fa0 Author: Amitay Isaacs <ami...@gmail.com> Date: Mon Jul 27 16:51:08 2015 +1000 banning: If node is already banned, do not run ctdb_local_node_got_banned() This calls release_all_ips() only once on the first ban. If the node gets banned again due to event script timeout while running release_all_ips(), then avoid calling release_all_ips() in re-entrant fashion. Signed-off-by: Amitay Isaacs <ami...@gmail.com> Reviewed-by: Martin Schwenke <mar...@meltin.net> (Imported from commit 8eb04d09b119e234c88150e1dc35fc5057f9c926) commit abcf24832161d841ee9840efc40d128e1f9edd3d Author: Amitay Isaacs <ami...@gmail.com> Date: Fri Jul 24 07:39:26 2015 +1000 client: Return the correct status sent from the daemon If a control fails and error message is set, the returned status of the control is always set to -1 ignoring the status passed by the daemon. 
Signed-off-by: Amitay Isaacs <ami...@gmail.com> Reviewed-by: Martin Schwenke <mar...@meltin.net> (Imported from commit 1286b02e24a521dafa7061d09fb5c21d1ebb3011) commit 0656c1969693b69a826ee184bb1a8b0e8c7ea9fc Author: Amitay Isaacs <ami...@gmail.com> Date: Tue Jul 21 16:37:04 2015 +1000 daemon: Correctly process the exit code from failed eventscripts Signed-off-by: Amitay Isaacs <ami...@gmail.com> Reviewed-by: Martin Schwenke <mar...@meltin.net> Autobuild-User(master): Martin Schwenke <mart...@samba.org> Autobuild-Date(master): Wed Jul 22 15:03:53 CEST 2015 on sn-devel-104 (Imported from commit 00ec3c477eba50206801b451ae4eb64c12aba5db) commit af7c066630ebaf1398b6970acccb28d3a4bd42e7 Author: Amitay Isaacs <ami...@gmail.com> Date: Mon Jul 20 16:37:58 2015 +1000 tool: Correctly print timed out event scripts output The timed out error is ignored for certain events (start_recovery, recoverd, takeip, releaseip). If these events time out, then the debug hung script outputs the following: 3 scripts were executed last releaseip cycle 00.ctdb Status:OK Duration:4.381 Thu Jul 16 23:45:24 2015 01.reclock Status:OK Duration:13.422 Thu Jul 16 23:45:28 2015 10.external Status:DISABLED 10.interface Status:OK Duration:-1437083142.208 Thu Jul 16 23:45:42 2015 The endtime for timed out scripts is not set. Since the status is not returned as -ETIME for some events, ctdb scriptstatus prints a negative duration. 
Signed-off-by: Amitay Isaacs <ami...@gmail.com> Reviewed-by: Martin Schwenke <mar...@meltin.net> (Imported from commit 71b89b2b7a9768de437347e6678370b2682da892) ----------------------------------------------------------------------- Summary of changes: client/ctdb_client.c | 2 +- server/ctdb_banning.c | 7 ++++++- server/ctdb_event_helper.c | 6 +++++- server/ctdb_takeover.c | 18 +++++++++++++++--- tools/ctdb.c | 8 ++++++++ 5 files changed, 35 insertions(+), 6 deletions(-) Changeset truncated at 500 lines: diff --git a/client/ctdb_client.c b/client/ctdb_client.c index 1c33e66..32c9357 100644 --- a/client/ctdb_client.c +++ b/client/ctdb_client.c @@ -1138,7 +1138,7 @@ int ctdb_control_recv(struct ctdb_context *ctdb, state->async.fn(state); } talloc_free(tmp_ctx); - return -1; + return (status == 0 ? -1 : state->status); } if (outdata) { diff --git a/server/ctdb_banning.c b/server/ctdb_banning.c index a9d1891..d8f7ab1 100644 --- a/server/ctdb_banning.c +++ b/server/ctdb_banning.c @@ -80,6 +80,7 @@ void ctdb_local_node_got_banned(struct ctdb_context *ctdb) int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata) { struct ctdb_ban_time *bantime = (struct ctdb_ban_time *)indata.dptr; + bool already_banned; DEBUG(DEBUG_INFO,("SET BAN STATE\n")); @@ -107,9 +108,11 @@ int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata) return 0; } + already_banned = false; if (ctdb->banning_ctx != NULL) { talloc_free(ctdb->banning_ctx); ctdb->banning_ctx = NULL; + already_banned = true; } if (bantime->time == 0) { @@ -136,7 +139,9 @@ int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata) event_add_timed(ctdb->ev, ctdb->banning_ctx, timeval_current_ofs(bantime->time,0), ctdb_ban_node_event, ctdb); - ctdb_local_node_got_banned(ctdb); + if (!already_banned) { + ctdb_local_node_got_banned(ctdb); + } return 0; } diff --git a/server/ctdb_event_helper.c b/server/ctdb_event_helper.c index f14e336..a1b5318 100644 --- 
a/server/ctdb_event_helper.c +++ b/server/ctdb_event_helper.c @@ -128,7 +128,11 @@ int main(int argc, char *argv[]) exit(1); } if (WIFEXITED(status)) { - output = -WEXITSTATUS(status); + output = WEXITSTATUS(status); + /* Only errors should be returned as -ve values */ + if (output == ENOENT || output == ENOEXEC) { + output = -output; + } sys_write(write_fd, &output, sizeof(output)); exit(0); } diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c index 29d54f2..095ae41 100644 --- a/server/ctdb_takeover.c +++ b/server/ctdb_takeover.c @@ -3246,9 +3246,6 @@ void ctdb_takeover_client_destructor_hook(struct ctdb_client *client) } -/* - release all IPs on shutdown - */ void ctdb_release_all_ips(struct ctdb_context *ctdb) { struct ctdb_vnn *vnn; @@ -3263,6 +3260,20 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) continue; } + /* Don't allow multiple releases at once. Some code, + * particularly ctdb_tickle_sentenced_connections() is + * not re-entrant */ + if (vnn->update_in_flight) { + DEBUG(DEBUG_WARNING, + (__location__ + " Not releasing IP %s/%u on interface %s, an update is already in progess\n", + ctdb_addr_to_str(&vnn->public_address), + vnn->public_netmask_bits, + ctdb_vnn_iface_string(vnn))); + continue; + } + vnn->update_in_flight = true; + DEBUG(DEBUG_INFO,("Release of IP %s/%u on interface %s node:-1\n", ctdb_addr_to_str(&vnn->public_address), vnn->public_netmask_bits, @@ -3274,6 +3285,7 @@ void ctdb_release_all_ips(struct ctdb_context *ctdb) vnn->public_netmask_bits); release_kill_clients(ctdb, &vnn->public_address); ctdb_vnn_unassign_iface(ctdb, vnn); + vnn->update_in_flight = false; count++; } diff --git a/tools/ctdb.c b/tools/ctdb.c index ebbe84e..7979657 100644 --- a/tools/ctdb.c +++ b/tools/ctdb.c @@ -1453,6 +1453,14 @@ static int control_one_scriptstatus(struct ctdb_context *ctdb, for (i=0; i<script_status->num_scripts; i++) { const char *status = NULL; + /* The ETIME status is ignored for certain events. 
+ * In that case the status is 0, but endtime is not set. + */ + if (script_status->scripts[i].status == 0 && + timeval_is_zero(&script_status->scripts[i].finished)) { + script_status->scripts[i].status = -ETIME; + } + switch (script_status->scripts[i].status) { case -ETIME: status = "TIMEDOUT"; -- CTDB repository