The branch, master has been updated via f1b6ee4a55cdca60f93d992f0431d91bf301af2c (commit) via 0785afba8e5cd501b9e0ecb4a6a44edf43b57ab0 (commit) via 38d7487bc68c8cf85980004aceeef24ae32d6f36 (commit) from 514a60c57557042e463efeff53dd11b9fec40561 (commit)
http://gitweb.samba.org/?p=sahlberg/ctdb.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit f1b6ee4a55cdca60f93d992f0431d91bf301af2c Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Fri Oct 23 15:24:51 2009 +1100 Add a mechanism where we can register notifications to be sent out to a SRVID when the client disconnects. The way to use this is from a client to : 1, first create a message handle and bind it to a SRVID. A special prefix for the srvid space has been set aside for samba : CTDB_SRVID_SAMBA_NOTIFY (0xFE00000000000000LL). Only samba is allowed to use srvids with the top 32 bits set like this. The lower 32 bits are for samba to use internally. 2, register a "notification" using the new control : CTDB_CONTROL_REGISTER_NOTIFY = 114, This control takes as indata a structure like this : struct ctdb_client_notify_register { uint64_t srvid; uint32_t len; uint8_t notify_data[1]; }; srvid is the srvid used in the space set aside above. len and notify_data describe an arbitrary blob. When notifications are later sent out to all clients, this is the payload of that notification message. If a client has registered with control 114 and then disconnects from ctdbd, ctdbd will broadcast a message to that srvid to all nodes/listeners in the cluster. A client can register itself with as many different srvids as it wants, but this is handled through a linked list from the client structure so it is mainly designed for "few notifications per client". 3, a client that no longer wants to have a notification set up can deregister using control CTDB_CONTROL_DEREGISTER_NOTIFY = 115, which takes this as arguments : struct ctdb_client_notify_deregister { uint64_t srvid; }; When a client deregisters, a message will no longer be sent to all other clients when this client disconnects from ctdbd. 
commit 0785afba8e5cd501b9e0ecb4a6a44edf43b57ab0 Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Fri Oct 23 13:55:21 2009 +1100 when scripts timeout, log pstree to a file in /tmp and just log the filename in the messages file commit 38d7487bc68c8cf85980004aceeef24ae32d6f36 Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Fri Oct 23 13:54:45 2009 +1100 set the eventscripts to timeout after 20 seconds change the ban count to 10 failures before we ban by default ----------------------------------------------------------------------- Summary of changes: include/ctdb.h | 13 ++++++ include/ctdb_private.h | 8 +++- server/ctdb_control.c | 7 +++ server/ctdb_daemon.c | 103 +++++++++++++++++++++++++++++++++++++++++++++++- server/ctdb_tunables.c | 4 +- server/eventscript.c | 34 ++++------------ 6 files changed, 140 insertions(+), 29 deletions(-) Changeset truncated at 500 lines: diff --git a/include/ctdb.h b/include/ctdb.h index abe09a3..1ede662 100644 --- a/include/ctdb.h +++ b/include/ctdb.h @@ -111,6 +111,10 @@ struct ctdb_call_info { */ #define CTDB_SRVID_TAKEOVER_RUN_RESPONSE 0xFD00000000000000LL +/* A port reserved for samba (top 32 bits) + */ +#define CTDB_SRVID_SAMBA_NOTIFY 0xFE00000000000000LL + /* used on the domain socket, send a pdu to the local daemon */ #define CTDB_CURRENT_NODE 0xF0000001 /* send a broadcast to all nodes in the cluster, active or not */ @@ -144,6 +148,15 @@ struct ctdb_client_control_state { } async; }; +struct ctdb_client_notify_register { + uint64_t srvid; + uint32_t len; + uint8_t notify_data[1]; +}; + +struct ctdb_client_notify_deregister { + uint64_t srvid; +}; struct event_context; diff --git a/include/ctdb_private.h b/include/ctdb_private.h index 2f4937e..ad84628 100644 --- a/include/ctdb_private.h +++ b/include/ctdb_private.h @@ -160,7 +160,6 @@ typedef void (*ctdb_control_callback_fn_t)(struct ctdb_context *, int32_t status, TDB_DATA data, const char *errormsg, void *private_data); - /* structure describing a connected 
client in the daemon */ @@ -173,6 +172,7 @@ struct ctdb_client { struct ctdb_tcp_list *tcp_list; uint32_t db_id; uint32_t num_persistent_updates; + struct ctdb_client_notify_list *notify; }; @@ -612,6 +612,8 @@ enum ctdb_controls {CTDB_CONTROL_PROCESS_EXISTS = 0, CTDB_CONTROL_SET_DB_PRIORITY = 111, CTDB_CONTROL_GET_DB_PRIORITY = 112, CTDB_CONTROL_TRANSACTION_CANCEL = 113, + CTDB_CONTROL_REGISTER_NOTIFY = 114, + CTDB_CONTROL_DEREGISTER_NOTIFY = 115, }; /* @@ -1501,4 +1503,8 @@ int32_t ctdb_control_set_ban_state(struct ctdb_context *ctdb, TDB_DATA indata); int32_t ctdb_control_get_ban_state(struct ctdb_context *ctdb, TDB_DATA *outdata); int32_t ctdb_control_set_db_priority(struct ctdb_context *ctdb, TDB_DATA indata); +int32_t ctdb_control_register_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata); + +int32_t ctdb_control_deregister_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata); + #endif diff --git a/server/ctdb_control.c b/server/ctdb_control.c index 24d22d0..904bebe 100644 --- a/server/ctdb_control.c +++ b/server/ctdb_control.c @@ -552,6 +552,13 @@ static int32_t ctdb_control_dispatch(struct ctdb_context *ctdb, CHECK_CONTROL_DATA_SIZE(0); return ctdb_control_transaction_cancel(ctdb); + case CTDB_CONTROL_REGISTER_NOTIFY: + return ctdb_control_register_notify(ctdb, client_id, indata); + + case CTDB_CONTROL_DEREGISTER_NOTIFY: + CHECK_CONTROL_DATA_SIZE(sizeof(struct ctdb_client_notify_deregister)); + return ctdb_control_deregister_notify(ctdb, client_id, indata); + default: DEBUG(DEBUG_CRIT,(__location__ " Unknown CTDB control opcode %u\n", opcode)); return -1; diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c index 532887c..a8dc651 100644 --- a/server/ctdb_daemon.c +++ b/server/ctdb_daemon.c @@ -31,7 +31,6 @@ static void daemon_incoming_packet(void *, struct ctdb_req_header *); - static void print_exit_message(void) { DEBUG(DEBUG_NOTICE,("CTDB daemon shutting down\n")); @@ -1043,3 +1042,105 @@ int 
ctdb_daemon_send_message(struct ctdb_context *ctdb, uint32_t pnn, return 0; } + + +struct ctdb_client_notify_list { + struct ctdb_client_notify_list *next, *prev; + struct ctdb_context *ctdb; + uint64_t srvid; + TDB_DATA data; +}; + + +static int ctdb_client_notify_destructor(struct ctdb_client_notify_list *nl) +{ + int ret; + + DEBUG(DEBUG_ERR,("Sending client notify message for srvid:%llu\n", (unsigned long long)nl->srvid)); + + ret = ctdb_daemon_send_message(nl->ctdb, CTDB_BROADCAST_CONNECTED, (unsigned long long)nl->srvid, nl->data); + if (ret != 0) { + DEBUG(DEBUG_ERR,("Failed to send client notify message\n")); + } + + return 0; +} + +int32_t ctdb_control_register_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata) +{ + struct ctdb_client_notify_register *notify = (struct ctdb_client_notify_register *)indata.dptr; + struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client); + struct ctdb_client_notify_list *nl; + + DEBUG(DEBUG_ERR,("Register srvid %llu for client %d\n", (unsigned long long)notify->srvid, client_id)); + + if (indata.dsize < offsetof(struct ctdb_client_notify_register, notify_data)) { + DEBUG(DEBUG_ERR,(__location__ " Too little data in control : %d\n", (int)indata.dsize)); + return -1; + } + + if (indata.dsize != (notify->len + offsetof(struct ctdb_client_notify_register, notify_data))) { + DEBUG(DEBUG_ERR,(__location__ " Wrong amount of data in control. Got %d, expected %d\n", (int)indata.dsize, (int)(notify->len + offsetof(struct ctdb_client_notify_register, notify_data)))); + return -1; + } + + + if (client == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. 
You can not send this control to a remote node\n")); + return -1; + } + + for(nl=client->notify; nl; nl=nl->next) { + if (nl->srvid == notify->srvid) { + break; + } + } + if (nl != NULL) { + DEBUG(DEBUG_ERR,(__location__ " Notification for srvid:%llu already exists for this client\n", (unsigned long long)notify->srvid)); + return -1; + } + + nl = talloc(client, struct ctdb_client_notify_list); + CTDB_NO_MEMORY(ctdb, nl); + nl->ctdb = ctdb; + nl->srvid = notify->srvid; + nl->data.dsize = notify->len; + nl->data.dptr = talloc_size(nl, nl->data.dsize); + CTDB_NO_MEMORY(ctdb, nl->data.dptr); + memcpy(nl->data.dptr, notify->notify_data, nl->data.dsize); + + DLIST_ADD(client->notify, nl); + talloc_set_destructor(nl, ctdb_client_notify_destructor); + + return 0; +} + +int32_t ctdb_control_deregister_notify(struct ctdb_context *ctdb, uint32_t client_id, TDB_DATA indata) +{ + struct ctdb_client_notify_deregister *notify = (struct ctdb_client_notify_deregister *)indata.dptr; + struct ctdb_client *client = ctdb_reqid_find(ctdb, client_id, struct ctdb_client); + struct ctdb_client_notify_list *nl; + + DEBUG(DEBUG_ERR,("Deregister srvid %llu for client %d\n", (unsigned long long)notify->srvid, client_id)); + + if (client == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Could not find client parent structure. 
You can not send this control to a remote node\n")); + return -1; + } + + for(nl=client->notify; nl; nl=nl->next) { + if (nl->srvid == notify->srvid) { + break; + } + } + if (nl == NULL) { + DEBUG(DEBUG_ERR,(__location__ " No notification for srvid:%llu found for this client\n", (unsigned long long)notify->srvid)); + return -1; + } + + DLIST_REMOVE(client->notify, nl); + talloc_set_destructor(nl, NULL); + talloc_free(nl); + + return 0; +} diff --git a/server/ctdb_tunables.c b/server/ctdb_tunables.c index c461bca..8ad4cde 100644 --- a/server/ctdb_tunables.c +++ b/server/ctdb_tunables.c @@ -37,8 +37,8 @@ static const struct { { "TakeoverTimeout", 5, offsetof(struct ctdb_tunable, takeover_timeout) }, { "MonitorInterval", 15, offsetof(struct ctdb_tunable, monitor_interval) }, { "TickleUpdateInterval",20, offsetof(struct ctdb_tunable, tickle_update_interval) }, - { "EventScriptTimeout", 60, offsetof(struct ctdb_tunable, script_timeout) }, - { "EventScriptBanCount", 5, offsetof(struct ctdb_tunable, script_ban_count) }, + { "EventScriptTimeout", 20, offsetof(struct ctdb_tunable, script_timeout) }, + { "EventScriptBanCount", 10, offsetof(struct ctdb_tunable, script_ban_count) }, { "EventScriptUnhealthyOnTimeout", 0, offsetof(struct ctdb_tunable, script_unhealthy_on_timeout) }, { "RecoveryGracePeriod", 120, offsetof(struct ctdb_tunable, recovery_grace_period) }, { "RecoveryBanPeriod", 300, offsetof(struct ctdb_tunable, recovery_ban_period) }, diff --git a/server/eventscript.c b/server/eventscript.c index aae5eef..3b86615 100644 --- a/server/eventscript.c +++ b/server/eventscript.c @@ -18,6 +18,7 @@ */ #include "includes.h" +#include <time.h> #include "system/filesys.h" #include "system/wait.h" #include "system/dir.h" @@ -36,36 +37,19 @@ static struct { */ static void sigterm(int sig) { - FILE *p; + char tbuf[100], buf[200]; + time_t t; DEBUG(DEBUG_ERR,("Timed out running script '%s' after %.1f seconds pid :%d\n", child_state.script_running, 
timeval_elapsed(&child_state.start), getpid())); - p = popen("pstree -p", "r"); - if (p == NULL) { - DEBUG(DEBUG_ERR,("Failed popen to collect pstree for hung script\n")); - } else { - char buf[256]; - int count; - - DEBUG(DEBUG_ERR,("PSTREE:\n")); - while(!feof(p)){ - count=fread(buf, 1, 255, p); - if (count == EOF) { - break; - } - if (count < 0) { - break; - } - if (count == 0) { - break; - } - buf[count] = 0; - DEBUG(DEBUG_ERR,("%s", buf)); - } - DEBUG(DEBUG_ERR,("END OF PSTREE OUTPUT\n")); - pclose(p); - } + t = time(NULL); + + strftime(tbuf, sizeof(tbuf)-1, "%Y%m%d%H%M%S", localtime(&t)); + sprintf(buf, "pstree -p >/tmp/ctdb.event.%s.%d", tbuf, getpid()); + system(buf); + + DEBUG(DEBUG_ERR,("Logged timedout eventscript : %s\n", buf)); /* all the child processes will be running in the same process group */ kill(-getpgrp(), SIGKILL); -- CTDB repository