The branch, 1.2.40 has been updated via 545c343b19258fce01562b15f274eaf1a1deafc8 (commit) via 9bde066f6eb46124168e5686fc41a323e67401e8 (commit) from c51154b79be94198324c321aaaa037045bb85cd9 (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=1.2.40 - Log ----------------------------------------------------------------- commit 545c343b19258fce01562b15f274eaf1a1deafc8 Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Wed Feb 8 13:42:30 2012 +1100 STATISTICS: add total counts for number of delegations and number of revokes Every time we give a delegation to another node we count this as one delegation. If the same record is delegated to several nodes we count one for each node. Every time a record has all its delegations revoked we count this as one revoke. commit 9bde066f6eb46124168e5686fc41a323e67401e8 Author: Ronnie Sahlberg <ronniesahlb...@gmail.com> Date: Fri Mar 2 14:12:37 2012 +1100 READONLY: readonly fetch collapse. Make sure we only keep one single readonly fetch for a record in flight at a time. ----------------------------------------------------------------------- Summary of changes: include/ctdb_private.h | 4 + include/ctdb_protocol.h | 2 + server/ctdb_call.c | 2 + server/ctdb_daemon.c | 209 +++++++++++++++++++++++++++++++++++++++++++++ server/ctdb_ltdb_server.c | 10 ++ tools/ctdb.c | 2 + 6 files changed, 229 insertions(+), 0 deletions(-) Changeset truncated at 500 lines: diff --git a/include/ctdb_private.h b/include/ctdb_private.h index 8180722..86b664b 100644 --- a/include/ctdb_private.h +++ b/include/ctdb_private.h @@ -533,6 +533,10 @@ struct ctdb_db_context { struct ctdb_ltdb_header *header, TDB_DATA data); + /* used to track which records we are currently fetching with readonly + requests so we can avoid sending duplicates + */ + struct trbt_tree *deferred_ro_fetch; }; diff --git a/include/ctdb_protocol.h b/include/ctdb_protocol.h index efcc2cf..c874148 100644 --- a/include/ctdb_protocol.h +++ b/include/ctdb_protocol.h @@ -634,6 +634,8 @@ struct ctdb_statistics { uint32_t num_recoveries; struct timeval statistics_start_time; struct timeval statistics_current_time; + uint32_t total_ro_delegations; + uint32_t total_ro_revokes; }; /* 
diff --git a/server/ctdb_call.c b/server/ctdb_call.c index 2657f8b..1ece85a 100644 --- a/server/ctdb_call.c +++ b/server/ctdb_call.c @@ -513,6 +513,7 @@ void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) if (header.flags & CTDB_REC_RO_REVOKE_COMPLETE) { header.flags &= ~(CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY|CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_REVOKE_COMPLETE); + CTDB_INCREMENT_STAT(ctdb, total_ro_revokes); if (ctdb_ltdb_store(ctdb_db, call->key, &header, data) != 0) { ctdb_fatal(ctdb, "Failed to write header with cleared REVOKE flag"); } @@ -619,6 +620,7 @@ void ctdb_request_call(struct ctdb_context *ctdb, struct ctdb_req_header *hdr) } ctdb_queue_packet(ctdb, &r->hdr); + CTDB_INCREMENT_STAT(ctdb, total_ro_delegations); talloc_free(r); return; diff --git a/server/ctdb_daemon.c b/server/ctdb_daemon.c index c4f46b1..b09aaf3 100644 --- a/server/ctdb_daemon.c +++ b/server/ctdb_daemon.c @@ -27,6 +27,7 @@ #include "system/wait.h" #include "../include/ctdb_client.h" #include "../include/ctdb_private.h" +#include "../common/rb_tree.h" #include <sys/socket.h> struct ctdb_client_pid_list { @@ -384,6 +385,192 @@ static void daemon_incoming_packet_wrap(void *p, struct ctdb_req_header *hdr) } +struct ctdb_deferred_fetch_call { + struct ctdb_deferred_fetch_call *next, *prev; + struct ctdb_req_call *c; + struct ctdb_daemon_packet_wrap *w; +}; + +struct ctdb_deferred_fetch_queue { + struct ctdb_deferred_fetch_call *deferred_calls; +}; + +struct ctdb_deferred_requeue { + struct ctdb_deferred_fetch_call *dfc; + struct ctdb_client *client; +}; + + +/* called from a timer event and starts reprocessing the deferred call.*/ +static void reprocess_deferred_call(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private_data) +{ + struct ctdb_deferred_requeue *dfr = (struct ctdb_deferred_requeue *)private_data; + struct ctdb_client *client = dfr->client; + + talloc_steal(client, dfr->dfc->c); + 
daemon_incoming_packet(client, (struct ctdb_req_header *)dfr->dfc->c); + talloc_free(dfr); +} + +/* the deferral context is destroyed either after a timeout or when the initial + fetch-lock has finished. + at this stage, immediately start reprocessing the queued up deferred + calls so they get reprocessed immediately (and since we are dmaster at + this stage, trigger the waiting smbd processes to pick up and acquire the + record right away). +*/ +static int deferred_fetch_queue_destructor(struct ctdb_deferred_fetch_queue *dfq) +{ + + /* need to reprocess the packets from the queue explicitly instead of + just using a normal destructor since we want, need, to + call the clients in the same order as the requests queued up + */ + while (dfq->deferred_calls != NULL) { + struct ctdb_client *client; + struct ctdb_deferred_fetch_call *dfc = dfq->deferred_calls; + struct ctdb_deferred_requeue *dfr; + + DLIST_REMOVE(dfq->deferred_calls, dfc); + + client = ctdb_reqid_find(dfc->w->ctdb, dfc->w->client_id, struct ctdb_client); + if (client == NULL) { + DEBUG(DEBUG_ERR,(__location__ " Packet for disconnected client %u\n", + dfc->w->client_id)); + continue; + } + + /* process it by pushing it back onto the eventloop */ + dfr = talloc(client, struct ctdb_deferred_requeue); + if (dfr == NULL) { + DEBUG(DEBUG_ERR,("Failed to allocate deferred fetch requeue structure\n")); + continue; + } + + dfr->dfc = talloc_steal(dfr, dfc); + dfr->client = client; + + event_add_timed(dfc->w->ctdb->ev, client, timeval_zero(), reprocess_deferred_call, dfr); + } + + return 0; +} + +/* insert the new deferral context into the rb tree. + there should never be a pre-existing context here, but check for it, and + warn and destroy the previous context if there is already a deferral context + for this key. +*/ +static void *insert_dfq_callback(void *parm, void *data) +{ + if (data) { + DEBUG(DEBUG_ERR,("Already have DFQ registered. 
Free old %p and create new %p\n", data, parm)); + talloc_free(data); + } + return parm; +} + +/* if the original fetch-lock did not complete within a reasonable time, + free the context and the contexts of all deferred requests to cause them to be + re-inserted into the event system. +*/ +static void dfq_timeout(struct event_context *ev, struct timed_event *te, + struct timeval t, void *private_data) +{ + talloc_free(private_data); +} + +/* This function is used in the local daemon to register a KEY in a database + for being "fetched". + While the remote fetch is in-flight, any further attempts to re-fetch the + same record will be deferred until the fetch completes. +*/ +static int setup_deferred_fetch_locks(struct ctdb_db_context *ctdb_db, struct trbt_tree *tree, struct ctdb_call *call) +{ + uint32_t *k; + struct ctdb_deferred_fetch_queue *dfq; + + k = talloc_zero_size(call, ((call->key.dsize + 3) & 0xfffffffc) + 4); + if (k == NULL) { + DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch\n")); + return -1; + } + + k[0] = (call->key.dsize + 3) / 4 + 1; + memcpy(&k[1], call->key.dptr, call->key.dsize); + + dfq = talloc(call, struct ctdb_deferred_fetch_queue); + if (dfq == NULL) { + DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch queue structure\n")); + talloc_free(k); + return -1; + } + dfq->deferred_calls = NULL; + + trbt_insertarray32_callback(tree, k[0], &k[0], insert_dfq_callback, dfq); + + talloc_set_destructor(dfq, deferred_fetch_queue_destructor); + + /* if the fetch hasn't completed in 30 seconds, just tear it all down + and let it try again as the events are reissued */ + event_add_timed(ctdb_db->ctdb->ev, dfq, timeval_current_ofs(30, 0), dfq_timeout, dfq); + + talloc_free(k); + return 0; +} + +/* check if this is a duplicate request to a fetch already in-flight + if it is, make this call deferred to be reprocessed later when + the in-flight fetch completes. 
+*/ +static int requeue_duplicate_fetch(struct ctdb_db_context *ctdb_db, struct trbt_tree *tree, struct ctdb_client *client, TDB_DATA key, struct ctdb_req_call *c) +{ + uint32_t *k; + struct ctdb_deferred_fetch_queue *dfq; + struct ctdb_deferred_fetch_call *dfc; + + k = talloc_zero_size(c, ((key.dsize + 3) & 0xfffffffc) + 4); + if (k == NULL) { + DEBUG(DEBUG_ERR,("Failed to allocate key for deferred fetch\n")); + return -1; + } + + k[0] = (key.dsize + 3) / 4 + 1; + memcpy(&k[1], key.dptr, key.dsize); + + dfq = trbt_lookuparray32(tree, k[0], &k[0]); + if (dfq == NULL) { + talloc_free(k); + return -1; + } + + + talloc_free(k); + + dfc = talloc(dfq, struct ctdb_deferred_fetch_call); + if (dfc == NULL) { + DEBUG(DEBUG_ERR, ("Failed to allocate deferred fetch call structure\n")); + return -1; + } + + dfc->w = talloc(dfc, struct ctdb_daemon_packet_wrap); + if (dfc->w == NULL) { + DEBUG(DEBUG_ERR,("Failed to allocate deferred fetch daemon packet wrap structure\n")); + talloc_free(dfc); + return -1; + } + + dfc->c = talloc_steal(dfc, c); + dfc->w->ctdb = ctdb_db->ctdb; + dfc->w->client_id = client->client_id; + + DLIST_ADD_END(dfq->deferred_calls, dfc, NULL); + + return 0; +} + + /* this is called when the ctdb daemon received a ctdb request call from a local client over the unix domain socket @@ -453,8 +640,23 @@ static void daemon_request_call_from_client(struct ctdb_client *client, c->flags &= ~CTDB_WANT_READONLY; } + if (c->flags & CTDB_WANT_READONLY) { + /* check if this fetch-lock request is a duplicate for a + request we already have in flight. If so defer it until + the first request completes. 
+ */ + if (requeue_duplicate_fetch(ctdb_db, ctdb_db->deferred_ro_fetch, client, key, c) == 0) { + ret = ctdb_ltdb_unlock(ctdb_db, key); + if (ret != 0) { + DEBUG(DEBUG_ERR,(__location__ " ctdb_ltdb_unlock() failed with error %d\n", ret)); + } + return; + } + } + if (header.flags & CTDB_REC_RO_REVOKE_COMPLETE) { header.flags &= ~(CTDB_REC_RO_HAVE_DELEGATIONS|CTDB_REC_RO_HAVE_READONLY|CTDB_REC_RO_REVOKING_READONLY|CTDB_REC_RO_REVOKE_COMPLETE); + CTDB_INCREMENT_STAT(ctdb, total_ro_revokes); if (ctdb_ltdb_store(ctdb_db, key, &header, data) != 0) { ctdb_fatal(ctdb, "Failed to write header with cleared REVOKE flag"); } @@ -549,6 +751,13 @@ static void daemon_request_call_from_client(struct ctdb_client *client, state = ctdb_call_local_send(ctdb_db, call, &header, &data); } else { state = ctdb_daemon_call_send_remote(ctdb_db, call, &header); + if (call->flags & CTDB_WANT_READONLY) { + /* This request triggered a remote ro fetch. + set up a deferral for this key so any additional + ro fetches are deferred until this one finishes. + */ + setup_deferred_fetch_locks(ctdb_db, ctdb_db->deferred_ro_fetch, call); + } } ret = ctdb_ltdb_unlock(ctdb_db, key); diff --git a/server/ctdb_ltdb_server.c b/server/ctdb_ltdb_server.c index 27d773b..d600278 100644 --- a/server/ctdb_ltdb_server.c +++ b/server/ctdb_ltdb_server.c @@ -964,6 +964,16 @@ again: } } + /* set up a rb tree we can use to track which records we have a + fetch-lock in-flight for so we can defer any additional calls + for the same record. 
+ */ + ctdb_db->deferred_ro_fetch = trbt_create(ctdb_db, 0); + if (ctdb_db->deferred_ro_fetch == NULL) { + DEBUG(DEBUG_ERR,("Failed to create deferred ro fetch rb tree for ctdb database\n")); + talloc_free(ctdb_db); + return -1; + } DLIST_ADD(ctdb->db_list, ctdb_db); diff --git a/tools/ctdb.c b/tools/ctdb.c index 92ef63d..2c7fdc3 100644 --- a/tools/ctdb.c +++ b/tools/ctdb.c @@ -201,6 +201,8 @@ static void show_statistics(struct ctdb_statistics *s, int show_header) STATISTICS_FIELD(pending_childwrite_calls), STATISTICS_FIELD(memory_used), STATISTICS_FIELD(max_hop_count), + STATISTICS_FIELD(total_ro_delegations), + STATISTICS_FIELD(total_ro_revokes), }; tmp = s->statistics_current_time.tv_sec - s->statistics_start_time.tv_sec; seconds = tmp%60; -- CTDB repository