The branch, 1.2.40 has been updated via 1dfb6c1e5fdc9676a7de58bb974f8dddd16e9366 (commit) via 96a15964231716b4909d9d5ec19cc4ed1d744177 (commit) via da21e4df641a668bbf379720fbfc556fdadf71a3 (commit) from 91f522f928f28b3c3463963aedd71a251545b910 (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=1.2.40 - Log ----------------------------------------------------------------- commit 1dfb6c1e5fdc9676a7de58bb974f8dddd16e9366 Author: Amitay Isaacs <ami...@gmail.com> Date: Tue Sep 10 16:30:01 2013 +1000 New version 1.2.68 Signed-off-by: Amitay Isaacs <ami...@gmail.com> commit 96a15964231716b4909d9d5ec19cc4ed1d744177 Author: Martin Schwenke <mar...@meltin.net> Date: Fri Sep 6 14:09:09 2013 +1000 recoverd: Nodes to be used as forced rebalance targets must be healthy Right now, if a node is marked as a rebalance target then it can host IPs even if it is unhealthy, inactive, ... Also move the log message into the conditional so it is only generated if the PNN is actually going to be a rebalancing target. Signed-off-by: Martin Schwenke <mar...@meltin.net> commit da21e4df641a668bbf379720fbfc556fdadf71a3 Author: Amitay Isaacs <ami...@gmail.com> Date: Wed Aug 21 14:42:06 2013 +1000 common/io: Limit the queue buffer size for fair scheduling via tevent If we process all the data available in a socket buffer, CTDB can stay busy processing lots of packets via the immediate event mechanism in tevent. After processing an immediate event, tevent returns without calling epoll_wait. So as long as there are immediate events, tevent will never poll other FDs. CTDB will report this as an "Event handling took xx seconds" warning. This is misleading since CTDB is very busy processing packets, but never gets to the point of polling FDs. The improvement in socket handling made it worse when handling a traverse control. The socket buffer quickly filled with lots of packets, and CTDB stayed busy processing those packets without polling other FDs and timer events. This can lead to controls timing out and, in the worst case, other nodes marking the busy node as disconnected.
Signed-off-by: Amitay Isaacs <ami...@gmail.com> (cherry picked from commit 92939c1178d04116d842708bc2d6a9c2950e36cc) ----------------------------------------------------------------------- Summary of changes: common/ctdb_io.c | 43 +++++++++++++++++++++++++++++++------------ packaging/RPM/ctdb.spec.in | 5 ++++- server/ctdb_takeover.c | 5 +++-- 3 files changed, 38 insertions(+), 15 deletions(-) Changeset truncated at 500 lines: diff --git a/common/ctdb_io.c b/common/ctdb_io.c index 4e164d9..99c50c1 100644 --- a/common/ctdb_io.c +++ b/common/ctdb_io.c @@ -30,11 +30,14 @@ #include "../include/ctdb_client.h" #include <stdarg.h> +#define QUEUE_BUFFER_SIZE (16*1024) + /* structures for packet queueing - see common/ctdb_io.c */ struct ctdb_buffer { uint8_t *data; uint32_t length; uint32_t size; + uint32_t extend; }; struct ctdb_queue_pkt { @@ -114,7 +117,9 @@ static void queue_process(struct ctdb_queue *queue) } if (queue->buffer.length < pkt_size) { - DEBUG(DEBUG_DEBUG, ("Partial packet data read\n")); + if (pkt_size > QUEUE_BUFFER_SIZE) { + queue->buffer.extend = pkt_size; + } return; } @@ -138,6 +143,11 @@ static void queue_process(struct ctdb_queue *queue) /* There is more data to be processed, schedule an event */ tevent_schedule_immediate(queue->im, queue->ctdb->ev, queue_process_event, queue); + } else { + if (queue->buffer.size > QUEUE_BUFFER_SIZE) { + TALLOC_FREE(queue->buffer.data); + queue->buffer.size = 0; + } } /* It is the responsibility of the callback to free 'data' */ @@ -159,6 +169,7 @@ static void queue_io_read(struct ctdb_queue *queue) int num_ready = 0; ssize_t nread; uint8_t *data; + int navail; if (ioctl(queue->fd, FIONREAD, &num_ready) != 0) { return; @@ -170,29 +181,37 @@ static void queue_io_read(struct ctdb_queue *queue) if (queue->buffer.data == NULL) { /* starting fresh, allocate buf to read data */ - queue->buffer.data = talloc_size(queue, num_ready); + queue->buffer.data = talloc_size(queue, QUEUE_BUFFER_SIZE); if (queue->buffer.data == NULL) 
{ DEBUG(DEBUG_ERR, ("read error alloc failed for %u\n", num_ready)); goto failed; } - queue->buffer.size = num_ready; - } else if (queue->buffer.length + num_ready > queue->buffer.size) { + queue->buffer.size = QUEUE_BUFFER_SIZE; + } else if (queue->buffer.extend > 0) { /* extending buffer */ - data = talloc_realloc_size(queue, queue->buffer.data, queue->buffer.length + num_ready); + data = talloc_realloc_size(queue, queue->buffer.data, queue->buffer.extend); if (data == NULL) { - DEBUG(DEBUG_ERR, ("read error realloc failed for %u\n", queue->buffer.length + num_ready)); + DEBUG(DEBUG_ERR, ("read error realloc failed for %u\n", queue->buffer.extend)); goto failed; } queue->buffer.data = data; - queue->buffer.size = queue->buffer.length + num_ready; + queue->buffer.size = queue->buffer.extend; + queue->buffer.extend = 0; } - nread = read(queue->fd, queue->buffer.data + queue->buffer.length, num_ready); - if (nread <= 0) { - DEBUG(DEBUG_ERR, ("read error nread=%d\n", (int)nread)); - goto failed; + navail = queue->buffer.size - queue->buffer.length; + if (num_ready > navail) { + num_ready = navail; + } + + if (num_ready > 0) { + nread = read(queue->fd, queue->buffer.data + queue->buffer.length, num_ready); + if (nread <= 0) { + DEBUG(DEBUG_ERR, ("read error nread=%d\n", (int)nread)); + goto failed; + } + queue->buffer.length += nread; } - queue->buffer.length += nread; queue_process(queue); return; diff --git a/packaging/RPM/ctdb.spec.in b/packaging/RPM/ctdb.spec.in index b3daf2f..baaabbe 100644 --- a/packaging/RPM/ctdb.spec.in +++ b/packaging/RPM/ctdb.spec.in @@ -3,7 +3,7 @@ Name: ctdb Summary: Clustered TDB Vendor: Samba Team Packager: Samba Team <sa...@samba.org> -Version: 1.2.67 +Version: 1.2.68 Release: 1GITHASH Epoch: 0 License: GNU GPL version 3 @@ -155,6 +155,9 @@ development libraries for ctdb %changelog +* Tue Sep 10 2013 : Version 1.2.68 + - Use fixed size queue buffers for fair scheduling across tevent FDs + - Nodes to be used as forced rebalance targets 
must be healthy * Wed Aug 14 2013 : Version 1.2.67 - When takeover fails, call fail callback only once and not once per IP - Do not send ipreallocated event to banned nodes diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c index 9fdf227..721be29 100644 --- a/server/ctdb_takeover.c +++ b/server/ctdb_takeover.c @@ -1556,11 +1556,12 @@ void lcp2_init(struct ctdb_context * tmp_ctx, while (force_rebalance_list != NULL) { struct ctdb_rebalancenodes *next = force_rebalance_list->next; - if (force_rebalance_list->pnn <= nodemap->num) { + if (force_rebalance_list->pnn <= nodemap->num && + !(nodemap->nodes[force_rebalance_list->pnn].flags & mask)) { (*newly_healthy)[force_rebalance_list->pnn] = true; + DEBUG(DEBUG_ERR,("During ipreallocation, forced rebalance of node %d\n", force_rebalance_list->pnn)); } - DEBUG(DEBUG_ERR,("During ipreallocation, forced rebalance of node %d\n", force_rebalance_list->pnn)); talloc_free(force_rebalance_list); force_rebalance_list = next; } -- CTDB repository