The branch, master has been updated via fc18188b7b63eb0dafbc47e3abf80e306e1dfc31 (commit) via e7dc10da3ced54ea9d719ad167ee42dcca8dce75 (commit) via a0c30c820fd47d4f8620dc060c825be10754f5d1 (commit) via f586e8a2911fc6e7f6698f516653145d8fd45dad (commit) via cc9d96f4248e45ea99c5f00db1526426ac26fbc2 (commit) via 9119a568c2b4601318f7751f537dca2f92a7230b (commit) from c29a943f9bbcfecb861e71d007c7698a53dc8773 (commit)
http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit fc18188b7b63eb0dafbc47e3abf80e306e1dfc31 Author: Martin Schwenke <mar...@meltin.net> Date: Fri Jul 6 20:43:46 2012 +1000 recoverd: All inactive nodes should yield recovery master role Not just stopped nodes. In reality, this means that banned nodes will also yield, since nodes in the other inactive states won't be running a daemon. This seems sensible since if another node notices that an inactive node is the recovery master then it will force an election anyway. Signed-off-by: Martin Schwenke <mar...@meltin.net> commit e7dc10da3ced54ea9d719ad167ee42dcca8dce75 Author: Martin Schwenke <mar...@meltin.net> Date: Fri Jul 6 20:36:48 2012 +1000 recoverd: An inactive node should not force recovery master elections An inactive node can't become the recovery master. So if an inactive node notices that the recovery master is inactive, it shouldn't force an election for recovery master and nominate itself as a candidate. This can cause the recovery master to flip-flop between nodes when all nodes are inactive. If there is actually an active node then it will trigger the election. This is fairly cosmetic but is a step along the way towards ironing out weirdness when all nodes are stopped. Also, fix a related comment. Signed-off-by: Martin Schwenke <mar...@meltin.net> commit a0c30c820fd47d4f8620dc060c825be10754f5d1 Author: Martin Schwenke <mar...@meltin.net> Date: Tue Jul 3 10:30:29 2012 +1000 recoverd: main_loop() should not verify local IPs if node is stopped Doing these checks is pointless and potentially causes unnecessary log messages. 
Signed-off-by: Martin Schwenke <mar...@meltin.net> commit f586e8a2911fc6e7f6698f516653145d8fd45dad Author: Martin Schwenke <mar...@meltin.net> Date: Tue Jul 3 10:15:25 2012 +1000 recoverd: verify_local_ip_allocation() should dup ifaces before early return If CTDB starts in STOPPED state then it thinks it is in the middle of a recovery. rec->ifaces is also NULL and an early exit further down (that checks to see if a recovery is in progress) means that it stays that way. However, each time this function is entered the need for a takeover run is re-flagged. The takeover run never happens due to the early exit, causing a couple of unneeded messages to be logged each time. This is avoided by moving the code that sets rec->ifaces so that it is executed earlier and, in this case, in the middle of a recovery. Signed-off-by: Martin Schwenke <mar...@meltin.net> commit cc9d96f4248e45ea99c5f00db1526426ac26fbc2 Author: Martin Schwenke <mar...@meltin.net> Date: Mon Jul 2 17:26:04 2012 +1000 recoverd: Update a log message that has bit-rotted This message used to be correct because the ipreallocated event only handled updating the NAT gateway. However, that has changed so the message needs to be updated. 
Signed-off-by: Martin Schwenke <mar...@meltin.net> commit 9119a568c2b4601318f7751f537dca2f92a7230b Author: Martin Schwenke <mar...@meltin.net> Date: Fri Jun 22 14:01:02 2012 +1000 recoverd: Fix bogus info in message about changed flags Signed-off-by: Martin Schwenke <mar...@meltin.net> ----------------------------------------------------------------------- Summary of changes: server/ctdb_recoverd.c | 25 +++++++++++++++++-------- server/ctdb_takeover.c | 11 ++++++++--- 2 files changed, 25 insertions(+), 11 deletions(-) Changeset truncated at 500 lines: diff --git a/server/ctdb_recoverd.c b/server/ctdb_recoverd.c index 7b7435c..02ce69f 100644 --- a/server/ctdb_recoverd.c +++ b/server/ctdb_recoverd.c @@ -2484,7 +2484,7 @@ static void monitor_handler(struct ctdb_context *ctdb, uint64_t srvid, } if (nodemap->nodes[i].flags != c->new_flags) { - DEBUG(DEBUG_NOTICE,("Node %u has changed flags - now 0x%x was 0x%x\n", c->pnn, c->new_flags, c->old_flags)); + DEBUG(DEBUG_NOTICE,("Node %u has changed flags - now 0x%x was 0x%x\n", c->pnn, c->new_flags, nodemap->nodes[i].flags)); } disabled_flag_changed = (nodemap->nodes[i].flags ^ c->new_flags) & NODE_FLAGS_DISABLED; @@ -2791,6 +2791,9 @@ static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_rec need_iface_check = true; } + talloc_free(rec->ifaces); + rec->ifaces = talloc_steal(rec, ifaces); + if (need_iface_check) { DEBUG(DEBUG_NOTICE, ("The interfaces status has changed on " "local node %u - force takeover run\n", @@ -2839,9 +2842,6 @@ static int verify_local_ip_allocation(struct ctdb_context *ctdb, struct ctdb_rec return 0; } - talloc_free(rec->ifaces); - rec->ifaces = talloc_steal(rec, ifaces); - /* verify that we have the ip addresses we should have and we dont have ones we shouldnt have. 
if we find an inconsistency we set recmode to @@ -3325,8 +3325,8 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, /* If the local node is stopped, verify we are not the recmaster and yield this role if so */ - if ((nodemap->nodes[pnn].flags & NODE_FLAGS_STOPPED) && (rec->recmaster == pnn)) { - DEBUG(DEBUG_ERR,("Local node is STOPPED. Yielding recmaster role\n")); + if ((nodemap->nodes[pnn].flags & NODE_FLAGS_INACTIVE) && (rec->recmaster == pnn)) { + DEBUG(DEBUG_ERR,("Local node is INACTIVE. Yielding recmaster role\n")); force_election(rec, pnn, nodemap); return; } @@ -3387,7 +3387,7 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, return; } - /* grap the nodemap from the recovery master to check if it is banned */ + /* get nodemap from the recovery master to check if it is inactive */ ret = ctdb_ctrl_getnodemap(ctdb, CONTROL_TIMEOUT(), nodemap->nodes[j].pnn, mem_ctx, &recmaster_nodemap); if (ret != 0) { @@ -3397,12 +3397,21 @@ static void main_loop(struct ctdb_context *ctdb, struct ctdb_recoverd *rec, } - if (recmaster_nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) { + if ((recmaster_nodemap->nodes[j].flags & NODE_FLAGS_INACTIVE) && + (rec->node_flags & NODE_FLAGS_INACTIVE) == 0) { DEBUG(DEBUG_NOTICE, ("Recmaster node %u no longer available. Force reelection\n", nodemap->nodes[j].pnn)); force_election(rec, pnn, nodemap); return; } + /* If this node is stopped then it is not the recovery master + * so the only remaining action is to potentially to verify + * the local IP allocation below. This won't accomplish + * anything useful so skip it. + */ + if (rec->node_flags & NODE_FLAGS_STOPPED) { + return; + } /* verify that we have all ip addresses we should have and we dont * have addresses we shouldnt have. 
diff --git a/server/ctdb_takeover.c b/server/ctdb_takeover.c index 538f776..40bf4bc 100644 --- a/server/ctdb_takeover.c +++ b/server/ctdb_takeover.c @@ -2244,8 +2244,13 @@ int ctdb_takeover_run(struct ctdb_context *ctdb, struct ctdb_node_map *nodemap) } ipreallocated: - /* tell all nodes to update natwg */ - /* send the flags update natgw on all connected nodes */ + /* + * Tell all nodes to run eventscripts to process the + * "ipreallocated" event. This can do a lot of things, + * including restarting services to reconfigure them if public + * IPs have moved. Once upon a time this event only used to + * update natwg. + */ data.dptr = discard_const("ipreallocated"); data.dsize = strlen((char *)data.dptr) + 1; nodes = list_of_connected_nodes(ctdb, nodemap, tmp_ctx, true); @@ -2254,7 +2259,7 @@ ipreallocated: false, data, NULL, NULL, NULL) != 0) { - DEBUG(DEBUG_ERR, (__location__ " ctdb_control to updatenatgw failed\n")); + DEBUG(DEBUG_ERR, (__location__ " failed to send control to run eventscripts with \"ipreallocated\"\n")); } talloc_free(tmp_ctx); -- CTDB repository