The branch, master has been updated
       via  39bc356 ctdb-ipalloc: Document the steps involved in a takeover run
       via  e320725 ctdb-ipalloc: Split IP allocation into its own build subsystem
      from  19d3fd1 s4-rpc_server: Add missing include for ROLE_ACTIVE_DIRECTORY_DC
https://git.samba.org/?p=samba.git;a=shortlog;h=master - Log ----------------------------------------------------------------- commit 39bc356ccb3fdfb9bd69c33ccf0fb1cb76f3c090 Author: Martin Schwenke <mar...@meltin.net> Date: Fri Dec 11 16:07:51 2015 +1100 ctdb-ipalloc: Document the steps involved in a takeover run Signed-off-by: Martin Schwenke <mar...@meltin.net> Reviewed-by: Amitay Isaacs <ami...@gmail.com> Autobuild-User(master): Amitay Isaacs <ami...@samba.org> Autobuild-Date(master): Wed Jan 13 23:27:01 CET 2016 on sn-devel-144 commit e320725f0206c56f5fe8b8b580d677c2aa56ca47 Author: Martin Schwenke <mar...@meltin.net> Date: Mon Nov 23 16:18:16 2015 +1100 ctdb-ipalloc: Split IP allocation into its own build subsystem Signed-off-by: Martin Schwenke <mar...@meltin.net> Reviewed-by: Amitay Isaacs <ami...@gmail.com> ----------------------------------------------------------------------- Summary of changes: ctdb/server/ctdb_takeover.c | 879 +-------------------------------- ctdb/server/ipalloc.c | 53 ++ ctdb/server/ipalloc.h | 63 +++ ctdb/server/ipalloc_common.c | 206 ++++++++ ctdb/server/ipalloc_deterministic.c | 62 +++ ctdb/server/ipalloc_lcp2.c | 515 +++++++++++++++++++ ctdb/server/ipalloc_nondeterministic.c | 147 ++++++ ctdb/server/ipalloc_private.h | 43 ++ ctdb/tests/src/ctdbd_test.c | 5 + ctdb/wscript | 16 +- 10 files changed, 1135 insertions(+), 854 deletions(-) create mode 100644 ctdb/server/ipalloc.c create mode 100644 ctdb/server/ipalloc.h create mode 100644 ctdb/server/ipalloc_common.c create mode 100644 ctdb/server/ipalloc_deterministic.c create mode 100644 ctdb/server/ipalloc_lcp2.c create mode 100644 ctdb/server/ipalloc_nondeterministic.c create mode 100644 ctdb/server/ipalloc_private.h Changeset truncated at 500 lines: diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c index 227bd16..a613aa0 100644 --- a/ctdb/server/ctdb_takeover.c +++ b/ctdb/server/ctdb_takeover.c @@ -41,34 +41,13 @@ #include "common/common.h" #include 
"common/logging.h" +#include "server/ipalloc.h" #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0) #define CTDB_ARP_INTERVAL 1 #define CTDB_ARP_REPEAT 3 -/* Flags used in IP allocation algorithms. */ -enum ipalloc_algorithm { - IPALLOC_DETERMINISTIC, - IPALLOC_NONDETERMINISTIC, - IPALLOC_LCP2, -}; - -struct ipalloc_state { - uint32_t num; - - /* Arrays with data for each node */ - struct ctdb_public_ip_list_old **known_public_ips; - struct ctdb_public_ip_list_old **available_public_ips; - bool *noiptakeover; - bool *noiphost; - - struct public_ip_list *all_ips; - enum ipalloc_algorithm algorithm; - uint32_t no_ip_failback; - uint32_t *force_rebalance_nodes; -}; - struct ctdb_interface { struct ctdb_interface *prev, *next; const char *name; @@ -1249,138 +1228,6 @@ int ctdb_set_single_public_ip(struct ctdb_context *ctdb, return 0; } -struct public_ip_list { - struct public_ip_list *next; - uint32_t pnn; - ctdb_sock_addr addr; -}; - -/* Given a physical node, return the number of - public addresses that is currently assigned to this node. -*/ -static int node_ip_coverage(int32_t pnn, struct public_ip_list *ips) -{ - int num=0; - - for (;ips;ips=ips->next) { - if (ips->pnn == pnn) { - num++; - } - } - return num; -} - - -/* Can the given node host the given IP: is the public IP known to the - * node and is NOIPHOST unset? 
-*/ -static bool can_node_host_ip(struct ipalloc_state *ipalloc_state, - int32_t pnn, - struct public_ip_list *ip) -{ - struct ctdb_public_ip_list_old *public_ips; - int i; - - if (ipalloc_state->noiphost[pnn]) { - return false; - } - - public_ips = ipalloc_state->available_public_ips[pnn]; - - if (public_ips == NULL) { - return false; - } - - for (i=0; i<public_ips->num; i++) { - if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) { - /* yes, this node can serve this public ip */ - return true; - } - } - - return false; -} - -static bool can_node_takeover_ip(struct ipalloc_state *ipalloc_state, - int32_t pnn, - struct public_ip_list *ip) -{ - if (ipalloc_state->noiptakeover[pnn]) { - return false; - } - - return can_node_host_ip(ipalloc_state, pnn, ip); -} - -/* search the node lists list for a node to takeover this ip. - pick the node that currently are serving the least number of ips - so that the ips get spread out evenly. -*/ -static int find_takeover_node(struct ipalloc_state *ipalloc_state, - struct public_ip_list *ip) -{ - int pnn, min=0, num; - int i, numnodes; - - numnodes = ipalloc_state->num; - pnn = -1; - for (i=0; i<numnodes; i++) { - /* verify that this node can serve this ip */ - if (!can_node_takeover_ip(ipalloc_state, i, ip)) { - /* no it couldnt so skip to the next node */ - continue; - } - - num = node_ip_coverage(i, ipalloc_state->all_ips); - /* was this the first node we checked ? 
*/ - if (pnn == -1) { - pnn = i; - min = num; - } else { - if (num < min) { - pnn = i; - min = num; - } - } - } - if (pnn == -1) { - DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n", - ctdb_addr_to_str(&ip->addr))); - - return -1; - } - - ip->pnn = pnn; - return 0; -} - -#define IP_KEYLEN 4 -static uint32_t *ip_key(ctdb_sock_addr *ip) -{ - static uint32_t key[IP_KEYLEN]; - - bzero(key, sizeof(key)); - - switch (ip->sa.sa_family) { - case AF_INET: - key[3] = htonl(ip->ip.sin_addr.s_addr); - break; - case AF_INET6: { - uint32_t *s6_a32 = (uint32_t *)&(ip->ip6.sin6_addr.s6_addr); - key[0] = htonl(s6_a32[0]); - key[1] = htonl(s6_a32[1]); - key[2] = htonl(s6_a32[2]); - key[3] = htonl(s6_a32[3]); - break; - } - default: - DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family)); - return key; - } - - return key; -} - static void *add_ip_callback(void *parm, void *data) { struct public_ip_list *this_ip = parm; @@ -1518,679 +1365,6 @@ create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_s return ip_list; } -/* - * This is the length of the longtest common prefix between the IPs. - * It is calculated by XOR-ing the 2 IPs together and counting the - * number of leading zeroes. The implementation means that all - * addresses end up being 128 bits long. - * - * FIXME? Should we consider IPv4 and IPv6 separately given that the - * 12 bytes of 0 prefix padding will hurt the algorithm if there are - * lots of nodes and IP addresses? - */ -static uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2) -{ - uint32_t ip1_k[IP_KEYLEN]; - uint32_t *t; - int i; - uint32_t x; - - uint32_t distance = 0; - - memcpy(ip1_k, ip_key(ip1), sizeof(ip1_k)); - t = ip_key(ip2); - for (i=0; i<IP_KEYLEN; i++) { - x = ip1_k[i] ^ t[i]; - if (x == 0) { - distance += 32; - } else { - /* Count number of leading zeroes. - * FIXME? This could be optimised... 
- */ - while ((x & (1 << 31)) == 0) { - x <<= 1; - distance += 1; - } - } - } - - return distance; -} - -/* Calculate the IP distance for the given IP relative to IPs on the - given node. The ips argument is generally the all_ips variable - used in the main part of the algorithm. - */ -static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip, - struct public_ip_list *ips, - int pnn) -{ - struct public_ip_list *t; - uint32_t d; - - uint32_t sum = 0; - - for (t = ips; t != NULL; t = t->next) { - if (t->pnn != pnn) { - continue; - } - - /* Optimisation: We never calculate the distance - * between an address and itself. This allows us to - * calculate the effect of removing an address from a - * node by simply calculating the distance between - * that address and all of the exitsing addresses. - * Moreover, we assume that we're only ever dealing - * with addresses from all_ips so we can identify an - * address via a pointer rather than doing a more - * expensive address comparison. */ - if (&(t->addr) == ip) { - continue; - } - - d = ip_distance(ip, &(t->addr)); - sum += d * d; /* Cheaper than pulling in math.h :-) */ - } - - return sum; -} - -/* Return the LCP2 imbalance metric for addresses currently assigned - to the given node. - */ -static uint32_t lcp2_imbalance(struct public_ip_list * all_ips, int pnn) -{ - struct public_ip_list *t; - - uint32_t imbalance = 0; - - for (t = all_ips; t != NULL; t = t->next) { - if (t->pnn != pnn) { - continue; - } - /* Pass the rest of the IPs rather than the whole - all_ips input list. - */ - imbalance += ip_distance_2_sum(&(t->addr), t->next, pnn); - } - - return imbalance; -} - -/* Allocate any unassigned IPs just by looping through the IPs and - * finding the best node for each. - */ -static void basic_allocate_unassigned(struct ipalloc_state *ipalloc_state) -{ - struct public_ip_list *t; - - /* loop over all ip's and find a physical node to cover for - each unassigned ip. 
- */ - for (t = ipalloc_state->all_ips; t != NULL; t = t->next) { - if (t->pnn == -1) { - if (find_takeover_node(ipalloc_state, t)) { - DEBUG(DEBUG_WARNING, - ("Failed to find node to cover ip %s\n", - ctdb_addr_to_str(&t->addr))); - } - } - } -} - -/* Basic non-deterministic rebalancing algorithm. - */ -static void basic_failback(struct ipalloc_state *ipalloc_state, - int num_ips) -{ - int i, numnodes; - int maxnode, maxnum, minnode, minnum, num, retries; - struct public_ip_list *t; - - numnodes = ipalloc_state->num; - retries = 0; - -try_again: - maxnum=0; - minnum=0; - - /* for each ip address, loop over all nodes that can serve - this ip and make sure that the difference between the node - serving the most and the node serving the least ip's are - not greater than 1. - */ - for (t = ipalloc_state->all_ips; t != NULL; t = t->next) { - if (t->pnn == -1) { - continue; - } - - /* Get the highest and lowest number of ips's served by any - valid node which can serve this ip. - */ - maxnode = -1; - minnode = -1; - for (i=0; i<numnodes; i++) { - /* only check nodes that can actually serve this ip */ - if (!can_node_takeover_ip(ipalloc_state, i, - t)) { - /* no it couldnt so skip to the next node */ - continue; - } - - num = node_ip_coverage(i, ipalloc_state->all_ips); - if (maxnode == -1) { - maxnode = i; - maxnum = num; - } else { - if (num > maxnum) { - maxnode = i; - maxnum = num; - } - } - if (minnode == -1) { - minnode = i; - minnum = num; - } else { - if (num < minnum) { - minnode = i; - minnum = num; - } - } - } - if (maxnode == -1) { - DEBUG(DEBUG_WARNING, - (__location__ " Could not find maxnode. May not be able to serve ip '%s'\n", - ctdb_addr_to_str(&t->addr))); - - continue; - } - - /* if the spread between the smallest and largest coverage by - a node is >=2 we steal one of the ips from the node with - most coverage to even things out a bit. - try to do this a limited number of times since we dont - want to spend too much time balancing the ip coverage. 
- */ - if ((maxnum > minnum+1) && - (retries < (num_ips + 5))){ - struct public_ip_list *tt; - - /* Reassign one of maxnode's VNNs */ - for (tt = ipalloc_state->all_ips; tt != NULL; tt = tt->next) { - if (tt->pnn == maxnode) { - (void)find_takeover_node(ipalloc_state, - tt); - retries++; - goto try_again;; - } - } - } - } -} - -static bool lcp2_init(struct ipalloc_state *ipalloc_state, - uint32_t **lcp2_imbalances, - bool **rebalance_candidates) -{ - int i, numnodes; - struct public_ip_list *t; - - numnodes = ipalloc_state->num; - - *rebalance_candidates = talloc_array(ipalloc_state, bool, numnodes); - if (*rebalance_candidates == NULL) { - DEBUG(DEBUG_ERR, (__location__ " out of memory\n")); - return false; - } - *lcp2_imbalances = talloc_array(ipalloc_state, uint32_t, numnodes); - if (*lcp2_imbalances == NULL) { - DEBUG(DEBUG_ERR, (__location__ " out of memory\n")); - return false; - } - - for (i=0; i<numnodes; i++) { - (*lcp2_imbalances)[i] = - lcp2_imbalance(ipalloc_state->all_ips, i); - /* First step: assume all nodes are candidates */ - (*rebalance_candidates)[i] = true; - } - - /* 2nd step: if a node has IPs assigned then it must have been - * healthy before, so we remove it from consideration. This - * is overkill but is all we have because we don't maintain - * state between takeover runs. An alternative would be to - * keep state and invalidate it every time the recovery master - * changes. 
- */ - for (t = ipalloc_state->all_ips; t != NULL; t = t->next) { - if (t->pnn != -1) { - (*rebalance_candidates)[t->pnn] = false; - } - } - - /* 3rd step: if a node is forced to re-balance then - we allow failback onto the node */ - if (ipalloc_state->force_rebalance_nodes == NULL) { - return true; - } - for (i = 0; - i < talloc_array_length(ipalloc_state->force_rebalance_nodes); - i++) { - uint32_t pnn = ipalloc_state->force_rebalance_nodes[i]; - if (pnn >= numnodes) { - DEBUG(DEBUG_ERR, - (__location__ "unknown node %u\n", pnn)); - continue; - } - - DEBUG(DEBUG_NOTICE, - ("Forcing rebalancing of IPs to node %u\n", pnn)); - (*rebalance_candidates)[pnn] = true; - } - - return true; -} - -/* Allocate any unassigned addresses using the LCP2 algorithm to find - * the IP/node combination that will cost the least. - */ -static void lcp2_allocate_unassigned(struct ipalloc_state *ipalloc_state, - uint32_t *lcp2_imbalances) -{ - struct public_ip_list *t; - int dstnode, numnodes; - - int minnode; - uint32_t mindsum, dstdsum, dstimbl, minimbl; - struct public_ip_list *minip; - - bool should_loop = true; - bool have_unassigned = true; - - numnodes = ipalloc_state->num; - - while (have_unassigned && should_loop) { - should_loop = false; - - DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n")); - DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n")); - - minnode = -1; - mindsum = 0; - minip = NULL; - - /* loop over each unassigned ip. */ - for (t = ipalloc_state->all_ips; t != NULL ; t = t->next) { - if (t->pnn != -1) { - continue; - } - - for (dstnode = 0; dstnode < numnodes; dstnode++) { - /* only check nodes that can actually takeover this ip */ - if (!can_node_takeover_ip(ipalloc_state, - dstnode, - t)) { - /* no it couldnt so skip to the next node */ - continue; - } - - dstdsum = ip_distance_2_sum(&(t->addr), -- Samba Shared Repository