The branch, master has been updated
       via  39bc356 ctdb-ipalloc: Document the steps involved in a takeover run
       via  e320725 ctdb-ipalloc: Split IP allocation into its own build subsystem
      from  19d3fd1 s4-rpc_server: Add missing include for ROLE_ACTIVE_DIRECTORY_DC

https://git.samba.org/?p=samba.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 39bc356ccb3fdfb9bd69c33ccf0fb1cb76f3c090
Author: Martin Schwenke <mar...@meltin.net>
Date:   Fri Dec 11 16:07:51 2015 +1100

    ctdb-ipalloc: Document the steps involved in a takeover run
    
    Signed-off-by: Martin Schwenke <mar...@meltin.net>
    Reviewed-by: Amitay Isaacs <ami...@gmail.com>
    
    Autobuild-User(master): Amitay Isaacs <ami...@samba.org>
    Autobuild-Date(master): Wed Jan 13 23:27:01 CET 2016 on sn-devel-144

commit e320725f0206c56f5fe8b8b580d677c2aa56ca47
Author: Martin Schwenke <mar...@meltin.net>
Date:   Mon Nov 23 16:18:16 2015 +1100

    ctdb-ipalloc: Split IP allocation into its own build subsystem
    
    Signed-off-by: Martin Schwenke <mar...@meltin.net>
    Reviewed-by: Amitay Isaacs <ami...@gmail.com>

-----------------------------------------------------------------------

Summary of changes:
 ctdb/server/ctdb_takeover.c            | 879 +--------------------------------
 ctdb/server/ipalloc.c                  |  53 ++
 ctdb/server/ipalloc.h                  |  63 +++
 ctdb/server/ipalloc_common.c           | 206 ++++++++
 ctdb/server/ipalloc_deterministic.c    |  62 +++
 ctdb/server/ipalloc_lcp2.c             | 515 +++++++++++++++++++
 ctdb/server/ipalloc_nondeterministic.c | 147 ++++++
 ctdb/server/ipalloc_private.h          |  43 ++
 ctdb/tests/src/ctdbd_test.c            |   5 +
 ctdb/wscript                           |  16 +-
 10 files changed, 1135 insertions(+), 854 deletions(-)
 create mode 100644 ctdb/server/ipalloc.c
 create mode 100644 ctdb/server/ipalloc.h
 create mode 100644 ctdb/server/ipalloc_common.c
 create mode 100644 ctdb/server/ipalloc_deterministic.c
 create mode 100644 ctdb/server/ipalloc_lcp2.c
 create mode 100644 ctdb/server/ipalloc_nondeterministic.c
 create mode 100644 ctdb/server/ipalloc_private.h


Changeset truncated at 500 lines:

diff --git a/ctdb/server/ctdb_takeover.c b/ctdb/server/ctdb_takeover.c
index 227bd16..a613aa0 100644
--- a/ctdb/server/ctdb_takeover.c
+++ b/ctdb/server/ctdb_takeover.c
@@ -41,34 +41,13 @@
 #include "common/common.h"
 #include "common/logging.h"
 
+#include "server/ipalloc.h"
 
 #define TAKEOVER_TIMEOUT() timeval_current_ofs(ctdb->tunable.takeover_timeout,0)
 
 #define CTDB_ARP_INTERVAL 1
 #define CTDB_ARP_REPEAT   3
 
-/* Flags used in IP allocation algorithms. */
-enum ipalloc_algorithm {
-       IPALLOC_DETERMINISTIC,
-       IPALLOC_NONDETERMINISTIC,
-       IPALLOC_LCP2,
-};
-
-struct ipalloc_state {
-       uint32_t num;
-
-       /* Arrays with data for each node */
-       struct ctdb_public_ip_list_old **known_public_ips;
-       struct ctdb_public_ip_list_old **available_public_ips;
-       bool *noiptakeover;
-       bool *noiphost;
-
-       struct public_ip_list *all_ips;
-       enum ipalloc_algorithm algorithm;
-       uint32_t no_ip_failback;
-       uint32_t *force_rebalance_nodes;
-};
-
 struct ctdb_interface {
        struct ctdb_interface *prev, *next;
        const char *name;
@@ -1249,138 +1228,6 @@ int ctdb_set_single_public_ip(struct ctdb_context *ctdb,
        return 0;
 }
 
-struct public_ip_list {
-       struct public_ip_list *next;
-       uint32_t pnn;
-       ctdb_sock_addr addr;
-};
-
-/* Given a physical node, return the number of
-   public addresses that is currently assigned to this node.
-*/
-static int node_ip_coverage(int32_t pnn, struct public_ip_list *ips)
-{
-       int num=0;
-
-       for (;ips;ips=ips->next) {
-               if (ips->pnn == pnn) {
-                       num++;
-               }
-       }
-       return num;
-}
-
-
-/* Can the given node host the given IP: is the public IP known to the
- * node and is NOIPHOST unset?
-*/
-static bool can_node_host_ip(struct ipalloc_state *ipalloc_state,
-                            int32_t pnn,
-                            struct public_ip_list *ip)
-{
-       struct ctdb_public_ip_list_old *public_ips;
-       int i;
-
-       if (ipalloc_state->noiphost[pnn]) {
-               return false;
-       }
-
-       public_ips = ipalloc_state->available_public_ips[pnn];
-
-       if (public_ips == NULL) {
-               return false;
-       }
-
-       for (i=0; i<public_ips->num; i++) {
-               if (ctdb_same_ip(&ip->addr, &public_ips->ips[i].addr)) {
-                       /* yes, this node can serve this public ip */
-                       return true;
-               }
-       }
-
-       return false;
-}
-
-static bool can_node_takeover_ip(struct ipalloc_state *ipalloc_state,
-                                int32_t pnn,
-                                struct public_ip_list *ip)
-{
-       if (ipalloc_state->noiptakeover[pnn]) {
-               return false;
-       }
-
-       return can_node_host_ip(ipalloc_state, pnn, ip);
-}
-
-/* search the node lists list for a node to takeover this ip.
-   pick the node that currently are serving the least number of ips
-   so that the ips get spread out evenly.
-*/
-static int find_takeover_node(struct ipalloc_state *ipalloc_state,
-                             struct public_ip_list *ip)
-{
-       int pnn, min=0, num;
-       int i, numnodes;
-
-       numnodes = ipalloc_state->num;
-       pnn    = -1;
-       for (i=0; i<numnodes; i++) {
-               /* verify that this node can serve this ip */
-               if (!can_node_takeover_ip(ipalloc_state, i, ip)) {
-                       /* no it couldnt   so skip to the next node */
-                       continue;
-               }
-
-               num = node_ip_coverage(i, ipalloc_state->all_ips);
-               /* was this the first node we checked ? */
-               if (pnn == -1) {
-                       pnn = i;
-                       min  = num;
-               } else {
-                       if (num < min) {
-                               pnn = i;
-                               min  = num;
-                       }
-               }
-       }
-       if (pnn == -1) {
-               DEBUG(DEBUG_WARNING,(__location__ " Could not find node to take over public address '%s'\n",
-                       ctdb_addr_to_str(&ip->addr)));
-
-               return -1;
-       }
-
-       ip->pnn = pnn;
-       return 0;
-}
-
-#define IP_KEYLEN      4
-static uint32_t *ip_key(ctdb_sock_addr *ip)
-{
-       static uint32_t key[IP_KEYLEN];
-
-       bzero(key, sizeof(key));
-
-       switch (ip->sa.sa_family) {
-       case AF_INET:
-               key[3]  = htonl(ip->ip.sin_addr.s_addr);
-               break;
-       case AF_INET6: {
-               uint32_t *s6_a32 = (uint32_t *)&(ip->ip6.sin6_addr.s6_addr);
-               key[0]  = htonl(s6_a32[0]);
-               key[1]  = htonl(s6_a32[1]);
-               key[2]  = htonl(s6_a32[2]);
-               key[3]  = htonl(s6_a32[3]);
-               break;
-       }
-       default:
-               DEBUG(DEBUG_ERR, (__location__ " ERROR, unknown family passed :%u\n", ip->sa.sa_family));
-               return key;
-       }
-
-       return key;
-}
-
 static void *add_ip_callback(void *parm, void *data)
 {
        struct public_ip_list *this_ip = parm;
@@ -1518,679 +1365,6 @@ create_merged_ip_list(struct ctdb_context *ctdb, struct ipalloc_state *ipalloc_s
        return ip_list;
 }
 
-/* 
- * This is the length of the longtest common prefix between the IPs.
- * It is calculated by XOR-ing the 2 IPs together and counting the
- * number of leading zeroes.  The implementation means that all
- * addresses end up being 128 bits long.
- *
- * FIXME? Should we consider IPv4 and IPv6 separately given that the
- * 12 bytes of 0 prefix padding will hurt the algorithm if there are
- * lots of nodes and IP addresses?
- */
-static uint32_t ip_distance(ctdb_sock_addr *ip1, ctdb_sock_addr *ip2)
-{
-       uint32_t ip1_k[IP_KEYLEN];
-       uint32_t *t;
-       int i;
-       uint32_t x;
-
-       uint32_t distance = 0;
-
-       memcpy(ip1_k, ip_key(ip1), sizeof(ip1_k));
-       t = ip_key(ip2);
-       for (i=0; i<IP_KEYLEN; i++) {
-               x = ip1_k[i] ^ t[i];
-               if (x == 0) {
-                       distance += 32;
-               } else {
-                       /* Count number of leading zeroes. 
-                        * FIXME? This could be optimised...
-                        */
-                       while ((x & (1 << 31)) == 0) {
-                               x <<= 1;
-                               distance += 1;
-                       }
-               }
-       }
-
-       return distance;
-}
-
-/* Calculate the IP distance for the given IP relative to IPs on the
-   given node.  The ips argument is generally the all_ips variable
-   used in the main part of the algorithm.
- */
-static uint32_t ip_distance_2_sum(ctdb_sock_addr *ip,
-                                 struct public_ip_list *ips,
-                                 int pnn)
-{
-       struct public_ip_list *t;
-       uint32_t d;
-
-       uint32_t sum = 0;
-
-       for (t = ips; t != NULL; t = t->next) {
-               if (t->pnn != pnn) {
-                       continue;
-               }
-
-               /* Optimisation: We never calculate the distance
-                * between an address and itself.  This allows us to
-                * calculate the effect of removing an address from a
-                * node by simply calculating the distance between
-                * that address and all of the exitsing addresses.
-                * Moreover, we assume that we're only ever dealing
-                * with addresses from all_ips so we can identify an
-                * address via a pointer rather than doing a more
-                * expensive address comparison. */
-               if (&(t->addr) == ip) {
-                       continue;
-               }
-
-               d = ip_distance(ip, &(t->addr));
-               sum += d * d;  /* Cheaper than pulling in math.h :-) */
-       }
-
-       return sum;
-}
-
-/* Return the LCP2 imbalance metric for addresses currently assigned
-   to the given node.
- */
-static uint32_t lcp2_imbalance(struct public_ip_list * all_ips, int pnn)
-{
-       struct public_ip_list *t;
-
-       uint32_t imbalance = 0;
-
-       for (t = all_ips; t != NULL; t = t->next) {
-               if (t->pnn != pnn) {
-                       continue;
-               }
-               /* Pass the rest of the IPs rather than the whole
-                  all_ips input list.
-               */
-               imbalance += ip_distance_2_sum(&(t->addr), t->next, pnn);
-       }
-
-       return imbalance;
-}
-
-/* Allocate any unassigned IPs just by looping through the IPs and
- * finding the best node for each.
- */
-static void basic_allocate_unassigned(struct ipalloc_state *ipalloc_state)
-{
-       struct public_ip_list *t;
-
-       /* loop over all ip's and find a physical node to cover for
-          each unassigned ip.
-       */
-       for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
-               if (t->pnn == -1) {
-                       if (find_takeover_node(ipalloc_state, t)) {
-                               DEBUG(DEBUG_WARNING,
-                                     ("Failed to find node to cover ip %s\n",
-                                      ctdb_addr_to_str(&t->addr)));
-                       }
-               }
-       }
-}
-
-/* Basic non-deterministic rebalancing algorithm.
- */
-static void basic_failback(struct ipalloc_state *ipalloc_state,
-                          int num_ips)
-{
-       int i, numnodes;
-       int maxnode, maxnum, minnode, minnum, num, retries;
-       struct public_ip_list *t;
-
-       numnodes = ipalloc_state->num;
-       retries = 0;
-
-try_again:
-       maxnum=0;
-       minnum=0;
-
-       /* for each ip address, loop over all nodes that can serve
-          this ip and make sure that the difference between the node
-          serving the most and the node serving the least ip's are
-          not greater than 1.
-       */
-       for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
-               if (t->pnn == -1) {
-                       continue;
-               }
-
-               /* Get the highest and lowest number of ips's served by any 
-                  valid node which can serve this ip.
-               */
-               maxnode = -1;
-               minnode = -1;
-               for (i=0; i<numnodes; i++) {
-                       /* only check nodes that can actually serve this ip */
-                       if (!can_node_takeover_ip(ipalloc_state, i,
-                                                 t)) {
-                               /* no it couldnt   so skip to the next node */
-                               continue;
-                       }
-
-                       num = node_ip_coverage(i, ipalloc_state->all_ips);
-                       if (maxnode == -1) {
-                               maxnode = i;
-                               maxnum  = num;
-                       } else {
-                               if (num > maxnum) {
-                                       maxnode = i;
-                                       maxnum  = num;
-                               }
-                       }
-                       if (minnode == -1) {
-                               minnode = i;
-                               minnum  = num;
-                       } else {
-                               if (num < minnum) {
-                                       minnode = i;
-                                       minnum  = num;
-                               }
-                       }
-               }
-               if (maxnode == -1) {
-                       DEBUG(DEBUG_WARNING,
-                             (__location__ " Could not find maxnode. May not be able to serve ip '%s'\n",
-                              ctdb_addr_to_str(&t->addr)));
-
-                       continue;
-               }
-
-               /* if the spread between the smallest and largest coverage by
-                  a node is >=2 we steal one of the ips from the node with
-                  most coverage to even things out a bit.
-                  try to do this a limited number of times since we dont
-                  want to spend too much time balancing the ip coverage.
-               */
-               if ((maxnum > minnum+1) &&
-                   (retries < (num_ips + 5))){
-                       struct public_ip_list *tt;
-
-                       /* Reassign one of maxnode's VNNs */
-                       for (tt = ipalloc_state->all_ips; tt != NULL; tt = tt->next) {
-                               if (tt->pnn == maxnode) {
-                                       (void)find_takeover_node(ipalloc_state,
-                                                                tt);
-                                       retries++;
-                                       goto try_again;;
-                               }
-                       }
-               }
-       }
-}
-
-static bool lcp2_init(struct ipalloc_state *ipalloc_state,
-                     uint32_t **lcp2_imbalances,
-                     bool **rebalance_candidates)
-{
-       int i, numnodes;
-       struct public_ip_list *t;
-
-       numnodes = ipalloc_state->num;
-
-       *rebalance_candidates = talloc_array(ipalloc_state, bool, numnodes);
-       if (*rebalance_candidates == NULL) {
-               DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-               return false;
-       }
-       *lcp2_imbalances = talloc_array(ipalloc_state, uint32_t, numnodes);
-       if (*lcp2_imbalances == NULL) {
-               DEBUG(DEBUG_ERR, (__location__ " out of memory\n"));
-               return false;
-       }
-
-       for (i=0; i<numnodes; i++) {
-               (*lcp2_imbalances)[i] =
-                       lcp2_imbalance(ipalloc_state->all_ips, i);
-               /* First step: assume all nodes are candidates */
-               (*rebalance_candidates)[i] = true;
-       }
-
-       /* 2nd step: if a node has IPs assigned then it must have been
-        * healthy before, so we remove it from consideration.  This
-        * is overkill but is all we have because we don't maintain
-        * state between takeover runs.  An alternative would be to
-        * keep state and invalidate it every time the recovery master
-        * changes.
-        */
-       for (t = ipalloc_state->all_ips; t != NULL; t = t->next) {
-               if (t->pnn != -1) {
-                       (*rebalance_candidates)[t->pnn] = false;
-               }
-       }
-
-       /* 3rd step: if a node is forced to re-balance then
-          we allow failback onto the node */
-       if (ipalloc_state->force_rebalance_nodes == NULL) {
-               return true;
-       }
-       for (i = 0;
-            i < talloc_array_length(ipalloc_state->force_rebalance_nodes);
-            i++) {
-               uint32_t pnn = ipalloc_state->force_rebalance_nodes[i];
-               if (pnn >= numnodes) {
-                       DEBUG(DEBUG_ERR,
-                             (__location__ "unknown node %u\n", pnn));
-                       continue;
-               }
-
-               DEBUG(DEBUG_NOTICE,
-                     ("Forcing rebalancing of IPs to node %u\n", pnn));
-               (*rebalance_candidates)[pnn] = true;
-       }
-
-       return true;
-}
-
-/* Allocate any unassigned addresses using the LCP2 algorithm to find
- * the IP/node combination that will cost the least.
- */
-static void lcp2_allocate_unassigned(struct ipalloc_state *ipalloc_state,
-                                    uint32_t *lcp2_imbalances)
-{
-       struct public_ip_list *t;
-       int dstnode, numnodes;
-
-       int minnode;
-       uint32_t mindsum, dstdsum, dstimbl, minimbl;
-       struct public_ip_list *minip;
-
-       bool should_loop = true;
-       bool have_unassigned = true;
-
-       numnodes = ipalloc_state->num;
-
-       while (have_unassigned && should_loop) {
-               should_loop = false;
-
-               DEBUG(DEBUG_DEBUG,(" ----------------------------------------\n"));
-               DEBUG(DEBUG_DEBUG,(" CONSIDERING MOVES (UNASSIGNED)\n"));
-
-               minnode = -1;
-               mindsum = 0;
-               minip = NULL;
-
-               /* loop over each unassigned ip. */
-               for (t = ipalloc_state->all_ips; t != NULL ; t = t->next) {
-                       if (t->pnn != -1) {
-                               continue;
-                       }
-
-                       for (dstnode = 0; dstnode < numnodes; dstnode++) {
-                               /* only check nodes that can actually takeover this ip */
-                               if (!can_node_takeover_ip(ipalloc_state,
-                                                         dstnode,
-                                                         t)) {
-                                       /* no it couldnt   so skip to the next node */
-                                       continue;
-                               }
-
-                               dstdsum = ip_distance_2_sum(&(t->addr),


-- 
Samba Shared Repository

Reply via email to