The branch, master has been updated
       via  5cdad2b8ebd71a5e458c301d00eac00a211feeb3 (commit)
       via  0fe79662e20e347d9e1cb12a42cd356e33572402 (commit)
       via  444521c852749558f39dc6131acce9e47eefd489 (commit)
       via  4bf0b1c9d21986eecb7682f935bd6154c65533cc (commit)
       via  d8eb2e7fdd7645719370dad4f2faa5c3fffa8249 (commit)
      from  f9556a6f1fe0046308c8b363e6dcaf3f7ce6f2b7 (commit)

http://gitweb.samba.org/?p=ctdb.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 5cdad2b8ebd71a5e458c301d00eac00a211feeb3
Author: Martin Schwenke <[email protected]>
Date:   Fri Aug 9 17:00:10 2013 +1000

    tools/ctdb: Fix message in showban when node is banned
    
    Signed-off-by: Martin Schwenke <[email protected]>

commit 0fe79662e20e347d9e1cb12a42cd356e33572402
Author: Martin Schwenke <[email protected]>
Date:   Fri Aug 9 16:58:42 2013 +1000

    tools/ctdb: Reimplement ban/unban using update_flags_wait_and_ipreallocate()
    
    This has the side effect of making these commands more resilient to
    control timeouts.
    
    Signed-off-by: Martin Schwenke <[email protected]>

commit 444521c852749558f39dc6131acce9e47eefd489
Author: Martin Schwenke <[email protected]>
Date:   Fri Aug 9 16:34:59 2013 +1000

    tools/ctdb: Factor out common pattern used in disable/enable/stop/continue
    
    Now we will only have one set of bugs.  :-)
    
    Signed-off-by: Martin Schwenke <[email protected]>
    Pair-programmed-with: Amitay Isaacs <[email protected]>

commit 4bf0b1c9d21986eecb7682f935bd6154c65533cc
Author: Martin Schwenke <[email protected]>
Date:   Fri Aug 9 15:41:37 2013 +1000

    tools/ctdb: Factor, simplify and improve robustness of ipreallocate code
    
    Having other functions call control_ipreallocate() suggests that it
    might look at the argc/argv arguments that are passed.  This is not
    the case.  Change the callers so they call the new ipreallocate()
    function instead.
    
    Broadcast CTDB_SRVID_TAKEOVER_RUN to all connected nodes.  Inactive
    nodes will ignore it.  This is safe since we only want 1 reply.  If we
    didn't get a response, we don't actually care if there's no active
    recovery master - just fire, wait, retry, ...
    
    Ignore some failures on the basis that they might be transient, so it
    is probably worth retrying.
    
    Signed-off-by: Martin Schwenke <[email protected]>

commit d8eb2e7fdd7645719370dad4f2faa5c3fffa8249
Author: Martin Schwenke <[email protected]>
Date:   Thu Aug 15 04:38:02 2013 +1000

    tools/ctdb: Use ctdb_get_pnn() to get PNN of the current node
    
    This has already been stored at connect time and can't fail.
    
    Signed-off-by: Martin Schwenke <[email protected]>

-----------------------------------------------------------------------

Summary of changes:
 tools/ctdb.c |  376 ++++++++++++++++++++--------------------------------------
 1 files changed, 127 insertions(+), 249 deletions(-)


Changeset truncated at 500 lines:

diff --git a/tools/ctdb.c b/tools/ctdb.c
index b580779..eeff548 100644
--- a/tools/ctdb.c
+++ b/tools/ctdb.c
@@ -1973,75 +1973,53 @@ static void ctdb_every_second(struct event_context *ev, 
struct timed_event *te,
                                ctdb_every_second, ctdb);
 }
 
-/*
-  ask the recovery daemon on the recovery master to perform a ip reallocation
+/* Send an ipreallocate to the recovery daemon on all nodes.  Only the
+ * recovery master will answer.
  */
-static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const 
char **argv)
+static int ipreallocate(struct ctdb_context *ctdb)
 {
-       int i, ret;
+       int ret;
        TDB_DATA data;
        struct takeover_run_reply rd;
-       struct ctdb_node_map *nodemap=NULL;
-       int count;
-       struct timeval tv = timeval_current();
+       struct timeval tv;
 
-       /* we need some events to trigger so we can timeout and restart
-          the loop
-       */
+       /* Time ticks to enable timeouts to be processed */
        event_add_timed(ctdb->ev, ctdb, 
                                timeval_current_ofs(1, 0),
                                ctdb_every_second, ctdb);
 
-       rd.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
-       if (rd.pnn == -1) {
-               DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
-               return -1;
-       }
+       rd.pnn = ctdb_get_pnn(ctdb);
        rd.srvid = getpid();
 
-       /* register a message port for receiveing the reply so that we
-          can receive the reply
-       */
+       /* Register message port for reply from recovery master */
        ctdb_client_set_message_handler(ctdb, rd.srvid, ip_reallocate_handler, 
NULL);
 
        data.dptr = (uint8_t *)&rd;
        data.dsize = sizeof(rd);
 
 again:
-       /* get the number of nodes and node flags */
-       if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), options.pnn, ctdb, 
&nodemap) != 0) {
-               DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-               sleep(1);
-               goto again;
-       }
-
-       ipreallocate_finished = false;
-       count = 0;
-       for (i=0; i<nodemap->num;i++) {
-               if (nodemap->nodes[i].flags & NODE_FLAGS_INACTIVE) {
-                       continue;
-               } else {
-                       /* Send to all active nodes. Only recmaster will reply. 
*/
-                       ret = ctdb_client_send_message(ctdb, i, 
CTDB_SRVID_TAKEOVER_RUN, data);
-                       if (ret != 0) {
-                               DEBUG(DEBUG_ERR,("Failed to send ip takeover 
run request message to %u\n", options.pnn));
-                               return -1;
-                       }
-                       count++;
-               }
-       }
-       if (count == 0) {
-               DEBUG(DEBUG_ERR,("No recmaster available, no need to wait for 
cluster convergence\n"));
-               return 0;
+       /* Send to all connected nodes. Only recmaster replies */
+       ret = ctdb_client_send_message(ctdb, CTDB_BROADCAST_CONNECTED,
+                                      CTDB_SRVID_TAKEOVER_RUN, data);
+       if (ret != 0) {
+               /* This can only happen if the socket is closed and
+                * there's no way to recover from that, so don't try
+                * again.
+                */
+               DEBUG(DEBUG_WARNING,
+                     ("Failed to send IP reallocation request to connected 
nodes\n"));
+               return -1;
        }
 
        tv = timeval_current();
-       /* this loop will terminate when we have received the reply */
+       /* This loop terminates when the reply is received */
        while (timeval_elapsed(&tv) < 5.0 && !ipreallocate_finished) {
                event_loop_once(ctdb->ev);
        }
 
        if (!ipreallocate_finished) {
+               DEBUG(DEBUG_NOTICE,
+                     ("Still waiting for confirmation of IP reallocation\n"));
                goto again;
        }
 
@@ -2049,6 +2027,11 @@ again:
 }
 
 
+static int control_ipreallocate(struct ctdb_context *ctdb, int argc, const 
char **argv)
+{
+       return ipreallocate(ctdb);
+}
+
 /*
   add a public ip address to a node
  */
@@ -2907,157 +2890,114 @@ static int control_getpid(struct ctdb_context *ctdb, 
int argc, const char **argv
        return 0;
 }
 
-/*
-  disable a remote node
- */
-static int control_disable(struct ctdb_context *ctdb, int argc, const char 
**argv)
+typedef bool update_flags_handler_t(struct ctdb_context *ctdb, void *data);
+
+static int update_flags_and_ipreallocate(struct ctdb_context *ctdb,
+                                             void *data,
+                                             update_flags_handler_t handler,
+                                             uint32_t flag,
+                                             const char *desc,
+                                             bool set_flag)
 {
-       int ret;
-       struct ctdb_node_map *nodemap=NULL;
+       struct ctdb_node_map *nodemap = NULL;
+       bool flag_is_set;
 
-       /* check if the node is already disabled */
+       /* Check if the node is already in the desired state */
        if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, 
&nodemap) != 0) {
                DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
                exit(10);
        }
-       if (nodemap->nodes[options.pnn].flags & 
NODE_FLAGS_PERMANENTLY_DISABLED) {
-               DEBUG(DEBUG_ERR,("Node %d is already disabled.\n", 
options.pnn));
+       flag_is_set = nodemap->nodes[options.pnn].flags & flag;
+       if (set_flag == flag_is_set) {
+               DEBUG(DEBUG_NOTICE, ("Node %d is %s %s\n", options.pnn,
+                                    (set_flag ? "already" : "not"), desc));
                return 0;
        }
 
        do {
-               ret = ctdb_ctrl_modflags(ctdb, TIMELIMIT(), options.pnn, 
NODE_FLAGS_PERMANENTLY_DISABLED, 0);
-               if (ret != 0) {
-                       DEBUG(DEBUG_ERR, ("Unable to disable node %u\n", 
options.pnn));
-                       return ret;
+               if (!handler(ctdb, data)) {
+                       DEBUG(DEBUG_WARNING,
+                             ("Failed to send control to set state %s on node 
%u, try again\n",
+                              desc, options.pnn));
                }
 
                sleep(1);
 
-               /* read the nodemap and verify the change took effect */
-               if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, 
ctdb, &nodemap) != 0) {
-                       DEBUG(DEBUG_ERR, ("Unable to get nodemap from local 
node\n"));
-                       exit(10);
+               /* Read the nodemap and verify the change took effect.
+                * Even if the above control/handler timed out then it
+                * could still have worked!
+                */
+               if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE,
+                                        ctdb, &nodemap) != 0) {
+                       DEBUG(DEBUG_WARNING,
+                             ("Unable to get nodemap from local node, try 
again\n"));
                }
+               flag_is_set = nodemap->nodes[options.pnn].flags & flag;
+       } while (nodemap == NULL || (set_flag != flag_is_set));
 
-       } while (!(nodemap->nodes[options.pnn].flags & 
NODE_FLAGS_PERMANENTLY_DISABLED));
-       ret = control_ipreallocate(ctdb, argc, argv);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", 
options.pnn));
-               return ret;
-       }
-
-       return 0;
+       return ipreallocate(ctdb);
 }
 
-/*
-  enable a disabled remote node
- */
-static int control_enable(struct ctdb_context *ctdb, int argc, const char 
**argv)
+/* Administratively disable a node */
+static bool update_flags_disabled(struct ctdb_context *ctdb, void *data)
 {
-       int ret;
-
-       struct ctdb_node_map *nodemap=NULL;
-
-
-       /* check if the node is already enabled */
-       if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, 
&nodemap) != 0) {
-               DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-               exit(10);
-       }
-       if (!(nodemap->nodes[options.pnn].flags & 
NODE_FLAGS_PERMANENTLY_DISABLED)) {
-               DEBUG(DEBUG_ERR,("Node %d is already enabled.\n", options.pnn));
-               return 0;
-       }
-
-       do {
-               ret = ctdb_ctrl_modflags(ctdb, TIMELIMIT(), options.pnn, 0, 
NODE_FLAGS_PERMANENTLY_DISABLED);
-               if (ret != 0) {
-                       DEBUG(DEBUG_ERR, ("Unable to enable node %u\n", 
options.pnn));
-                       return ret;
-               }
-
-               sleep(1);
+       return ctdb_ctrl_modflags(ctdb, TIMELIMIT(), options.pnn,
+                                 NODE_FLAGS_PERMANENTLY_DISABLED, 0) == 0;
+}
 
-               /* read the nodemap and verify the change took effect */
-               if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, 
ctdb, &nodemap) != 0) {
-                       DEBUG(DEBUG_ERR, ("Unable to get nodemap from local 
node\n"));
-                       exit(10);
-               }
+static int control_disable(struct ctdb_context *ctdb, int argc, const char 
**argv)
+{
+       return update_flags_and_ipreallocate(ctdb, NULL,
+                                                 update_flags_disabled,
+                                                 
NODE_FLAGS_PERMANENTLY_DISABLED,
+                                                 "disabled",
+                                                 true /* set_flag*/);
+}
 
-       } while (nodemap->nodes[options.pnn].flags & 
NODE_FLAGS_PERMANENTLY_DISABLED);
+/* Administratively re-enable a node */
+static bool update_flags_not_disabled(struct ctdb_context *ctdb, void *data)
+{
+       return ctdb_ctrl_modflags(ctdb, TIMELIMIT(), options.pnn,
+                                 0, NODE_FLAGS_PERMANENTLY_DISABLED) == 0;
+}
 
-       ret = control_ipreallocate(ctdb, argc, argv);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", 
options.pnn));
-               return ret;
-       }
+static int control_enable(struct ctdb_context *ctdb,  int argc, const char 
**argv)
+{
+       return update_flags_and_ipreallocate(ctdb, NULL,
+                                                 update_flags_not_disabled,
+                                                 
NODE_FLAGS_PERMANENTLY_DISABLED,
+                                                 "disabled",
+                                                 false /* set_flag*/);
+}
 
-       return 0;
+/* Stop a node */
+static bool update_flags_stopped(struct ctdb_context *ctdb, void *data)
+{
+       return ctdb_ctrl_stop_node(ctdb, TIMELIMIT(), options.pnn) == 0;
 }
 
-/*
-  stop a remote node
- */
 static int control_stop(struct ctdb_context *ctdb, int argc, const char **argv)
 {
-       int ret;
-       struct ctdb_node_map *nodemap=NULL;
-
-       do {
-               ret = ctdb_ctrl_stop_node(ctdb, TIMELIMIT(), options.pnn);
-               if (ret != 0) {
-                       DEBUG(DEBUG_ERR, ("Unable to stop node %u   try 
again\n", options.pnn));
-               }
-       
-               sleep(1);
-
-               /* read the nodemap and verify the change took effect */
-               if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, 
ctdb, &nodemap) != 0) {
-                       DEBUG(DEBUG_ERR, ("Unable to get nodemap from local 
node\n"));
-               }
-
-       } while (nodemap == NULL || !(nodemap->nodes[options.pnn].flags & 
NODE_FLAGS_STOPPED));
-       ret = control_ipreallocate(ctdb, argc, argv);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", 
options.pnn));
-               return ret;
-       }
+       return update_flags_and_ipreallocate(ctdb, NULL,
+                                                 update_flags_stopped,
+                                                 NODE_FLAGS_STOPPED,
+                                                 "stopped",
+                                                 true /* set_flag*/);
+}
 
-       return 0;
+/* Continue a stopped node */
+static bool update_flags_not_stopped(struct ctdb_context *ctdb, void *data)
+{
+       return ctdb_ctrl_continue_node(ctdb, TIMELIMIT(), options.pnn) == 0;
 }
 
-/*
-  restart a stopped remote node
- */
 static int control_continue(struct ctdb_context *ctdb, int argc, const char 
**argv)
 {
-       int ret;
-
-       struct ctdb_node_map *nodemap=NULL;
-
-       do {
-               ret = ctdb_ctrl_continue_node(ctdb, TIMELIMIT(), options.pnn);
-               if (ret != 0) {
-                       DEBUG(DEBUG_ERR, ("Unable to continue node %u\n", 
options.pnn));
-                       return ret;
-               }
-       
-               sleep(1);
-
-               /* read the nodemap and verify the change took effect */
-               if (ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, 
ctdb, &nodemap) != 0) {
-                       DEBUG(DEBUG_ERR, ("Unable to get nodemap from local 
node\n"));
-               }
-
-       } while (nodemap == NULL || nodemap->nodes[options.pnn].flags & 
NODE_FLAGS_STOPPED);
-       ret = control_ipreallocate(ctdb, argc, argv);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", 
options.pnn));
-               return ret;
-       }
-
-       return 0;
+       return update_flags_and_ipreallocate(ctdb, NULL,
+                                                 update_flags_not_stopped,
+                                                 NODE_FLAGS_STOPPED,
+                                                 "stopped",
+                                                 false /* set_flag */);
 }
 
 static uint32_t get_generation(struct ctdb_context *ctdb)
@@ -3101,90 +3041,47 @@ static uint32_t get_generation(struct ctdb_context 
*ctdb)
        }
 }
 
-/*
-  ban a node from the cluster
- */
+/* Ban a node */
+static bool update_state_banned(struct ctdb_context *ctdb, void *data)
+{
+       struct ctdb_ban_time *bantime = (struct ctdb_ban_time *)data;
+       return ctdb_ctrl_set_ban(ctdb, TIMELIMIT(), options.pnn, bantime) == 0;
+}
+
 static int control_ban(struct ctdb_context *ctdb, int argc, const char **argv)
 {
-       int ret;
-       struct ctdb_node_map *nodemap=NULL;
        struct ctdb_ban_time bantime;
 
        if (argc < 1) {
                usage();
        }
        
-       /* verify the node exists */
-       ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, 
&nodemap);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-               return ret;
-       }
-
-       if (nodemap->nodes[options.pnn].flags & NODE_FLAGS_BANNED) {
-               DEBUG(DEBUG_ERR,("Node %u is already banned.\n", options.pnn));
-               return -1;
-       }
-
        bantime.pnn  = options.pnn;
        bantime.time = strtoul(argv[0], NULL, 0);
 
-       ret = ctdb_ctrl_set_ban(ctdb, TIMELIMIT(), options.pnn, &bantime);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR,("Banning node %d for %d seconds failed.\n", 
bantime.pnn, bantime.time));
-               return -1;
-       }       
-
-       ret = control_ipreallocate(ctdb, argc, argv);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", 
options.pnn));
-               return ret;
-       }
-
-       return 0;
+       return update_flags_and_ipreallocate(ctdb, &bantime,
+                                                 update_state_banned,
+                                                 NODE_FLAGS_BANNED,
+                                                 "banned",
+                                                 true /* set_flag*/);
 }
 
 
-/*
-  unban a node from the cluster
- */
+/* Unban a node */
 static int control_unban(struct ctdb_context *ctdb, int argc, const char 
**argv)
 {
-       int ret;
-       struct ctdb_node_map *nodemap=NULL;
        struct ctdb_ban_time bantime;
 
-       /* verify the node exists */
-       ret = ctdb_ctrl_getnodemap(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE, ctdb, 
&nodemap);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, ("Unable to get nodemap from local node\n"));
-               return ret;
-       }
-
-       if (!(nodemap->nodes[options.pnn].flags & NODE_FLAGS_BANNED)) {
-               DEBUG(DEBUG_ERR,("Node %u is not banned.\n", options.pnn));
-               return -1;
-       }
-
        bantime.pnn  = options.pnn;
        bantime.time = 0;
 
-       ret = ctdb_ctrl_set_ban(ctdb, TIMELIMIT(), options.pnn, &bantime);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR,("Unbanning node %d failed.\n", bantime.pnn));
-               return -1;
-       }       
-
-       ret = control_ipreallocate(ctdb, argc, argv);
-       if (ret != 0) {
-               DEBUG(DEBUG_ERR, ("IP Reallocate failed on node %u\n", 
options.pnn));
-               return ret;
-       }
-
-       return 0;
+       return update_flags_and_ipreallocate(ctdb, &bantime,
+                                                 update_state_banned,
+                                                 NODE_FLAGS_BANNED,
+                                                 "banned",
+                                                 false /* set_flag*/);
 }
 
-
 /*
   show ban information for a node
  */
@@ -3210,7 +3107,8 @@ static int control_showban(struct ctdb_context *ctdb, int 
argc, const char **arg
        if (bantime->time == 0) {
                printf("Node %u is not banned\n", bantime->pnn);
        } else {
-               printf("Node %u is banned banned for %d seconds\n", 
bantime->pnn, bantime->time);
+               printf("Node %u is banned, %d seconds remaining\n",
+                      bantime->pnn, bantime->time);
        }
 
        return 0;
@@ -4169,15 +4067,9 @@ static int control_getlog(struct ctdb_context *ctdb, int 
argc, const char **argv
        TDB_DATA data;
        struct timeval tv;
 
-       /* Since this can fail, do it first */
-       log_addr.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
-       if (log_addr.pnn == -1) {
-               DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
-               return -1;
-       }
-
        /* Process options */
        main_daemon = true;
+       log_addr.pnn = ctdb_get_pnn(ctdb);
        log_addr.level = DEBUG_NOTICE;
        for (i = 0; i < argc; i++) {
                if (strcmp(argv[i], "recoverd") == 0) {
@@ -4302,15 +4194,9 @@ static int reloadips_all(struct ctdb_context *ctdb)
                }
        }
 
-
-       rips.pnn = ctdb_ctrl_getpnn(ctdb, TIMELIMIT(), CTDB_CURRENT_NODE);
-       if (rips.pnn == -1) {
-               DEBUG(DEBUG_ERR, ("Failed to get pnn of local node\n"));
-               return 1;
-       }
+       rips.pnn = ctdb_get_pnn(ctdb);
        rips.srvid = getpid();
 
-
        /* register a message port for receiveing the reply so that we
           can receive the reply


-- 
CTDB repository

Reply via email to