------------------------------------------------------------
revno: 319
revision-id: [EMAIL PROTECTED]
parent: [EMAIL PROTECTED]
committer: Andrew Tridgell <[EMAIL PROTECTED]>
branch nick: tridge
timestamp: Fri 2007-05-18 23:23:36 +1000
message:
  - up rx_cnt on all packet types
  - notice when a node becomes available again
modified:
  common/ctdb.c                  ctdb.c-20061127094323-t50f58d65iaao5of-2
  common/ctdb_call.c             ctdb_call.c-20061128065342-to93h6eejj5kon81-1
  common/ctdb_monitor.c          ctdb_monitor.c-20070518100625-8jf4ft1mjzmb22ck-1
  include/ctdb_private.h         ctdb_private.h-20061117234101-o3qt14umlg9en8z0-13
=== modified file 'common/ctdb.c'
--- a/common/ctdb.c     2007-05-18 09:19:35 +0000
+++ b/common/ctdb.c     2007-05-18 13:23:36 +0000
@@ -116,8 +116,7 @@
        node->name = talloc_asprintf(node, "%s:%u", 
                                     node->address.address, 
                                     node->address.port);
-       /* for now we just set the vnn to the line in the file - this
-          will change! */
+       /* this assumes that the nodes are kept in sorted order, and no gaps */
        node->vnn = ctdb->num_nodes;
 
        if (ctdb->address.address &&
@@ -275,6 +274,11 @@
                 "node %d to %d\n", hdr->reqid, hdr->operation, hdr->length,
                 hdr->srcnode, hdr->destnode));
 
+       /* up the counter for this source node, so we know its alive */
+       if (ctdb_validate_vnn(ctdb, hdr->srcnode)) {
+               ctdb->nodes[hdr->srcnode]->rx_cnt++;
+       }
+
        switch (hdr->operation) {
        case CTDB_REQ_CALL:
        case CTDB_REPLY_CALL:
@@ -345,7 +349,6 @@
 
        case CTDB_REQ_KEEPALIVE:
                ctdb->status.keepalive_packets_recv++;
-               ctdb_request_keepalive(ctdb, hdr);
                break;
 
        default:

=== modified file 'common/ctdb_call.c'
--- a/common/ctdb_call.c        2007-05-18 09:19:35 +0000
+++ b/common/ctdb_call.c        2007-05-18 13:23:36 +0000
@@ -785,13 +785,11 @@
 /* 
    send a keepalive packet to the other node
 */
-void ctdb_send_keepalive(struct ctdb_context *ctdb,
-                               TALLOC_CTX *mem_ctx,
-                               uint32_t destnode)
+void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode)
 {
        struct ctdb_req_keepalive *r;
        
-       r = ctdb_transport_allocate(ctdb, mem_ctx, CTDB_REQ_KEEPALIVE,
+       r = ctdb_transport_allocate(ctdb, ctdb, CTDB_REQ_KEEPALIVE,
                                    sizeof(struct ctdb_req_keepalive), 
                                    struct ctdb_req_keepalive);
        CTDB_NO_MEMORY_FATAL(ctdb, r);

=== modified file 'common/ctdb_monitor.c'
--- a/common/ctdb_monitor.c     2007-05-18 10:06:29 +0000
+++ b/common/ctdb_monitor.c     2007-05-18 13:23:36 +0000
@@ -26,73 +26,55 @@
 #include "../include/ctdb_private.h"
 
 /*
-  called when a CTDB_REQ_KEEPALIVE packet comes in
-*/
-void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr)
-{
-       struct ctdb_req_keepalive *r = (struct ctdb_req_keepalive *)hdr;
-       struct ctdb_node *node = NULL;
-       int i;
-
-       for (i=0;i<ctdb->num_nodes;i++) {
-               if (ctdb->nodes[i]->vnn == r->hdr.srcnode) {
-                       node = ctdb->nodes[i];
-                       break;
-               }
-       }
-       if (!node) {
-               DEBUG(0,(__location__ " Keepalive received from node not in ctdb->nodes : %u\n", r->hdr.srcnode));
-               return;
-       }
-
-       node->rx_cnt++;
-}
-
-
+  see if any nodes are dead
+ */
 static void ctdb_check_for_dead_nodes(struct event_context *ev, struct timed_event *te, 
                           struct timeval t, void *private_data)
 {
        struct ctdb_context *ctdb = talloc_get_type(private_data, struct ctdb_context);
        int i;
-       TALLOC_CTX *mem_ctx = talloc_new(ctdb);
 
        /* send a keepalive to all other nodes, unless */
        for (i=0;i<ctdb->num_nodes;i++) {
-               if (!(ctdb->nodes[i]->flags & NODE_FLAGS_CONNECTED)) {
+               struct ctdb_node *node = ctdb->nodes[i];
+               if (node->vnn == ctdb->vnn) {
                        continue;
                }
-               if (ctdb->nodes[i]->vnn == ctdb_get_vnn(ctdb)) {
-                       continue;
+               
+               /* it might have come alive again */
+               if (!(node->flags & NODE_FLAGS_CONNECTED) && node->rx_cnt != 0) {
+                       DEBUG(0,("Node %u is alive again - marking as connected\n", node->vnn));
+                       node->flags |= NODE_FLAGS_CONNECTED;
                }
 
-               if (ctdb->nodes[i]->rx_cnt == 0) {
-                       ctdb->nodes[i]->dead_count++;
+               if (node->rx_cnt == 0) {
+                       node->dead_count++;
                } else {
-                       ctdb->nodes[i]->dead_count = 0;
+                       node->dead_count = 0;
                }
 
-               if (ctdb->nodes[i]->dead_count>=3) {
-                       ctdb->nodes[i]->flags &= ~NODE_FLAGS_CONNECTED;
-                       /* should probably tell the transport layer
-                          to kill the sockets as well 
+               node->rx_cnt = 0;
+
+               if (node->dead_count >= CTDB_MONITORING_DEAD_COUNT) {
+                       DEBUG(0,("Node %u is dead - marking as not connected\n", node->vnn));
+                       node->flags &= ~NODE_FLAGS_CONNECTED;
+                       /* maybe tell the transport layer to kill the
+                          sockets as well?
                        */
                        continue;
                }
 
-               ctdb_send_keepalive(ctdb, mem_ctx, i);
-               ctdb->nodes[i]->rx_cnt = 0;
+               ctdb_send_keepalive(ctdb, node->vnn);
        }
-
-
-
        
-       talloc_free(mem_ctx);
-
        event_add_timed(ctdb->ev, ctdb, 
                        timeval_current_ofs(CTDB_MONITORING_TIMEOUT, 0), 
                        ctdb_check_for_dead_nodes, ctdb);
 }
 
+/*
+  start watching for nodes that might be dead
+ */
 int ctdb_start_monitoring(struct ctdb_context *ctdb)
 {
        event_add_timed(ctdb->ev, ctdb, 

=== modified file 'include/ctdb_private.h'
--- a/include/ctdb_private.h    2007-05-18 09:19:35 +0000
+++ b/include/ctdb_private.h    2007-05-18 13:23:36 +0000
@@ -311,6 +311,9 @@
 /* timeout between dead-node monitoring events */
 #define CTDB_MONITORING_TIMEOUT 5
 
+/* number of monitoring timeouts before a node is considered dead */
+#define CTDB_MONITORING_DEAD_COUNT 3
+
 
 /* number of consecutive calls from the same node before we give them
    the record */
@@ -710,7 +713,6 @@
 void ctdb_reqid_remove(struct ctdb_context *ctdb, uint32_t reqid);
 
 void ctdb_request_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
-void ctdb_request_keepalive(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
 void ctdb_reply_control(struct ctdb_context *ctdb, struct ctdb_req_header *hdr);
 
 int ctdb_daemon_send_control(struct ctdb_context *ctdb, uint32_t destnode,
@@ -819,6 +821,6 @@
 uint32_t ctdb_get_num_connected_nodes(struct ctdb_context *ctdb);
 
 int ctdb_start_monitoring(struct ctdb_context *ctdb);
-void ctdb_send_keepalive(struct ctdb_context *ctdb, TALLOC_CTX *mem_ctx, uint32_t destnode);
+void ctdb_send_keepalive(struct ctdb_context *ctdb, uint32_t destnode);
 
 #endif

Reply via email to