We currently do:

1. when we find the node on corosync, we allocate the node and add it
to cpg_node_list.

2. when the node joins sheepdog (completes the JOIN process), we
allocate the node and add it to sd_node_list.


This patch simplifies the above:

1. when we find the node on corosync, we allocate the node and add it
to cpg_node_list.

2. when the node joins sheepdog (completes the JOIN process), we
move the node from cpg_node_list to sd_node_list.

The node is on cpg_node_list _OR_ sd_node_list.

I thought about managing the nodes on a single list but the code
doesn't look simpler than this approach.

I also add some comments.

Signed-off-by: FUJITA Tomonori <[email protected]>
---
 collie/collie.h |    5 ++++
 collie/group.c  |   62 +++++++++++++++++++++++++++++++++++++++----------------
 2 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/collie/collie.h b/collie/collie.h
index 1d066ad..826ac3a 100644
--- a/collie/collie.h
+++ b/collie/collie.h
@@ -65,8 +65,13 @@ struct cluster_info {
        uint32_t epoch;
        uint32_t status;
 
+       /*
+        * we add a node to cpg_node_list in confchg then move it to
+        * sd_node_list when the node joins sheepdog.
+        */
        struct list_head cpg_node_list;
        struct list_head sd_node_list;
+
        int node_list_idx;
        struct list_head vm_list;
        struct list_head pending_list;
diff --git a/collie/group.c b/collie/group.c
index 276ed91..3df4930 100644
--- a/collie/group.c
+++ b/collie/group.c
@@ -495,11 +495,28 @@ static void get_vdi_bitmap_from_all(void)
        }
 }
 
+static int move_node_to_sd_list(uint32_t nodeid, uint32_t pid,
+                               struct sheepdog_node_list_entry ent)
+{
+       struct node *node;
+
+       node = find_node(&sys->cpg_node_list, nodeid, pid);
+       if (!node)
+               return 1;
+
+       if (!node->ent.id)
+               node->ent = ent;
+
+       list_del(&node->list);
+       list_add_tail(&node->list, &sys->sd_node_list);
+
+       return 0;
+}
+
 static void update_cluster_info(struct join_message *msg)
 {
        int i;
        int ret, nr_nodes = msg->nr_nodes;
-       struct node *node;
        struct sheepdog_node_list_entry entry[SD_MAX_NODES];
 
        if (!sys->nr_sobjs)
@@ -509,16 +526,16 @@ static void update_cluster_info(struct join_message *msg)
                goto out;
 
        for (i = 0; i < nr_nodes; i++) {
-               node = find_node(&sys->cpg_node_list, msg->nodes[i].nodeid,
-                                msg->nodes[i].pid);
-               if (!node)
-                       continue;
-
-               if (!node->ent.id)
-                       node->ent = msg->nodes[i].ent;
-
-               add_node(&sys->sd_node_list, msg->nodes[i].nodeid, 
msg->nodes[i].pid,
-                        &msg->nodes[i].ent);
+               ret = move_node_to_sd_list(msg->nodes[i].nodeid,
+                                          msg->nodes[i].pid,
+                                          msg->nodes[i].ent);
+               /*
+                * the node belonged to sheepdog when the master build
+                * the JOIN response however it has gone.
+                */
+               if (ret)
+                       vprintf(SDOG_INFO "nodeid: %x, pid: %d has gone\n",
+                               msg->nodes[i].nodeid, msg->nodes[i].pid);
        }
 
        sys->synchronized = 1;
@@ -542,7 +559,14 @@ static void update_cluster_info(struct join_message *msg)
        }
 
 out:
-       add_node(&sys->sd_node_list, msg->nodeid, msg->pid, &msg->header.from);
+       ret = move_node_to_sd_list(msg->nodeid, msg->pid, msg->header.from);
+       /*
+        * this should not happen since __sd_deliver() checks if the
+        * host from msg on cpg_node_list.
+        */
+       if (ret)
+               vprintf(SDOG_ERR "nodeid: %x, pid: %d has gone\n",
+                       msg->nodeid, msg->pid);
 
        if (sys->status == SD_STATUS_OK) {
                nr_nodes = get_ordered_sd_node_list(entry);
@@ -902,12 +926,7 @@ static void __sd_confch(struct work *work, int idx)
        }
 
        for (i = 0; i < left_list_entries; i++) {
-               node = find_node(&sys->cpg_node_list, left_list[i].nodeid, 
left_list[i].pid);
-               if (node) {
-                       list_del(&node->list);
-                       free(node);
-               } else
-                       eprintf("System error\n");
+               /* the node must be on sd_node_list or cpg_node_list. */
 
                node = find_node(&sys->sd_node_list, left_list[i].nodeid, 
left_list[i].pid);
                if (node) {
@@ -927,6 +946,13 @@ static void __sd_confch(struct work *work, int idx)
 
                                update_epoch_store(sys->epoch);
                        }
+               } else {
+                       node = find_node(&sys->cpg_node_list, 
left_list[i].nodeid,
+                                        left_list[i].pid);
+                       if (node) {
+                               list_del(&node->list);
+                               free(node);
+                       }
                }
        }
 
-- 
1.6.5

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to