From: levin li <[email protected]>

When the epoch changes, a new node joins, or an old node leaves, we
should recalculate the vnode_info for every sd_node. The disk space is
stored in sd_node and transferred to every other node together with
the join message.

Signed-off-by: levin li <[email protected]>
---
 include/internal_proto.h |    1 +
 sheep/group.c            |   24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 0 deletions(-)

diff --git a/include/internal_proto.h b/include/internal_proto.h
index 3e22124..d48c8af 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -174,6 +174,7 @@ struct sd_node {
        struct node_id  nid;
        uint16_t        nr_vnodes;
        uint32_t        zone;
+       uint32_t        space;
 };
 
 struct epoch_log {
diff --git a/sheep/group.c b/sheep/group.c
index 16cbdaf..960987f 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -772,6 +772,24 @@ static void get_vdi_bitmap(struct sd_node *nodes, size_t nr_nodes)
        queue_work(sys->block_wqueue, &w->work);
 }
 
+static void recalculate_vnodes(struct sd_node *nodes, int nr_nodes)
+{
+       int i;
+       uint64_t avg_size = 0;
+       float factor;
+
+       for (i = 0; i < nr_nodes; i++)
+               avg_size += nodes[i].space;
+       avg_size /= nr_nodes;
+
+       for (i = 0;i < nr_nodes; i++) {
+               factor = (float)nodes[i].space / (float)avg_size;
+               nodes[i].nr_vnodes = SD_DEFAULT_VNODES * factor;
+               dprintf("node %d has %d vnodes, free space %" PRIu32 "\n",
+                       nodes[i].nid.port, nodes[i].nr_vnodes, nodes[i].space);
+       }
+}
+
 static void update_cluster_info(struct join_message *msg,
                                struct sd_node *joined, struct sd_node *nodes,
                                size_t nr_nodes)
@@ -784,6 +802,8 @@ static void update_cluster_info(struct join_message *msg,
        if (!sys->join_finished)
                finish_join(msg, joined, nodes, nr_nodes);
 
+       recalculate_vnodes(nodes, nr_nodes);
+
        old_vnode_info = current_vnode_info;
        current_vnode_info = alloc_vnode_info(nodes, nr_nodes);
 
@@ -1090,6 +1110,8 @@ void sd_leave_handler(struct sd_node *left, struct sd_node *members,
        if (sys->status == SD_STATUS_SHUTDOWN)
                return;
 
+       recalculate_vnodes(members, nr_members);
+
        old_vnode_info = current_vnode_info;
        current_vnode_info = alloc_vnode_info(members, nr_members);
 
@@ -1151,6 +1173,8 @@ int create_cluster(int port, int64_t zone, int nr_vnodes,
                sys->this_node.zone = zone;
        dprintf("zone id = %u\n", sys->this_node.zone);
 
+       sys->this_node.space = sys->disk_space;
+
        if (get_latest_epoch() > 0) {
                sys->status = SD_STATUS_WAIT_FOR_JOIN;
 
-- 
1.7.1

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to