We need to handle a case like the one below:

1. a new sheep joins the cluster
2. before the new sheep finishes copying the snapshot of vdi state, a client
   issues a lock request
3. the new sheep finishes copying the state
If this execution pattern happens, the lock information produced in step 2
cannot be obtained by the new sheep. This patch solves the problem.

Brief description of the solution: when a sheep joins the cluster, it sets
its status to SD_STATUS_COLLECTING_CINFO. When the sheep receives lock/unlock
requests in this state, it logs the operations in an internal queue. After
copying the snapshot (step 3), the sheep plays back the log and constructs
the complete locking state.

Signed-off-by: Hitoshi Mitake <mitake.hito...@lab.ntt.co.jp>
---
 sheep/group.c      |    2 +
 sheep/ops.c        |   21 ++++++++++++++++++++
 sheep/sheep_priv.h |    3 ++
 sheep/vdi.c        |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 80 insertions(+), 0 deletions(-)

diff --git a/sheep/group.c b/sheep/group.c
index 2a02d11..52c822b 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -746,6 +746,8 @@ static void cinfo_collection_done(struct work *work)
 	free(w);
 	collect_work = NULL;
 
+	play_logged_vdi_ops();
+
 	sd_debug("cluster info collection finished");
 	sys->cinfo.status = next_status;
 }
diff --git a/sheep/ops.c b/sheep/ops.c
index ac219cb..659563c 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -239,6 +239,15 @@ static int cluster_lock_vdi_work(struct request *req)
 {
 	int ret;
 
+	if (sys->cinfo.status == SD_STATUS_COLLECTING_CINFO) {
+		/*
+		 * this node is collecting vdi locking status, not ready for
+		 * allowing lock by itself
+		 */
+		sd_err("This node is not ready for vdi locking, try later");
+		return SD_RES_COLLECTING_CINFO;
+	}
+
 	if (req->ci->locking_interest_vid) {
 		/* 1 fd cannot lock more than 2 VIDs */
 		sd_debug("unlocking VID: %"PRIx32, req->ci->interest_vid);
@@ -1269,6 +1278,12 @@ static int cluster_lock_vdi(const struct sd_req *req, struct sd_rsp *rsp,
 
 	sd_info("node: %s is locking VDI: %"PRIx32, node_to_str(sender), vid);
 
+	if (sys->cinfo.status == SD_STATUS_COLLECTING_CINFO) {
+		sd_debug("logging vdi lock information for later replay");
+		log_vdi_op_lock(vid, &sender->nid);
+		return SD_RES_SUCCESS;
+	}
+
 	if (!lock_vdi(vid, &sender->nid)) {
 		sd_err("locking %"PRIx32 "failed", vid);
 		return SD_RES_VDI_NOT_LOCKED;
@@ -1294,6 +1309,12 @@ static int cluster_release_vdi_main(const struct sd_req *req,
 
 	sd_info("node: %s is unlocking VDI: %"PRIx32, node_to_str(sender), vid);
 
+	if (sys->cinfo.status == SD_STATUS_COLLECTING_CINFO) {
+		sd_debug("logging vdi unlock information for later replay");
+		log_vdi_op_unlock(vid, &sender->nid);
+		return SD_RES_SUCCESS;
+	}
+
 	unlock_vdi(vid, &sender->nid);
 
 	if (node_is_local(sender)) {
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index ab6180f..1d4641b 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -359,6 +359,9 @@ void notify_release_vdi(uint32_t vid);
 void take_vdi_state_snapshot(int epoch);
 int get_vdi_state_snapshot(int epoch, void *data);
 void free_vdi_state_snapshot(int epoch);
+void log_vdi_op_lock(uint32_t vid, const struct node_id *owner);
+void log_vdi_op_unlock(uint32_t vid, const struct node_id *owner);
+void play_logged_vdi_ops(void);
 
 extern int ec_max_data_strip;
 
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 13f0f5d..ced0ed9 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -378,6 +378,60 @@ out:
 	sd_rw_unlock(&vdi_state_lock);
 }
 
+static LIST_HEAD(logged_vdi_ops);
+
+struct vdi_op_log {
+	bool lock;
+	uint32_t vid;
+	struct node_id owner;
+
+	struct list_node list;
+};
+
+void log_vdi_op_lock(uint32_t vid, const struct node_id *owner)
+{
+	struct vdi_op_log *op;
+
+	op = xzalloc(sizeof(*op));
+	op->lock = true;
+	op->vid = vid;
+	memcpy(&op->owner, owner, sizeof(*owner));
+	INIT_LIST_NODE(&op->list);
+	list_add_tail(&op->list, &logged_vdi_ops);
+}
+
+void log_vdi_op_unlock(uint32_t vid, const struct node_id *owner)
+{
+	struct vdi_op_log *op;
+
+	op = xzalloc(sizeof(*op));
+	op->lock = false;
+	op->vid = vid;
+	memcpy(&op->owner, owner, sizeof(*owner));
+	INIT_LIST_NODE(&op->list);
+	list_add_tail(&op->list, &logged_vdi_ops);
+}
+
+void play_logged_vdi_ops(void)
+{
+	struct vdi_op_log *op;
+
+	list_for_each_entry(op, &logged_vdi_ops, list) {
+		struct vdi_state entry;
+
+		memset(&entry, 0, sizeof(entry));
+		entry.vid = op->vid;
+		memcpy(&entry.lock_owner, &op->owner,
+		       sizeof(op->owner));
+		if (op->lock)
+			entry.lock_state = LOCK_STATE_LOCKED;
+		else
+			entry.lock_state = LOCK_STATE_UNLOCKED;
+
+		apply_vdi_lock_state(&entry);
+	}
+}
+
 static struct sd_inode *alloc_inode(const struct vdi_iocb *iocb,
 				    uint32_t new_snapid, uint32_t new_vid,
 				    uint32_t *data_vdi_id,
-- 
1.7.1
-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog
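
P.S. For readers who want the deferred-replay idea in isolation, here is a
minimal standalone sketch of the same queue-and-replay pattern. It is not
part of the patch: every name in it (op_entry, queue_op, apply_op,
replay_ops) is invented for illustration, and it uses a hand-rolled singly
linked queue where the patch uses sheepdog's list_node helpers and applies
the result via apply_vdi_lock_state().

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct op_entry {
	bool lock;               /* true = lock request, false = unlock */
	uint32_t vid;            /* VDI id the operation targets */
	struct op_entry *next;
};

static struct op_entry *head;
static struct op_entry **tail = &head;

/* Step 2: while cluster info is still being collected, only record the op. */
static void queue_op(bool lock, uint32_t vid)
{
	struct op_entry *op = calloc(1, sizeof(*op));

	if (!op)
		abort();
	op->lock = lock;
	op->vid = vid;
	*tail = op;              /* append at the tail to keep arrival order */
	tail = &op->next;
}

/* Stand-in for apply_vdi_lock_state(): just report the resulting state. */
static void apply_op(const struct op_entry *op)
{
	printf("VDI %08" PRIx32 " -> %s\n", op->vid,
	       op->lock ? "LOCKED" : "UNLOCKED");
}

/* Step 3: once the snapshot copy is done, replay the log in order. */
static void replay_ops(void)
{
	while (head) {
		struct op_entry *op = head;

		head = op->next;
		apply_op(op);
		free(op);
	}
	tail = &head;
}

int main(void)
{
	queue_op(true, 0xfd32a1);   /* lock that arrived during collection */
	queue_op(false, 0xfd32a1);  /* matching unlock arrived later */
	queue_op(true, 0xfd32a2);
	replay_ops();               /* net effect: only fd32a2 stays locked */
	return 0;
}

Appending at the tail is the important design choice: replaying a lock and
its matching unlock out of order would reconstruct the wrong final state.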