We need to handle a case like the one below:

1. a new sheep joins the cluster
2. before the new sheep finishes copying the snapshot of vdi state, a client
   issues a lock request
3. the new sheep finishes copying the state
If this execution pattern happens, the lock information produced in step 2
cannot be obtained by the new sheep. This patch solves the problem.

Brief description of the solution: when a sheep joins the cluster, it sets
its status to SD_STATUS_COLLECTING_CINFO. When the sheep receives lock/unlock
requests in this state, it logs the operations in an internal queue. After
copying the snapshot (step 3), the sheep plays back the log and constructs
the complete locking state.

Signed-off-by: Hitoshi Mitake <mitake.hito...@lab.ntt.co.jp>
---
 sheep/group.c      |    2 +
 sheep/ops.c        |   21 ++++++++++++++++++++
 sheep/sheep_priv.h |    3 ++
 sheep/vdi.c        |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 80 insertions(+), 0 deletions(-)

diff --git a/sheep/group.c b/sheep/group.c
index 2a02d11..52c822b 100644
--- a/sheep/group.c
+++ b/sheep/group.c
@@ -746,6 +746,8 @@ static void cinfo_collection_done(struct work *work)
 	free(w);
 	collect_work = NULL;
 
+	play_logged_vdi_ops();
+
 	sd_debug("cluster info collection finished");
 	sys->cinfo.status = next_status;
 }
diff --git a/sheep/ops.c b/sheep/ops.c
index ac219cb..659563c 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -239,6 +239,15 @@ static int cluster_lock_vdi_work(struct request *req)
 {
 	int ret;
 
+	if (sys->cinfo.status == SD_STATUS_COLLECTING_CINFO) {
+		/*
+		 * this node is collecting vdi locking status, not ready for
+		 * allowing lock by itself
+		 */
+		sd_err("This node is not ready for vdi locking, try later");
+		return SD_RES_COLLECTING_CINFO;
+	}
+
 	if (req->ci->locking_interest_vid) {
 		/* 1 fd cannot lock more than 2 VIDs */
 		sd_debug("unlocking VID: %"PRIx32, req->ci->interest_vid);
@@ -1269,6 +1278,12 @@ static int cluster_lock_vdi(const struct sd_req *req, struct sd_rsp *rsp,
 
 	sd_info("node: %s is locking VDI: %"PRIx32, node_to_str(sender), vid);
 
+	if (sys->cinfo.status == SD_STATUS_COLLECTING_CINFO) {
+		sd_debug("logging vdi lock information for later replay");
+		log_vdi_op_lock(vid, &sender->nid);
+		return SD_RES_SUCCESS;
+	}
+
 	if (!lock_vdi(vid, &sender->nid)) {
 		sd_err("locking %"PRIx32 "failed", vid);
 		return SD_RES_VDI_NOT_LOCKED;
@@ -1294,6 +1309,12 @@ static int cluster_release_vdi_main(const struct sd_req *req,
 
 	sd_info("node: %s is unlocking VDI: %"PRIx32, node_to_str(sender), vid);
 
+	if (sys->cinfo.status == SD_STATUS_COLLECTING_CINFO) {
+		sd_debug("logging vdi unlock information for later replay");
+		log_vdi_op_unlock(vid, &sender->nid);
+		return SD_RES_SUCCESS;
+	}
+
 	unlock_vdi(vid, &sender->nid);
 
 	if (node_is_local(sender)) {
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index ab6180f..1d4641b 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -359,6 +359,9 @@ void notify_release_vdi(uint32_t vid);
 void take_vdi_state_snapshot(int epoch);
 int get_vdi_state_snapshot(int epoch, void *data);
 void free_vdi_state_snapshot(int epoch);
+void log_vdi_op_lock(uint32_t vid, const struct node_id *owner);
+void log_vdi_op_unlock(uint32_t vid, const struct node_id *owner);
+void play_logged_vdi_ops(void);
 
 extern int ec_max_data_strip;
 
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 13f0f5d..ced0ed9 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -378,6 +378,60 @@ out:
 	sd_rw_unlock(&vdi_state_lock);
 }
 
+static LIST_HEAD(logged_vdi_ops);
+
+struct vdi_op_log {
+	bool lock;
+	uint32_t vid;
+	struct node_id owner;
+
+	struct list_node list;
+};
+
+void log_vdi_op_lock(uint32_t vid, const struct node_id *owner)
+{
+	struct vdi_op_log *op;
+
+	op = xzalloc(sizeof(*op));
+	op->lock = true;
+	op->vid = vid;
+	memcpy(&op->owner, owner, sizeof(*owner));
+	INIT_LIST_NODE(&op->list);
+	list_add_tail(&op->list, &logged_vdi_ops);
+}
+
+void log_vdi_op_unlock(uint32_t vid, const struct node_id *owner)
+{
+	struct vdi_op_log *op;
+
+	op = xzalloc(sizeof(*op));
+	op->lock = false;
+	op->vid = vid;
+	memcpy(&op->owner, owner, sizeof(*owner));
+	INIT_LIST_NODE(&op->list);
+	list_add_tail(&op->list, &logged_vdi_ops);
+}
+
+void play_logged_vdi_ops(void)
+{
+	struct vdi_op_log *op;
+
+	list_for_each_entry(op, &logged_vdi_ops, list) {
+		struct vdi_state entry;
+
+		memset(&entry, 0, sizeof(entry));
+		entry.vid = op->vid;
+		memcpy(&entry.lock_owner, &op->owner,
+		       sizeof(op->owner));
+		if (op->lock)
+			entry.lock_state = LOCK_STATE_LOCKED;
+		else
+			entry.lock_state = LOCK_STATE_UNLOCKED;
+
+		apply_vdi_lock_state(&entry);
+	}
+}
+
 static struct sd_inode *alloc_inode(const struct vdi_iocb *iocb,
 				    uint32_t new_snapid, uint32_t new_vid,
 				    uint32_t *data_vdi_id,
-- 
1.7.1
-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog
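
P.S. For readers who want the deferred-replay idea in isolation, here is a
minimal standalone sketch of the same queue-and-replay pattern. It is not
part of the patch: every name in it (op_entry, queue_op, apply_op,
replay_ops) is invented for illustration, and it uses a hand-rolled singly
linked queue where the patch uses sheepdog's list_node helpers and applies
the result via apply_vdi_lock_state().

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct op_entry {
	bool lock;               /* true = lock request, false = unlock */
	uint32_t vid;            /* VDI id the operation targets */
	struct op_entry *next;
};

static struct op_entry *head;
static struct op_entry **tail = &head;

/* Step 2: while cluster info is still being collected, only record the op. */
static void queue_op(bool lock, uint32_t vid)
{
	struct op_entry *op = calloc(1, sizeof(*op));

	if (!op)
		abort();
	op->lock = lock;
	op->vid = vid;
	*tail = op;              /* append at the tail to keep arrival order */
	tail = &op->next;
}

/* Stand-in for apply_vdi_lock_state(): just report the resulting state. */
static void apply_op(const struct op_entry *op)
{
	printf("VDI %08" PRIx32 " -> %s\n", op->vid,
	       op->lock ? "LOCKED" : "UNLOCKED");
}

/* Step 3: once the snapshot copy is done, replay the log in order. */
static void replay_ops(void)
{
	while (head) {
		struct op_entry *op = head;

		head = op->next;
		apply_op(op);
		free(op);
	}
	tail = &head;
}

int main(void)
{
	queue_op(true, 0xfd32a1);   /* lock that arrived during collection */
	queue_op(false, 0xfd32a1);  /* matching unlock arrived later */
	queue_op(true, 0xfd32a2);
	replay_ops();               /* net effect: only fd32a2 stays locked */
	return 0;
}

Appending at the tail is the important design choice: replaying a lock and
its matching unlock out of order would reconstruct the wrong final state.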