This sends SD_OP_COMPLETE_RECOVERY to all nodes when object recovery finishes. Sheep removes stale objects once it has received SD_OP_COMPLETE_RECOVERY from all nodes.
Signed-off-by: MORITA Kazutaka <[email protected]> --- include/internal_proto.h | 1 + sheep/ops.c | 54 +++++++++++++++++++++++++++++++++++++++++++-- sheep/plain_store.c | 2 +- sheep/recovery.c | 30 ++++++++++++++++++++++++- sheep/sheep_priv.h | 4 +- 5 files changed, 84 insertions(+), 7 deletions(-) diff --git a/include/internal_proto.h b/include/internal_proto.h index 3d70ba9..c1d116a 100644 --- a/include/internal_proto.h +++ b/include/internal_proto.h @@ -64,6 +64,7 @@ #define SD_OP_DISABLE_RECOVER 0xA9 #define SD_OP_INFO_RECOVER 0xAA #define SD_OP_GET_VDI_COPIES 0xAB +#define SD_OP_COMPLETE_RECOVERY 0xAC /* internal flags for hdr.flags, must be above 0x80 */ #define SD_FLAG_CMD_RECOVERY 0x0080 diff --git a/sheep/ops.c b/sheep/ops.c index c6a4f3b..499c773 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -536,8 +536,6 @@ static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp, void *data) { int ret; - struct siocb iocb = { 0 }; - iocb.epoch = sys->epoch; if (node_in_recovery()) return SD_RES_NODE_IN_RECOVERY; @@ -546,7 +544,7 @@ static int cluster_cleanup(const struct sd_req *req, struct sd_rsp *rsp, return SD_RES_SUCCESS; if (sd_store->cleanup) - ret = sd_store->cleanup(&iocb); + ret = sd_store->cleanup(); else ret = SD_RES_NO_SUPPORT; @@ -561,6 +559,49 @@ static int cluster_notify_vdi_del(const struct sd_req *req, struct sd_rsp *rsp, return objlist_cache_cleanup(vid); } +static int cluster_recovery_completion(const struct sd_req *req, + struct sd_rsp *rsp, + void *data) +{ + static struct sd_node recovereds[SD_MAX_NODES], *node; + static size_t nr_recovereds; + static int latest_epoch; + struct vnode_info *vnode_info; + int i; + + node = (struct sd_node *)data; + + if (latest_epoch < req->epoch) { + dprintf("new epoch %d\n", req->epoch); + latest_epoch = req->epoch; + nr_recovereds = 0; + } + + recovereds[nr_recovereds++] = *(struct sd_node *)node; + qsort(recovereds, nr_recovereds, sizeof(*recovereds), node_id_cmp); + + dprintf("%s is recovered at 
epoch %d\n", node_to_str(node), req->epoch); + for (i = 0; i < nr_recovereds; i++) + dprintf("[%x] %s\n", i, node_to_str(recovereds + i)); + + if (sys->epoch != latest_epoch) + return SD_RES_SUCCESS; + + vnode_info = get_vnode_info(); + + if (vnode_info->nr_nodes == nr_recovereds && + memcmp(vnode_info->nodes, recovereds, + sizeof(*recovereds) * nr_recovereds) == 0) { + dprintf("all nodes are recovered at epoch %d\n", req->epoch); + if (sd_store->cleanup) + sd_store->cleanup(); + } + + put_vnode_info(vnode_info); + + return SD_RES_SUCCESS; +} + static int local_set_cache_size(const struct sd_req *req, struct sd_rsp *rsp, void *data) { @@ -947,6 +988,13 @@ static struct sd_op_template sd_ops[] = { .process_main = cluster_notify_vdi_del, }, + [SD_OP_COMPLETE_RECOVERY] = { + .name = "COMPLETE_RECOVERY", + .type = SD_OP_TYPE_CLUSTER, + .force = 1, + .process_main = cluster_recovery_completion, + }, + /* local operations */ [SD_OP_GET_STORE_LIST] = { .name = "GET_STORE_LIST", diff --git a/sheep/plain_store.c b/sheep/plain_store.c index 8028f7d..8888521 100644 --- a/sheep/plain_store.c +++ b/sheep/plain_store.c @@ -138,7 +138,7 @@ out: return ret; } -int default_cleanup(struct siocb *iocb) +int default_cleanup(void) { rmdir_r(stale_dir); if (mkdir(stale_dir, 0755) < 0) { diff --git a/sheep/recovery.c b/sheep/recovery.c index 72c90cd..dec7261 100644 --- a/sheep/recovery.c +++ b/sheep/recovery.c @@ -337,6 +337,31 @@ static inline void run_next_rw(struct recovery_work *rw) dprintf("recovery work is superseded\n"); } +static void notify_recovery_completion_work(struct work *work) +{ + struct recovery_work *rw = container_of(work, struct recovery_work, + work); + struct sd_req hdr; + int ret; + + sd_init_req(&hdr, SD_OP_COMPLETE_RECOVERY); + hdr.epoch = rw->epoch; + hdr.flags = SD_FLAG_CMD_WRITE; + hdr.data_length = sizeof(sys->this_node); + + ret = exec_local_req(&hdr, &sys->this_node); + if (ret != SD_RES_SUCCESS) + eprintf("failed to notify recovery completion, %d\n", + 
rw->epoch); +} + +static void notify_recovery_completion_main(struct work *work) +{ + struct recovery_work *rw = container_of(work, struct recovery_work, + work); + free_recovery_work(rw); +} + static inline void finish_recovery(struct recovery_work *rw) { recovering_work = NULL; @@ -345,7 +370,10 @@ static inline void finish_recovery(struct recovery_work *rw) if (sd_store->end_recover) sd_store->end_recover(sys->epoch - 1, rw->old_vinfo); - free_recovery_work(rw); + /* notify recovery completion to other nodes */ + rw->work.fn = notify_recovery_completion_work; + rw->work.done = notify_recovery_completion_main; + queue_work(sys->recovery_wqueue, &rw->work); dprintf("recovery complete: new epoch %"PRIu32"\n", sys->recovered_epoch); diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 224be51..1bc7e60 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -154,7 +154,7 @@ struct store_driver { int (*purge_obj)(void); /* Operations for snapshot */ int (*snapshot)(struct siocb *); - int (*cleanup)(struct siocb *); + int (*cleanup)(void); int (*restore)(struct siocb *); int (*get_snap_file)(struct siocb *); }; @@ -166,7 +166,7 @@ int default_read(uint64_t oid, struct siocb *iocb); int default_link(uint64_t oid, struct siocb *iocb, uint32_t tgt_epoch); int default_atomic_put(uint64_t oid, struct siocb *iocb); int default_end_recover(uint32_t old_epoch, struct vnode_info *old_vnode_info); -int default_cleanup(struct siocb *iocb); +int default_cleanup(void); int default_format(char *name); int default_remove_object(uint64_t oid); int default_purge_obj(void); -- 1.7.2.5 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
