After changing inode->data_vdi_id[] from an array to a B-tree, we can no longer write a vid into data_vdi_id[] directly. So we need to add a new interface, sd_inode_write_vdi(), to update the index of the inode.
Signed-off-by: Robin Dong <[email protected]> --- dog/dog.h | 4 ++-- dog/vdi.c | 41 +++++++++++++++++++++-------------------- include/sheepdog_proto.h | 8 ++++++-- lib/sd_inode.c | 44 ++++++++++++++++++++++++++++++++++---------- sheep/ops.c | 27 ++++++++++++++++++++------- sheep/sheep_priv.h | 4 ++-- sheep/vdi.c | 8 ++++---- sheepfs/volume.c | 19 +++++++++---------- 8 files changed, 98 insertions(+), 57 deletions(-) diff --git a/dog/dog.h b/dog/dog.h index c2832bb..cc64d59 100644 --- a/dog/dog.h +++ b/dog/dog.h @@ -85,9 +85,9 @@ void show_progress(uint64_t done, uint64_t total, bool raw); size_t get_store_objsize(uint8_t copy_policy, uint64_t oid); bool is_erasure_oid(uint64_t oid, uint8_t policy); -int write_btree_node(uint64_t id, void *mem, unsigned int len, +int write_btree_node(uint64_t id, void *mem, unsigned int len, uint64_t offset, int copies, int copy_policy, int create); -int read_btree_node(uint64_t id, void **mem, unsigned int len); +int read_btree_node(uint64_t id, void **mem, unsigned int len, uint64_t offset); #define INODE_GET_VDI(inode, idx) (sd_inode_get_vdi(read_btree_node, \ inode, idx)) diff --git a/dog/vdi.c b/dog/vdi.c index 960e2a0..fe11671 100644 --- a/dog/vdi.c +++ b/dog/vdi.c @@ -58,16 +58,16 @@ struct get_vdi_info { uint8_t copy_policy; }; -int write_btree_node(uint64_t id, void *mem, unsigned int len, +int write_btree_node(uint64_t id, void *mem, unsigned int len, uint64_t offset, int copies, int copy_policy, int create) { - return sd_write_object(id, 0, mem, len, 0, 0, copies, copy_policy, - true, true); + return sd_write_object(id, 0, mem, len, offset, 0, copies, + copy_policy, true, true); } -int read_btree_node(uint64_t id, void **mem, unsigned int len) +int read_btree_node(uint64_t id, void **mem, unsigned int len, uint64_t offset) { - return sd_read_object(id, *mem, len, 0, true); + return sd_read_object(id, *mem, len, offset, true); } static inline bool is_data_obj_writeable(const struct sd_inode *inode, @@ -559,10 +559,8 @@ 
static int vdi_create(int argc, char **argv) } INODE_SET_VDI(inode, idx, vid); - ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid, sizeof(vid), - SD_INODE_HEADER_SIZE + sizeof(vid) * idx, - 0, inode->nr_copies, inode->copy_policy, - false, true); + ret = sd_inode_write_vdi(write_btree_node, inode, idx, + vid, false); if (ret) { ret = EXIT_FAILURE; goto out; @@ -628,7 +626,7 @@ static int vdi_clone(int argc, char **argv) uint32_t base_vid, new_vid, vdi_id; uint64_t oid; int idx, max_idx, ret; - struct sd_inode *inode = NULL; + struct sd_inode *inode = NULL, *new_inode = NULL; char *buf = NULL; dst_vdi = argv[optind]; @@ -658,6 +656,12 @@ static int vdi_clone(int argc, char **argv) if (ret != EXIT_SUCCESS || !vdi_cmd_data.prealloc) goto out; + new_inode = xmalloc(sizeof(*inode)); + ret = read_vdi_obj(dst_vdi, 0, "", NULL, new_inode, + SD_INODE_HEADER_SIZE); + if (ret != EXIT_SUCCESS) + goto out; + buf = xzalloc(SD_DATA_OBJ_SIZE); max_idx = count_data_objs(inode); @@ -685,11 +689,9 @@ static int vdi_clone(int argc, char **argv) goto out; } - ret = sd_write_object(vid_to_vdi_oid(new_vid), 0, &new_vid, - sizeof(new_vid), - SD_INODE_HEADER_SIZE + sizeof(new_vid) * idx, 0, - inode->nr_copies, inode->copy_policy, - false, true); + INODE_SET_VDI(new_inode, idx, new_vid); + ret = sd_inode_write_vdi(write_btree_node, new_inode, idx, + new_vid, false); if (ret) { ret = EXIT_FAILURE; goto out; @@ -706,6 +708,8 @@ static int vdi_clone(int argc, char **argv) } out: free(inode); + if (new_inode) + free(new_inode); free(buf); return ret; } @@ -1335,11 +1339,8 @@ static int vdi_write(int argc, char **argv) } if (create) { - ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid, - sizeof(vid), - SD_INODE_HEADER_SIZE + sizeof(vid) * idx, - flags, inode->nr_copies, - inode->copy_policy, false, false); + ret = sd_inode_write_vdi(write_btree_node, inode, + idx, vid, false); if (ret) { ret = EXIT_FAILURE; goto out; diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index 
c338efa..8c9be31 100644 --- a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -249,8 +249,10 @@ struct sd_extent_header { }; typedef int (*write_node_fn)(uint64_t id, void *mem, unsigned int len, - int copies, int copy_policy, int create); -typedef int (*read_node_fn)(uint64_t id, void **mem, unsigned int len); + uint64_t offset, int copies, + int copy_policy, int create); +typedef int (*read_node_fn)(uint64_t id, void **mem, unsigned int len, + uint64_t offset); struct sheepdog_vdi_attr { char name[SD_MAX_VDI_LEN]; @@ -266,6 +268,8 @@ extern uint32_t sd_inode_get_vdi(read_node_fn reader, const struct sd_inode *inode, int idx); extern void sd_inode_set_vdi(write_node_fn writer, read_node_fn reader, struct sd_inode *inode, int idx, uint32_t vdi_id); +extern int sd_inode_write_vdi(write_node_fn writer, struct sd_inode *inode, + int idx, uint32_t vid, bool create); extern void sd_inode_copy_vdis(struct sd_inode *oldi, struct sd_inode *newi); /* 64 bit FNV-1a non-zero initial basis */ diff --git a/lib/sd_inode.c b/lib/sd_inode.c index 426e00c..60976d6 100644 --- a/lib/sd_inode.c +++ b/lib/sd_inode.c @@ -146,7 +146,7 @@ static void dump_btree(read_node_fn reader, struct sd_inode *inode) tmp = (void *)leaf_node; while (itor_idx != last_idx) { - reader(itor_idx->oid, &tmp, SD_INODE_INDEX_SIZE); + reader(itor_idx->oid, &tmp, SD_INODE_INDEX_SIZE, 0); sd_info("btree> %p idx: %d, %lu, %u", itor_idx, itor_idx->idx, itor_idx->oid, @@ -306,9 +306,9 @@ static void transfer_to_idx_root(write_node_fn writer, struct sd_inode *inode) left_oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++); right_oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++); - writer(left_oid, left, SD_INODE_INDEX_SIZE, inode->nr_copies, + writer(left_oid, left, SD_INODE_INDEX_SIZE, 0, inode->nr_copies, inode->copy_policy, 1); - writer(right_oid, right, SD_INODE_INDEX_SIZE, inode->nr_copies, + writer(right_oid, right, SD_INODE_INDEX_SIZE, 0, inode->nr_copies, inode->copy_policy, 
1); /* change root from ext-node to idx-node */ @@ -340,7 +340,7 @@ static int search_whole_btree(read_node_fn reader, const struct sd_inode *inode, if (idx_in_range(header, path->p_idx)) { oid = path->p_idx->oid; - ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE); + ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE, 0); if (ret != SD_RES_SUCCESS) goto out; path->p_ext = search_ext_entry(leaf_node, idx); @@ -351,7 +351,7 @@ static int search_whole_btree(read_node_fn reader, const struct sd_inode *inode, } else { /* check if last idx-node has space */ oid = (path->p_idx - 1)->oid; - ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE); + ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE, 0); if (ret != SD_RES_SUCCESS) goto out; if (leaf_node->entries < EXT_MAX_ENTRIES) { @@ -408,9 +408,9 @@ static void split_ext_node(write_node_fn writer, struct sd_inode *inode, split_to_nodes(old, new_ext, old, num); new_oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++); - writer(new_oid, new_ext, SD_INODE_INDEX_SIZE, inode->nr_copies, + writer(new_oid, new_ext, SD_INODE_INDEX_SIZE, 0, inode->nr_copies, inode->copy_policy, 1); - writer(path->p_idx->oid, old, SD_INODE_INDEX_SIZE, inode->nr_copies, + writer(path->p_idx->oid, old, SD_INODE_INDEX_SIZE, 0, inode->nr_copies, inode->copy_policy, 0); /* write new index */ @@ -451,7 +451,7 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader, insert_ext_entry_nosearch(path->p_ext_header, path->p_ext, idx, vdi_id); writer(path->p_idx->oid, path->p_ext_header, - SD_INODE_INDEX_SIZE, inode->nr_copies, + SD_INODE_INDEX_SIZE, 0, inode->nr_copies, inode->copy_policy, 1); } else if (path->p_ext_header) { /* the last idx-node */ @@ -461,7 +461,7 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader, path->p_idx->idx = (LAST_EXT(path->p_ext_header) - 1)->idx; writer(path->p_idx->oid, path->p_ext_header, - SD_INODE_INDEX_SIZE, inode->nr_copies, + SD_INODE_INDEX_SIZE, 0, inode->nr_copies, inode->copy_policy, 1); } else { /* 
create a new ext-node */ @@ -472,7 +472,7 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader, insert_ext_entry_nosearch(leaf_node, FIRST_EXT(leaf_node), idx, vdi_id); writer(oid, leaf_node, SD_INODE_INDEX_SIZE, - inode->nr_copies, + 0, inode->nr_copies, inode->copy_policy, 1); insert_idx_entry_nosearch(header, path->p_idx, idx, oid); @@ -524,6 +524,30 @@ out: dump_btree(reader, inode); } +int sd_inode_write_vdi(write_node_fn writer, struct sd_inode *inode, int idx, + uint32_t vid, bool create) +{ + struct sd_extent_header *header = EXT_HEADER(inode->data_vdi_id); + int len, ret = SD_RES_SUCCESS; + + if (inode->store_policy == 0) + ret = writer(vid_to_vdi_oid(vid), inode, sizeof(vid), + SD_INODE_HEADER_SIZE + sizeof(vid) * idx, + inode->nr_copies, inode->copy_policy, create); + else { + len = SD_INODE_HEADER_SIZE + sizeof(struct sd_extent_header); + if (header->depth == 1) + len += sizeof(struct sd_extent) * header->entries; + else if (header->depth == 2) + len += sizeof(struct sd_extent_idx) * header->entries; + else + assert(0); + ret = writer(vid_to_vdi_oid(vid), inode, len, 0, + inode->nr_copies, inode->copy_policy, create); + } + return ret; +} + void sd_inode_copy_vdis(struct sd_inode *oldi, struct sd_inode *newi) { memcpy(newi->data_vdi_id, oldi->data_vdi_id, sizeof(newi->data_vdi_id)); diff --git a/sheep/ops.c b/sheep/ops.c index 7f73ab1..3638a49 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -811,20 +811,33 @@ static int local_flush_vdi(struct request *req) static int local_discard_obj(struct request *req) { uint64_t oid = req->rq.obj.oid; - uint32_t vid = oid_to_vid(oid), zero = 0; - int ret, idx = data_oid_to_idx(oid); + uint32_t vid = oid_to_vid(oid), zero = 0, tmp_vid; + int ret = SD_RES_SUCCESS, idx = data_oid_to_idx(oid); + struct sd_inode *inode = xmalloc(sizeof(struct sd_inode)); sd_debug("%"PRIx64, oid); - ret = write_object(vid_to_vdi_oid(vid), (char *)&zero, sizeof(zero), - SD_INODE_HEADER_SIZE + sizeof(vid) * idx, false); + 
ret = read_object(vid_to_vdi_oid(vid), (char *)inode, + sizeof(struct sd_inode), 0); if (ret != SD_RES_SUCCESS) - return ret; - if (remove_object(oid) != SD_RES_SUCCESS) - sd_err("failed to remove %"PRIx64, oid); + goto out; + + tmp_vid = INODE_GET_VDI(inode, idx); + /* if vid in idx is not exist, we don't need to remove it */ + if (tmp_vid) { + INODE_SET_VDI(inode, idx, vid); + sd_inode_write_vdi(write_btree_node, inode, idx, zero, false); + if (ret != SD_RES_SUCCESS) + goto out; + if (remove_object(oid) != SD_RES_SUCCESS) + sd_err("failed to remove %"PRIx64, oid); + } /* * Return success even if remove_object fails because we have updated * inode successfully. */ +out: + if (inode) + free(inode); return SD_RES_SUCCESS; } diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index e28e1b1..f2d696b 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -375,9 +375,9 @@ void objlist_cache_remove(uint64_t oid); void put_request(struct request *req); -int write_btree_node(uint64_t id, void *mem, unsigned int len, +int write_btree_node(uint64_t id, void *mem, unsigned int len, uint64_t offset, int copies, int copy_policy, int create); -int read_btree_node(uint64_t id, void **mem, unsigned int len); +int read_btree_node(uint64_t id, void **mem, unsigned int len, uint64_t offset); #define INODE_GET_VDI(inode, idx) (sd_inode_get_vdi(read_btree_node, \ inode, idx)) diff --git a/sheep/vdi.c b/sheep/vdi.c index 203472a..43db614 100644 --- a/sheep/vdi.c +++ b/sheep/vdi.c @@ -22,15 +22,15 @@ struct vdi_state_entry { static struct rb_root vdi_state_root = RB_ROOT; static struct sd_lock vdi_state_lock = SD_LOCK_INITIALIZER; -int write_btree_node(uint64_t id, void *mem, unsigned int len, +int write_btree_node(uint64_t id, void *mem, unsigned int len, uint64_t offset, int copies, int copy_policy, int create) { - return write_object(id, mem, len, 0, create == 1); + return write_object(id, mem, len, offset, create == 1); } -int read_btree_node(uint64_t id, void **mem, unsigned 
int len) +int read_btree_node(uint64_t id, void **mem, unsigned int len, uint64_t offset) { - return read_object(id, *mem, len, 0); + return read_object(id, *mem, len, offset); } static int vdi_state_cmp(const struct vdi_state_entry *a, diff --git a/sheepfs/volume.c b/sheepfs/volume.c index 3fbc4a9..f94a517 100644 --- a/sheepfs/volume.c +++ b/sheepfs/volume.c @@ -66,8 +66,9 @@ static struct sd_lock vdi_inode_tree_lock = SD_LOCK_INITIALIZER; static int write_btree_node(uint64_t id, void *mem, unsigned int len, - int copies, int copy_policy, int create); -static int read_btree_node(uint64_t id, void **mem, unsigned int len); + uint64_t offset, int copies, int copy_policy, int create); +static int read_btree_node(uint64_t id, void **mem, unsigned int len, + uint64_t offset); #define INODE_GET_VDI(inode, idx) (sd_inode_get_vdi(\ read_btree_node, inode, idx)) @@ -194,10 +195,8 @@ static int volume_rw_object(char *buf, uint64_t oid, size_t size, if (create) { INODE_SET_VDI(vdi->inode, idx, vid); /* writeback inode update */ - if (volume_rw_object((char *)&vid, vid_to_vdi_oid(vid), - sizeof(vid), - SD_INODE_HEADER_SIZE + sizeof(vid) * idx, - VOLUME_WRITE) < 0) + if (sd_inode_write_vdi(write_btree_node, vdi->inode, idx, + vid, false) < 0) return -1; } done: @@ -247,20 +246,20 @@ static int volume_do_rw(const char *path, char *buf, size_t size, return 0; } -int write_btree_node(uint64_t id, void *mem, unsigned int len, +int write_btree_node(uint64_t id, void *mem, unsigned int len, uint64_t offset, int copies, int copy_policy, int create) { int ret; - ret = volume_rw_object(mem, id, len, 0, VOLUME_WRITE); + ret = volume_rw_object(mem, id, len, offset, VOLUME_WRITE); if (ret == len) return SD_RES_SUCCESS; return ret; } -int read_btree_node(uint64_t id, void **mem, unsigned int len) +int read_btree_node(uint64_t id, void **mem, unsigned int len, uint64_t offset) { int ret; - ret = volume_rw_object(*mem, id, len, 0, VOLUME_READ); + ret = volume_rw_object(*mem, id, len, offset, 
VOLUME_READ); if (ret == len) return SD_RES_SUCCESS; return ret; -- 1.7.1 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
