Since inode->data_vdi_id[] was changed from a plain array to a B-tree, we
can no longer write a vid into data_vdi_id[] directly. So we add a new
interface, sd_inode_write_vid(), to update the index of an inode.

Writing with direct=true would clear all of the object_cache, so we use
direct=false when writing middle-node objects.

Signed-off-by: Robin Dong <[email protected]>
---
 dog/common.c             |    7 +---
 dog/dog.h                |    8 +++--
 dog/vdi.c                |   45 ++++++++++++-----------
 include/sheepdog_proto.h |   13 +++++--
 lib/sd_inode.c           |   91 +++++++++++++++++++++++++++++++++++++---------
 sheep/ops.c              |   27 ++++++++++----
 sheep/sheep_priv.h       |    6 ++-
 sheep/vdi.c              |   10 +++--
 sheepfs/volume.c         |   22 ++++++-----
 9 files changed, 156 insertions(+), 73 deletions(-)

diff --git a/dog/common.c b/dog/common.c
index 803a634..ee30e81 100644
--- a/dog/common.c
+++ b/dog/common.c
@@ -136,7 +136,7 @@ int parse_vdi(vdi_parser_func_t func, size_t size, void *data)
        struct sd_req req;
        struct sd_rsp *rsp = (struct sd_rsp *)&req;
        static DECLARE_BITMAP(vdi_inuse, SD_NR_VDIS);
-       unsigned int rlen = sizeof(vdi_inuse);
+       uint32_t rlen;
 
        sd_init_req(&req, SD_OP_READ_VDIS);
        req.data_length = sizeof(vdi_inuse);
@@ -165,10 +165,7 @@ int parse_vdi(vdi_parser_func_t func, size_t size, void *data)
                        continue;
 
                if (size > SD_INODE_HEADER_SIZE) {
-                       rlen = count_data_objs(&i) * sizeof(i.data_vdi_id[0]);
-                       if (rlen > size - SD_INODE_HEADER_SIZE)
-                               rlen = size - SD_INODE_HEADER_SIZE;
-
+                       rlen = sd_inode_get_meta_size(&i, size);
                        ret = sd_read_object(oid, ((char *)&i) + SD_INODE_HEADER_SIZE,
                                             rlen, SD_INODE_HEADER_SIZE, true);
 
diff --git a/dog/dog.h b/dog/dog.h
index 3cdf8e2..16cafbd 100644
--- a/dog/dog.h
+++ b/dog/dog.h
@@ -91,9 +91,11 @@ size_t get_store_objsize(uint8_t copy_policy, uint64_t oid);
 bool is_erasure_oid(uint64_t oid, uint8_t policy);
 uint8_t parse_copy(const char *str, uint8_t *copy_policy);
 
-int dog_bnode_writer(uint64_t oid, void *mem, unsigned int len,
-                    int copies, int copy_policy, int create);
-int dog_bnode_reader(uint64_t oid, void **mem, unsigned int len);
+int dog_bnode_writer(uint64_t oid, void *mem, unsigned int len, uint64_t offset,
+                    uint32_t flags, int copies, int copy_policy, bool create,
+                    bool direct);
+int dog_bnode_reader(uint64_t oid, void **mem, unsigned int len,
+                    uint64_t offset);
 
 #define INODE_GET_VID(inode, idx) (sd_inode_get_vid(dog_bnode_reader, \
                                                        inode, idx))
diff --git a/dog/vdi.c b/dog/vdi.c
index aa6eea6..b130f3a 100644
--- a/dog/vdi.c
+++ b/dog/vdi.c
@@ -58,16 +58,18 @@ struct get_vdi_info {
        uint8_t copy_policy;
 };
 
-int dog_bnode_writer(uint64_t oid, void *mem, unsigned int len,
-                    int copies, int copy_policy, int create)
+int dog_bnode_writer(uint64_t oid, void *mem, unsigned int len, uint64_t offset,
+                    uint32_t flags, int copies, int copy_policy, bool create,
+                    bool direct)
 {
-       return sd_write_object(oid, 0, mem, len, 0, 0, copies, copy_policy,
-                       true, true);
+       return sd_write_object(oid, 0, mem, len, offset, flags, copies,
+                              copy_policy, create, direct);
 }
 
-int dog_bnode_reader(uint64_t oid, void **mem, unsigned int len)
+int dog_bnode_reader(uint64_t oid, void **mem, unsigned int len,
+                    uint64_t offset)
 {
-       return sd_read_object(oid, *mem, len, 0, true);
+       return sd_read_object(oid, *mem, len, offset, true);
 }
 
 static inline bool is_data_obj_writeable(const struct sd_inode *inode,
@@ -582,10 +584,8 @@ static int vdi_create(int argc, char **argv)
                }
 
                INODE_SET_VID(inode, idx, vid);
-               ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid, sizeof(vid),
-                                     SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
-                                     0, inode->nr_copies, inode->copy_policy,
-                                     false, true);
+               ret = sd_inode_write_vid(dog_bnode_writer, inode, idx, vid, vid,
+                                        0, false, true);
                if (ret) {
                        ret = EXIT_FAILURE;
                        goto out;
@@ -651,7 +651,7 @@ static int vdi_clone(int argc, char **argv)
        uint32_t base_vid, new_vid, vdi_id;
        uint64_t oid;
        uint32_t idx, max_idx, ret;
-       struct sd_inode *inode = NULL;
+       struct sd_inode *inode = NULL, *new_inode = NULL;
        char *buf = NULL;
 
        dst_vdi = argv[optind];
@@ -681,6 +681,12 @@ static int vdi_clone(int argc, char **argv)
        if (ret != EXIT_SUCCESS || !vdi_cmd_data.prealloc)
                goto out;
 
+       new_inode = xmalloc(sizeof(*inode));
+       ret = read_vdi_obj(dst_vdi, 0, "", NULL, new_inode,
+                       SD_INODE_HEADER_SIZE);
+       if (ret != EXIT_SUCCESS)
+               goto out;
+
        buf = xzalloc(SD_DATA_OBJ_SIZE);
        max_idx = count_data_objs(inode);
 
@@ -708,11 +714,9 @@ static int vdi_clone(int argc, char **argv)
                        goto out;
                }
 
-               ret = sd_write_object(vid_to_vdi_oid(new_vid), 0, &new_vid,
-                                     sizeof(new_vid),
-                               SD_INODE_HEADER_SIZE + sizeof(new_vid) * idx, 0,
-                                     inode->nr_copies, inode->copy_policy,
-                                     false, true);
+               INODE_SET_VID(new_inode, idx, new_vid);
+               ret = sd_inode_write_vid(dog_bnode_writer, new_inode, idx,
+                                        new_vid, new_vid, 0, false, true);
                if (ret) {
                        ret = EXIT_FAILURE;
                        goto out;
@@ -729,6 +733,8 @@ static int vdi_clone(int argc, char **argv)
        }
 out:
        free(inode);
+       if (new_inode)
+               free(new_inode);
        free(buf);
        return ret;
 }
@@ -1358,11 +1364,8 @@ static int vdi_write(int argc, char **argv)
                }
 
                if (create) {
-                       ret = sd_write_object(vid_to_vdi_oid(vid), 0, &vid,
-                                             sizeof(vid),
-                               SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
-                                             flags, inode->nr_copies,
-                                             inode->copy_policy, false, false);
+                       ret = sd_inode_write_vid(dog_bnode_writer, inode, idx,
+                                                vid, vid, flags, false, false);
                        if (ret) {
                                ret = EXIT_FAILURE;
                                goto out;
diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h
index acae97d..b05d30e 100644
--- a/include/sheepdog_proto.h
+++ b/include/sheepdog_proto.h
@@ -109,8 +109,7 @@
 
 #define SD_INODE_SIZE (sizeof(struct sd_inode))
 #define SD_INODE_INDEX_SIZE (sizeof(uint32_t) * MAX_DATA_OBJS)
-#define SD_INODE_HEADER_SIZE ((unsigned long) \
-                             (&((struct sd_inode *)0)->data_vdi_id))
+#define SD_INODE_HEADER_SIZE offsetof(struct sd_inode, data_vdi_id)
 #define SD_ATTR_OBJ_SIZE (sizeof(struct sheepdog_vdi_attr))
 #define CURRENT_VDI_ID 0
 
@@ -250,8 +249,10 @@ struct sd_extent_header {
 };
 
 typedef int (*write_node_fn)(uint64_t id, void *mem, unsigned int len,
-                               int copies, int copy_policy, int create);
-typedef int (*read_node_fn)(uint64_t id, void **mem, unsigned int len);
+                               uint64_t offset, uint32_t flags, int copies,
+                               int copy_policy, bool create, bool direct);
+typedef int (*read_node_fn)(uint64_t id, void **mem, unsigned int len,
+                               uint64_t offset);
 
 struct sheepdog_vdi_attr {
        char name[SD_MAX_VDI_LEN];
@@ -268,6 +269,10 @@ extern uint32_t sd_inode_get_vid(read_node_fn reader,
 extern void sd_inode_set_vid(write_node_fn writer, read_node_fn reader,
                             struct sd_inode *inode, uint32_t idx,
                             uint32_t vdi_id);
+extern int sd_inode_write_vid(write_node_fn writer, struct sd_inode *inode,
+                             uint32_t idx, uint32_t vid, uint32_t value,
+                             int flags, bool create, bool direct);
+extern uint32_t sd_inode_get_meta_size(struct sd_inode *inode, size_t size);
 extern void sd_inode_copy_vdis(struct sd_inode *oldi, struct sd_inode *newi);
 
 /* 64 bit FNV-1a non-zero initial basis */
diff --git a/lib/sd_inode.c b/lib/sd_inode.c
index d3b4251..b3e546d 100644
--- a/lib/sd_inode.c
+++ b/lib/sd_inode.c
@@ -159,7 +159,7 @@ static void dump_btree(read_node_fn reader, struct sd_inode *inode)
                tmp = (void *)leaf_node;
 
                while (iter_idx != last_idx) {
-                       reader(iter_idx->oid, &tmp, SD_INODE_INDEX_SIZE);
+                       reader(iter_idx->oid, &tmp, SD_INODE_INDEX_SIZE, 0);
 
                        sd_info("btree> %p idx: %d, %lu, %u",
                                        iter_idx, iter_idx->idx, iter_idx->oid,
@@ -335,10 +335,10 @@ static void transfer_to_idx_root(write_node_fn writer, struct sd_inode *inode)
        left_oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++);
        right_oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++);
 
-       writer(left_oid, left, SD_INODE_INDEX_SIZE, inode->nr_copies,
-                       inode->copy_policy, 1);
-       writer(right_oid, right, SD_INODE_INDEX_SIZE, inode->nr_copies,
-                       inode->copy_policy, 1);
+       writer(left_oid, left, SD_INODE_INDEX_SIZE, 0, 0, inode->nr_copies,
+                       inode->copy_policy, true, false);
+       writer(right_oid, right, SD_INODE_INDEX_SIZE, 0, 0, inode->nr_copies,
+                       inode->copy_policy, true, false);
 
        /* change root from ext-node to idx-node */
        root->entries = 0;
@@ -373,7 +373,7 @@ static int search_whole_btree(read_node_fn reader, const struct sd_inode *inode,
 
                if (idx_in_range(header, path->p_idx)) {
                        oid = path->p_idx->oid;
-                       ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE);
+                       ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE, 0);
                        if (ret != SD_RES_SUCCESS)
                                goto out;
                        path->p_ext = search_ext_entry(leaf_node, idx);
@@ -384,7 +384,7 @@ static int search_whole_btree(read_node_fn reader, const struct sd_inode *inode,
                } else {
                        /* check if last idx-node has space */
                        oid = (path->p_idx - 1)->oid;
-                       ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE);
+                       ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE, 0);
                        if (ret != SD_RES_SUCCESS)
                                goto out;
                        if (leaf_node->entries < EXT_MAX_ENTRIES) {
@@ -445,10 +445,10 @@ static void split_ext_node(write_node_fn writer, struct sd_inode *inode,
        split_to_nodes(old, new_ext, old, num);
 
        new_oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++);
-       writer(new_oid, new_ext, SD_INODE_INDEX_SIZE, inode->nr_copies,
-                       inode->copy_policy, 1);
-       writer(path->p_idx->oid, old, SD_INODE_INDEX_SIZE, inode->nr_copies,
-                       inode->copy_policy, 0);
+       writer(new_oid, new_ext, SD_INODE_INDEX_SIZE, 0, 0, inode->nr_copies,
+                       inode->copy_policy, true, false);
+       writer(path->p_idx->oid, old, SD_INODE_INDEX_SIZE, 0, 0,
+              inode->nr_copies, inode->copy_policy, false, false);
 
        /* write new index */
        insert_idx_entry(EXT_HEADER(inode->data_vdi_id),
@@ -492,8 +492,8 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader,
                        insert_ext_entry_nosearch(path->p_ext_header,
                                        path->p_ext, idx, vdi_id);
                        writer(path->p_idx->oid, path->p_ext_header,
-                               SD_INODE_INDEX_SIZE, inode->nr_copies,
-                               inode->copy_policy, 1);
+                               SD_INODE_INDEX_SIZE, 0, 0, inode->nr_copies,
+                               inode->copy_policy, true, false);
                } else if (path->p_ext_header) {
                        /* the last idx-node */
                        insert_ext_entry_nosearch(path->p_ext_header,
@@ -502,8 +502,8 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader,
                        path->p_idx->idx =
                                (LAST_EXT(path->p_ext_header) - 1)->idx;
                        writer(path->p_idx->oid, path->p_ext_header,
-                               SD_INODE_INDEX_SIZE, inode->nr_copies,
-                               inode->copy_policy, 1);
+                               SD_INODE_INDEX_SIZE, 0, 0, inode->nr_copies,
+                               inode->copy_policy, true, false);
                } else {
                        /* create a new ext-node */
                        leaf_node = xmalloc(SD_INODE_INDEX_SIZE);
@@ -513,8 +513,8 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader,
                        insert_ext_entry_nosearch(leaf_node,
                                        FIRST_EXT(leaf_node), idx, vdi_id);
                        writer(oid, leaf_node, SD_INODE_INDEX_SIZE,
-                                       inode->nr_copies,
-                                       inode->copy_policy, 1);
+                                       0, 0, inode->nr_copies,
+                                       inode->copy_policy, true, false);
                        insert_idx_entry_nosearch(header, path->p_idx,
                                        idx, oid);
                }
@@ -566,6 +566,63 @@ out:
        dump_btree(reader, inode);
 }
 
+/*
+ * Return the size of the meta-data in inode->data_vdi_id. When the leaf-node
+ * of the B-tree is not full, we don't need to read out all sizeof(sd_inode).
+ * The 'size' argument exists only for compatibility with parse_vdi().
+ */
+uint32_t sd_inode_get_meta_size(struct sd_inode *inode, size_t size)
+{
+       struct sd_extent_header *header;
+       uint32_t len;
+
+       if (inode->store_policy == 0) {
+               len = count_data_objs(inode) * sizeof(inode->data_vdi_id[0]);
+               if (len > size - SD_INODE_HEADER_SIZE - sizeof(uint32_t))
+                       len = size - SD_INODE_HEADER_SIZE - sizeof(uint32_t);
+       } else {
+               header = EXT_HEADER(inode->data_vdi_id);
+               len = sizeof(struct sd_extent_header);
+               if (header->depth == 1)
+                       len += sizeof(struct sd_extent) * header->entries;
+               else if (header->depth == 2)
+                       len += sizeof(struct sd_extent_idx) * header->entries;
+               else
+                       panic("Depth of B-tree is out of range(depth: %u)",
+                             header->depth);
+       }
+       return len;
+}
+
+/* Write the meta-data of inode out */
+int sd_inode_write_vid(write_node_fn writer, struct sd_inode *inode,
+                      uint32_t idx, uint32_t vid, uint32_t value,
+                      int flags, bool create, bool direct)
+{
+       uint32_t len;
+       int ret = SD_RES_SUCCESS;
+
+       if (inode->store_policy == 0)
+               ret = writer(vid_to_vdi_oid(vid), &value, sizeof(value),
+                            SD_INODE_HEADER_SIZE + sizeof(value) * idx,
+                            flags, inode->nr_copies, inode->copy_policy,
+                            create, direct);
+       else {
+               len = SD_INODE_HEADER_SIZE + sd_inode_get_meta_size(inode, 0);
+               ret = writer(vid_to_vdi_oid(vid), inode, len, 0, flags,
+                            inode->nr_copies, inode->copy_policy,
+                            create, false);
+               if (ret != SD_RES_SUCCESS)
+                       goto out;
+               ret = writer(vid_to_vdi_oid(vid), inode, sizeof(uint32_t),
+                            offsetof(struct sd_inode, btree_counter),
+                            flags, inode->nr_copies, inode->copy_policy,
+                            create, false);
+       }
+out:
+       return ret;
+}
+
 void sd_inode_copy_vdis(struct sd_inode *oldi, struct sd_inode *newi)
 {
        memcpy(newi->data_vdi_id, oldi->data_vdi_id, sizeof(newi->data_vdi_id));
diff --git a/sheep/ops.c b/sheep/ops.c
index de0a563..32eac97 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -818,20 +818,33 @@ static int local_flush_vdi(struct request *req)
 static int local_discard_obj(struct request *req)
 {
        uint64_t oid = req->rq.obj.oid;
-       uint32_t vid = oid_to_vid(oid), zero = 0;
-       int ret, idx = data_oid_to_idx(oid);
+       uint32_t vid = oid_to_vid(oid), zero = 0, tmp_vid;
+       int ret = SD_RES_SUCCESS, idx = data_oid_to_idx(oid);
+       struct sd_inode *inode = xmalloc(sizeof(struct sd_inode));
 
        sd_debug("%"PRIx64, oid);
-       ret = write_object(vid_to_vdi_oid(vid), (char *)&zero, sizeof(zero),
-                          SD_INODE_HEADER_SIZE + sizeof(vid) * idx, false);
+       ret = read_object(vid_to_vdi_oid(vid), (char *)inode,
+                       sizeof(struct sd_inode), 0);
        if (ret != SD_RES_SUCCESS)
-               return ret;
-       if (remove_object(oid) != SD_RES_SUCCESS)
-               sd_err("failed to remove %"PRIx64, oid);
+               goto out;
+
+       tmp_vid = INODE_GET_VID(inode, idx);
+       /* if no vid exists at idx, we don't need to remove it */
+       if (tmp_vid) {
+               INODE_SET_VID(inode, idx, vid);
+               ret = sd_inode_write_vid(sheep_bnode_writer, inode, idx, vid,
+                                        zero, 0, false, false);
+               if (ret != SD_RES_SUCCESS)
+                       goto out;
+               if (remove_object(oid) != SD_RES_SUCCESS)
+                       sd_err("failed to remove %"PRIx64, oid);
+       }
        /*
         * Return success even if remove_object fails because we have updated
         * inode successfully.
         */
+out:
+       free(inode);
        return SD_RES_SUCCESS;
 }
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index e402fcd..bd7e158 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -377,8 +377,10 @@ void objlist_cache_remove(uint64_t oid);
 void put_request(struct request *req);
 
 int sheep_bnode_writer(uint64_t oid, void *mem, unsigned int len,
-                      int copies, int copy_policy, int create);
-int sheep_bnode_reader(uint64_t oid, void **mem, unsigned int len);
+                      uint64_t offset, uint32_t flags, int copies,
+                      int copy_policy, bool create, bool direct);
+int sheep_bnode_reader(uint64_t oid, void **mem, unsigned int len,
+                      uint64_t offset);
 
 #define INODE_GET_VID(inode, idx) (sd_inode_get_vid(sheep_bnode_reader, \
                                        inode, idx))
diff --git a/sheep/vdi.c b/sheep/vdi.c
index 2403bcc..99d4335 100644
--- a/sheep/vdi.c
+++ b/sheep/vdi.c
@@ -30,14 +30,16 @@ static struct sd_lock vdi_state_lock = SD_LOCK_INITIALIZER;
 int ec_max_data_strip;
 
 int sheep_bnode_writer(uint64_t oid, void *mem, unsigned int len,
-                      int copies, int copy_policy, int create)
+                      uint64_t offset, uint32_t flags, int copies,
+                      int copy_policy, bool create, bool direct)
 {
-       return write_object(oid, mem, len, 0, create == 1);
+       return write_object(oid, mem, len, offset, create);
 }
 
-int sheep_bnode_reader(uint64_t oid, void **mem, unsigned int len)
+int sheep_bnode_reader(uint64_t oid, void **mem, unsigned int len,
+                      uint64_t offset)
 {
-       return read_object(oid, *mem, len, 0);
+       return read_object(oid, *mem, len, offset);
 }
 
 static int vdi_state_cmp(const struct vdi_state_entry *a,
diff --git a/sheepfs/volume.c b/sheepfs/volume.c
index 000d533..2fbb54c 100644
--- a/sheepfs/volume.c
+++ b/sheepfs/volume.c
@@ -66,8 +66,10 @@ static struct sd_lock vdi_inode_tree_lock = SD_LOCK_INITIALIZER;
 
 
 static int sheepfs_bnode_writer(uint64_t oid, void *mem, unsigned int len,
-                               int copies, int copy_policy, int create);
-static int sheepfs_bnode_reader(uint64_t oid, void **mem, unsigned int len);
+                               uint64_t offset, uint32_t flags, int copies,
+                               int copy_policy, bool create, bool direct);
+static int sheepfs_bnode_reader(uint64_t oid, void **mem, unsigned int len,
+                               uint64_t offset);
 
 #define INODE_GET_VID(inode, idx) (sd_inode_get_vid(\
                                        sheepfs_bnode_reader, inode, idx))
@@ -196,10 +198,8 @@ static int volume_rw_object(char *buf, uint64_t oid, size_t size,
        if (create) {
                INODE_SET_VID(vdi->inode, idx, vid);
                /* writeback inode update */
-               if (volume_rw_object((char *)&vid, vid_to_vdi_oid(vid),
-                                    sizeof(vid),
-                                    SD_INODE_HEADER_SIZE + sizeof(vid) * idx,
-                                    VOLUME_WRITE) < 0)
+               if (sd_inode_write_vid(sheepfs_bnode_writer, vdi->inode, idx,
+                                       vid, vid, 0, false, false) < 0)
                        return -1;
        }
 done:
@@ -250,19 +250,21 @@ static int volume_do_rw(const char *path, char *buf, size_t size,
 }
 
 int sheepfs_bnode_writer(uint64_t oid, void *mem, unsigned int len,
-                        int copies, int copy_policy, int create)
+                        uint64_t offset, uint32_t flags, int copies,
+                        int copy_policy, bool create, bool direct)
 {
        int ret;
-       ret = volume_rw_object(mem, oid, len, 0, VOLUME_WRITE);
+       ret = volume_rw_object(mem, oid, len, offset, VOLUME_WRITE);
        if (ret == len)
                return SD_RES_SUCCESS;
        return ret;
 }
 
-int sheepfs_bnode_reader(uint64_t oid, void **mem, unsigned int len)
+int sheepfs_bnode_reader(uint64_t oid, void **mem, unsigned int len,
+                        uint64_t offset)
 {
        int ret;
-       ret = volume_rw_object(*mem, oid, len, 0, VOLUME_READ);
+       ret = volume_rw_object(*mem, oid, len, offset, VOLUME_READ);
        if (ret == len)
                return SD_RES_SUCCESS;
        return ret;
-- 
1.7.1

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to