Increase the maximum number of data objects and add new macros for the inode data index. Add store_policy to 'struct vdi_iocb' and 'struct sd_req'.
Signed-off-by: Robin Dong <[email protected]> --- dog/common.c | 4 ++- dog/dog.h | 2 +- dog/farm/farm.c | 12 ++++++++-- dog/vdi.c | 47 +++++++++++++++++++++++++++++++------------- include/sheepdog_proto.h | 15 +++++++++---- lib/option.c | 7 ++++- lib/sd_inode.c | 48 +++++++++++++++++++++++---------------------- sheep/ops.c | 1 + sheep/sheep_priv.h | 1 + sheep/vdi.c | 9 +++++-- 10 files changed, 94 insertions(+), 52 deletions(-) diff --git a/dog/common.c b/dog/common.c index 1d088c8..bc8cb79 100644 --- a/dog/common.c +++ b/dog/common.c @@ -155,7 +155,9 @@ int parse_vdi(vdi_parser_func_t func, size_t size, void *data) oid = vid_to_vdi_oid(nr); - ret = sd_read_object(oid, &i, SD_INODE_HEADER_SIZE, 0, true); + /* for B-tree inode, we also need sd_extent_header */ + ret = sd_read_object(oid, &i, SD_INODE_HEADER_SIZE + + sizeof(struct sd_extent_header), 0, true); if (ret != SD_RES_SUCCESS) { sd_err("Failed to read inode header"); continue; diff --git a/dog/dog.h b/dog/dog.h index 0475cbc..5651ee6 100644 --- a/dog/dog.h +++ b/dog/dog.h @@ -79,7 +79,7 @@ void confirm(const char *message); void work_queue_wait(struct work_queue *q); int do_vdi_create(const char *vdiname, int64_t vdi_size, uint32_t base_vid, uint32_t *vdi_id, bool snapshot, - uint8_t nr_copies, uint8_t copy_policy); + uint8_t nr_copies, uint8_t copy_policy, uint8_t store_policy); int do_vdi_check(const struct sd_inode *inode); void show_progress(uint64_t done, uint64_t total, bool raw); size_t get_store_objsize(uint8_t copy_policy, uint64_t oid); diff --git a/dog/farm/farm.c b/dog/farm/farm.c index 990d26b..3886445 100644 --- a/dog/farm/farm.c +++ b/dog/farm/farm.c @@ -29,6 +29,7 @@ struct vdi_entry { uint32_t snap_id; uint8_t nr_copies; uint8_t copy_policy; + uint8_t store_policy; struct rb_node rb; }; static struct rb_root last_vdi_tree = RB_ROOT; @@ -57,7 +58,8 @@ static struct vdi_entry *find_vdi(const char *name) static struct vdi_entry *new_vdi(const char *name, uint64_t vdi_size, uint32_t vdi_id, 
uint32_t snap_id, - uint8_t nr_copies, uint8_t copy_policy) + uint8_t nr_copies, uint8_t copy_policy, + uint8_t store_policy) { struct vdi_entry *vdi; vdi = xmalloc(sizeof(struct vdi_entry)); @@ -67,6 +69,7 @@ static struct vdi_entry *new_vdi(const char *name, uint64_t vdi_size, vdi->snap_id = snap_id; vdi->nr_copies = nr_copies; vdi->copy_policy = copy_policy; + vdi->store_policy = store_policy; return vdi; } @@ -80,7 +83,8 @@ static void insert_vdi(struct sd_inode *new) new->vdi_id, new->snap_id, new->nr_copies, - new->copy_policy); + new->copy_policy, + new->store_policy); rb_insert(&last_vdi_tree, vdi, rb, vdi_cmp); } else if (vdi->snap_id < new->snap_id) { vdi->vdi_size = new->vdi_size; @@ -88,6 +92,7 @@ static void insert_vdi(struct sd_inode *new) vdi->snap_id = new->snap_id; vdi->nr_copies = new->nr_copies; vdi->copy_policy = new->copy_policy; + vdi->store_policy = new->store_policy; } } @@ -100,7 +105,8 @@ static int create_active_vdis(void) vdi->vdi_size, vdi->vdi_id, &new_vid, false, vdi->nr_copies, - vdi->copy_policy) < 0) + vdi->copy_policy, + vdi->store_policy) < 0) return -1; } return 0; diff --git a/dog/vdi.c b/dog/vdi.c index 313b51d..3042577 100644 --- a/dog/vdi.c +++ b/dog/vdi.c @@ -31,6 +31,7 @@ static struct sd_option vdi_options[] = { {'c', "copies", true, "specify the data redundancy level"}, {'F', "from", true, "create a differential backup from the snapshot"}, {'f', "force", false, "do operation forcibly"}, + {'b', "hyper", false, "create a hyper volume"}, { 0, NULL, false, NULL }, }; @@ -47,6 +48,7 @@ static struct vdi_cmd_data { char from_snapshot_tag[SD_MAX_VDI_TAG_LEN]; bool force; uint8_t copy_policy; + uint8_t store_policy; } vdi_cmd_data = { ~0, }; struct get_vdi_info { @@ -494,7 +496,7 @@ static int read_vdi_obj(const char *vdiname, int snapid, const char *tag, int do_vdi_create(const char *vdiname, int64_t vdi_size, uint32_t base_vid, uint32_t *vdi_id, bool snapshot, - uint8_t nr_copies, uint8_t copy_policy) + uint8_t nr_copies, 
uint8_t copy_policy, uint8_t store_policy) { struct sd_req hdr; struct sd_rsp *rsp = (struct sd_rsp *)&hdr; @@ -513,6 +515,7 @@ int do_vdi_create(const char *vdiname, int64_t vdi_size, hdr.vdi.vdi_size = vdi_size; hdr.vdi.copies = nr_copies; hdr.vdi.copy_policy = copy_policy; + hdr.vdi.store_policy = store_policy; ret = dog_exec_req(&sd_nid, &hdr, buf); if (ret < 0) @@ -546,8 +549,11 @@ static int vdi_create(int argc, char **argv) ret = option_parse_size(argv[optind], &size); if (ret < 0) return EXIT_USAGE; - if (size > SD_MAX_VDI_SIZE) { - sd_err("VDI size is too large"); + if (size > SD_OLD_MAX_VDI_SIZE && 0 == vdi_cmd_data.store_policy) { + sd_err("VDI size is larger than %s bytes, please use '-b' to " + "create a hyper volume with size up to %s bytes", + strnumber(SD_OLD_MAX_VDI_SIZE), + strnumber(SD_MAX_VDI_SIZE)); return EXIT_USAGE; } @@ -558,7 +564,8 @@ static int vdi_create(int argc, char **argv) } ret = do_vdi_create(vdiname, size, 0, &vid, false, - vdi_cmd_data.nr_copies, vdi_cmd_data.copy_policy); + vdi_cmd_data.nr_copies, vdi_cmd_data.copy_policy, + vdi_cmd_data.store_policy); if (ret != EXIT_SUCCESS || !vdi_cmd_data.prealloc) goto out; @@ -633,7 +640,8 @@ static int vdi_snapshot(int argc, char **argv) return EXIT_FAILURE; ret = do_vdi_create(vdiname, inode->vdi_size, vid, NULL, true, - inode->nr_copies, inode->copy_policy); + inode->nr_copies, inode->copy_policy, + inode->store_policy); if (ret == EXIT_SUCCESS && verbose) { if (raw_output) @@ -677,7 +685,8 @@ static int vdi_clone(int argc, char **argv) goto out; ret = do_vdi_create(dst_vdi, inode->vdi_size, base_vid, &new_vid, false, - vdi_cmd_data.nr_copies, inode->copy_policy); + vdi_cmd_data.nr_copies, inode->copy_policy, + inode->store_policy); if (ret != EXIT_SUCCESS || !vdi_cmd_data.prealloc) goto out; @@ -755,15 +764,20 @@ static int vdi_resize(int argc, char **argv) ret = option_parse_size(argv[optind], &new_size); if (ret < 0) return EXIT_USAGE; - if (new_size > SD_MAX_VDI_SIZE) { - 
sd_err("New VDI size is too large"); - return EXIT_USAGE; - } ret = read_vdi_obj(vdiname, 0, "", &vid, inode, SD_INODE_HEADER_SIZE); if (ret != EXIT_SUCCESS) return ret; + if (new_size > SD_OLD_MAX_VDI_SIZE && 0 == inode->store_policy) { + sd_err("New VDI size is too large"); + return EXIT_USAGE; + } else if (new_size > SD_MAX_VDI_SIZE && + 1 == inode->store_policy) { + sd_err("New VDI (B-tree) size is too large"); + return EXIT_USAGE; + } + if (new_size < inode->vdi_size) { sd_err("Shrinking VDIs is not implemented"); return EXIT_USAGE; @@ -868,7 +882,8 @@ static int vdi_rollback(int argc, char **argv) } ret = do_vdi_create(vdiname, inode->vdi_size, base_vid, &new_vid, - false, vdi_cmd_data.nr_copies, inode->copy_policy); + false, vdi_cmd_data.nr_copies, inode->copy_policy, + inode->store_policy); if (ret == EXIT_SUCCESS && verbose) { if (raw_output) @@ -1962,7 +1977,8 @@ static uint32_t do_restore(const char *vdiname, int snapid, const char *tag) goto out; ret = do_vdi_create(vdiname, inode->vdi_size, inode->vdi_id, &vid, - false, inode->nr_copies, inode->copy_policy); + false, inode->nr_copies, inode->copy_policy, + inode->store_policy); if (ret != EXIT_SUCCESS) { sd_err("Failed to read VDI"); goto out; @@ -2070,7 +2086,8 @@ out: recovery_ret = do_vdi_create(vdiname, current_inode->vdi_size, current_inode->parent_vdi_id, NULL, true, current_inode->nr_copies, - current_inode->copy_policy); + current_inode->copy_policy, + current_inode->store_policy); if (recovery_ret != EXIT_SUCCESS) { sd_err("failed to resume the current vdi"); ret = recovery_ret; @@ -2248,7 +2265,7 @@ static struct subcommand vdi_cmd[] = { {"check", "<vdiname>", "saph", "check and repair image's consistency", NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_check, vdi_options}, - {"create", "<vdiname> <size>", "Pcaphrv", "create an image", + {"create", "<vdiname> <size>", "Pbcaphrv", "create an image", NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, vdi_create, vdi_options}, {"snapshot", "<vdiname>", "saphrv", 
"create a snapshot", @@ -2401,6 +2418,8 @@ static int vdi_parser(int ch, const char *opt) case 'f': vdi_cmd_data.force = true; break; + case 'b': + vdi_cmd_data.store_policy = 1; } return 0; diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h index b05d30e..5b2bfa8 100644 --- a/include/sheepdog_proto.h +++ b/include/sheepdog_proto.h @@ -83,8 +83,7 @@ /* * Object ID rules * - * 0 - 19 (20 bits): data object space - * 20 - 31 (12 bits): reserved data object space + * 0 - 31 (32 bits): data object space * 32 - 55 (24 bits): VDI object space * 56 - 59 ( 4 bits): reserved VDI object space * 60 - 63 ( 4 bits): object type indentifier space @@ -96,7 +95,8 @@ #define VMSTATE_BIT (UINT64_C(1) << 62) #define VDI_ATTR_BIT (UINT64_C(1) << 61) #define VDI_BTREE_BIT (UINT64_C(1) << 60) -#define MAX_DATA_OBJS (1ULL << 20) +#define OLD_MAX_DATA_OBJS (1ULL << 20) +#define MAX_DATA_OBJS (1ULL << 32) #define MAX_CHILDREN 1024U #define SD_MAX_VDI_LEN 256U #define SD_MAX_VDI_TAG_LEN 256U @@ -105,10 +105,13 @@ #define SD_MAX_SNAPSHOT_TAG_LEN 256U #define SD_NR_VDIS (1U << 24) #define SD_DATA_OBJ_SIZE (UINT64_C(1) << 22) +#define SD_OLD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * OLD_MAX_DATA_OBJS) #define SD_MAX_VDI_SIZE (SD_DATA_OBJ_SIZE * MAX_DATA_OBJS) #define SD_INODE_SIZE (sizeof(struct sd_inode)) #define SD_INODE_INDEX_SIZE (sizeof(uint32_t) * MAX_DATA_OBJS) +#define SD_INODE_DATA_INDEX (1ULL << 20) +#define SD_INODE_DATA_INDEX_SIZE (sizeof(uint32_t) * SD_INODE_DATA_INDEX) #define SD_INODE_HEADER_SIZE offsetof(struct sd_inode, data_vdi_id) #define SD_ATTR_OBJ_SIZE (sizeof(struct sheepdog_vdi_attr)) #define CURRENT_VDI_ID 0 @@ -142,7 +145,8 @@ struct sd_req { uint32_t base_vdi_id; uint8_t copies; uint8_t copy_policy; - uint8_t reserved[2]; + uint8_t store_policy; + uint8_t reserved; uint32_t snapid; } vdi; @@ -226,7 +230,7 @@ struct sd_inode { uint32_t vdi_id; uint32_t parent_vdi_id; uint32_t child_vdi_id[MAX_CHILDREN]; - uint32_t data_vdi_id[MAX_DATA_OBJS]; + uint32_t 
data_vdi_id[SD_INODE_DATA_INDEX]; uint32_t btree_counter; }; @@ -264,6 +268,7 @@ struct sheepdog_vdi_attr { char value[SD_MAX_VDI_ATTR_VALUE_LEN]; }; +extern void sd_inode_init(void *data, int depth); extern uint32_t sd_inode_get_vid(read_node_fn reader, const struct sd_inode *inode, uint32_t idx); extern void sd_inode_set_vid(write_node_fn writer, read_node_fn reader, diff --git a/lib/option.c b/lib/option.c index d12c205..872a573 100644 --- a/lib/option.c +++ b/lib/option.c @@ -71,6 +71,9 @@ int option_parse_size(const char *value, uint64_t *ret) goto err; switch (*postfix) { + case 'P': + case 'p': + sizef *= 1024; case 'T': case 't': sizef *= 1024; @@ -90,8 +93,8 @@ int option_parse_size(const char *value, uint64_t *ret) default: err: sd_err("Invalid size '%s'", value); - sd_err("You may use k, M, G or T suffixes for " - "kilobytes, megabytes, gigabytes and terabytes."); + sd_err("You may use k, M, G, T or P suffixes for " + "kilobytes, megabytes, gigabytes, terabytes and petabytes."); return -1; } diff --git a/lib/sd_inode.c b/lib/sd_inode.c index b3e546d..26c9a3a 100644 --- a/lib/sd_inode.c +++ b/lib/sd_inode.c @@ -76,7 +76,8 @@ #include "util.h" #include "sheepdog_proto.h" -#define EXT_MAX_SPACE (SD_INODE_INDEX_SIZE - sizeof(struct sd_extent_header)) +#define EXT_MAX_SPACE (SD_INODE_DATA_INDEX_SIZE - \ + sizeof(struct sd_extent_header)) #define EXT_MAX_ENTRIES (EXT_MAX_SPACE / sizeof(struct sd_extent)) #define EXT_IDX_MAX_ENTRIES (EXT_MAX_SPACE / sizeof(struct sd_extent_idx)) @@ -155,11 +156,12 @@ static void dump_btree(read_node_fn reader, struct sd_inode *inode) } else if (header->depth == 2) { last_idx = LAST_IDX(inode->data_vdi_id); iter_idx = FIRST_IDX(inode->data_vdi_id); - leaf_node = xmalloc(SD_INODE_INDEX_SIZE); + leaf_node = xmalloc(SD_INODE_DATA_INDEX_SIZE); tmp = (void *)leaf_node; while (iter_idx != last_idx) { - reader(iter_idx->oid, &tmp, SD_INODE_INDEX_SIZE, 0); + reader(iter_idx->oid, &tmp, + SD_INODE_DATA_INDEX_SIZE, 0); sd_info("btree> %p 
idx: %d, %lu, %u", iter_idx, iter_idx->idx, iter_idx->oid, @@ -205,7 +207,7 @@ static void *binary_search(void *first, void *last, void *key, return (void *)l; } -static void sd_inode_init(void *data, int depth) +void sd_inode_init(void *data, int depth) { struct sd_extent_header *header = EXT_HEADER(data); header->magic = INODE_BTREE_MAGIC; @@ -326,8 +328,8 @@ static void transfer_to_idx_root(write_node_fn writer, struct sd_inode *inode) uint32_t num = root->entries / 2; /* create two leaf-node and copy the entries from root-node */ - left = xmalloc(SD_INODE_INDEX_SIZE); - right = xmalloc(SD_INODE_INDEX_SIZE); + left = xmalloc(SD_INODE_DATA_INDEX_SIZE); + right = xmalloc(SD_INODE_DATA_INDEX_SIZE); split_to_nodes(root, left, right, num); @@ -335,10 +337,10 @@ static void transfer_to_idx_root(write_node_fn writer, struct sd_inode *inode) left_oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++); right_oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++); - writer(left_oid, left, SD_INODE_INDEX_SIZE, 0, 0, inode->nr_copies, - inode->copy_policy, true, false); - writer(right_oid, right, SD_INODE_INDEX_SIZE, 0, 0, inode->nr_copies, - inode->copy_policy, true, false); + writer(left_oid, left, SD_INODE_DATA_INDEX_SIZE, 0, 0, + inode->nr_copies, inode->copy_policy, true, false); + writer(right_oid, right, SD_INODE_DATA_INDEX_SIZE, 0, 0, + inode->nr_copies, inode->copy_policy, true, false); /* change root from ext-node to idx-node */ root->entries = 0; @@ -368,12 +370,12 @@ static int search_whole_btree(read_node_fn reader, const struct sd_inode *inode, if (header->depth == 2) { path->depth = 2; path->p_idx = search_idx_entry(header, idx); - leaf_node = xmalloc(SD_INODE_INDEX_SIZE); + leaf_node = xmalloc(SD_INODE_DATA_INDEX_SIZE); tmp = (void *)leaf_node; if (idx_in_range(header, path->p_idx)) { oid = path->p_idx->oid; - ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE, 0); + ret = reader(oid, &tmp, SD_INODE_DATA_INDEX_SIZE, 0); if (ret != SD_RES_SUCCESS) goto 
out; path->p_ext = search_ext_entry(leaf_node, idx); @@ -384,7 +386,7 @@ static int search_whole_btree(read_node_fn reader, const struct sd_inode *inode, } else { /* check if last idx-node has space */ oid = (path->p_idx - 1)->oid; - ret = reader(oid, &tmp, SD_INODE_INDEX_SIZE, 0); + ret = reader(oid, &tmp, SD_INODE_DATA_INDEX_SIZE, 0); if (ret != SD_RES_SUCCESS) goto out; if (leaf_node->entries < EXT_MAX_ENTRIES) { @@ -440,14 +442,14 @@ static void split_ext_node(write_node_fn writer, struct sd_inode *inode, uint32_t num = old->entries / 2; uint64_t new_oid; - new_ext = xmalloc(SD_INODE_INDEX_SIZE); + new_ext = xmalloc(SD_INODE_DATA_INDEX_SIZE); split_to_nodes(old, new_ext, old, num); new_oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++); - writer(new_oid, new_ext, SD_INODE_INDEX_SIZE, 0, 0, inode->nr_copies, - inode->copy_policy, true, false); - writer(path->p_idx->oid, old, SD_INODE_INDEX_SIZE, 0, 0, + writer(new_oid, new_ext, SD_INODE_DATA_INDEX_SIZE, 0, 0, + inode->nr_copies, inode->copy_policy, true, false); + writer(path->p_idx->oid, old, SD_INODE_DATA_INDEX_SIZE, 0, 0, inode->nr_copies, inode->copy_policy, false, false); /* write new index */ @@ -492,8 +494,8 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader, insert_ext_entry_nosearch(path->p_ext_header, path->p_ext, idx, vdi_id); writer(path->p_idx->oid, path->p_ext_header, - SD_INODE_INDEX_SIZE, 0, 0, inode->nr_copies, - inode->copy_policy, true, false); + SD_INODE_DATA_INDEX_SIZE, 0, 0, inode->nr_copies, + inode->copy_policy, true, false); } else if (path->p_ext_header) { /* the last idx-node */ insert_ext_entry_nosearch(path->p_ext_header, @@ -502,17 +504,17 @@ static int insert_new_node(write_node_fn writer, read_node_fn reader, path->p_idx->idx = (LAST_EXT(path->p_ext_header) - 1)->idx; writer(path->p_idx->oid, path->p_ext_header, - SD_INODE_INDEX_SIZE, 0, 0, inode->nr_copies, - inode->copy_policy, true, false); + SD_INODE_DATA_INDEX_SIZE, 0, 0, inode->nr_copies, + 
inode->copy_policy, true, false); } else { /* create a new ext-node */ - leaf_node = xmalloc(SD_INODE_INDEX_SIZE); + leaf_node = xmalloc(SD_INODE_DATA_INDEX_SIZE); sd_inode_init(leaf_node, 2); oid = vid_to_btree_oid(inode->vdi_id, inode->btree_counter++); insert_ext_entry_nosearch(leaf_node, FIRST_EXT(leaf_node), idx, vdi_id); - writer(oid, leaf_node, SD_INODE_INDEX_SIZE, + writer(oid, leaf_node, SD_INODE_DATA_INDEX_SIZE, 0, 0, inode->nr_copies, inode->copy_policy, true, false); insert_idx_entry_nosearch(header, path->p_idx, diff --git a/sheep/ops.c b/sheep/ops.c index ef09920..e40e833 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -84,6 +84,7 @@ static int cluster_new_vdi(struct request *req) .base_vid = hdr->vdi.base_vdi_id, .create_snapshot = !!hdr->vdi.snapid, .copy_policy = hdr->vdi.copy_policy, + .store_policy = hdr->vdi.store_policy, .nr_copies = hdr->vdi.copies ? hdr->vdi.copies : sys->cinfo.nr_copies, .time = (uint64_t) tv.tv_sec << 32 | tv.tv_usec * 1000, diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index bd7e158..d333573 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -177,6 +177,7 @@ struct vdi_iocb { uint32_t snapid; bool create_snapshot; uint8_t copy_policy; + uint8_t store_policy; uint8_t nr_copies; uint64_t time; }; diff --git a/sheep/vdi.c b/sheep/vdi.c index 99d4335..3d4af76 100644 --- a/sheep/vdi.c +++ b/sheep/vdi.c @@ -232,12 +232,15 @@ static struct sd_inode *alloc_inode(const struct vdi_iocb *iocb, new->create_time = iocb->time; new->vdi_size = iocb->size; new->copy_policy = iocb->copy_policy; + new->store_policy = iocb->store_policy; new->nr_copies = iocb->nr_copies; new->block_size_shift = find_next_bit(&block_size, BITS_PER_LONG, 0); new->snap_id = new_snapid; new->parent_vdi_id = iocb->base_vid; if (data_vdi_id) memcpy(new->data_vdi_id, data_vdi_id, sizeof(new->data_vdi_id)); + else if (new->store_policy) + sd_inode_init(new->data_vdi_id, 1); return new; } @@ -261,9 +264,9 @@ static int create_vdi(const struct vdi_iocb 
*iocb, uint32_t new_snapid, int ret; sd_debug("%s: size %" PRIu64 ", new_vid %" PRIx32 ", copies %d, " - "snapid %" PRIu32 " copy policy %"PRIu8, iocb->name, - iocb->size, new_vid, iocb->nr_copies, new_snapid, - new->copy_policy); + "snapid %" PRIu32 " copy policy %"PRIu8 "store policy %"PRIu8, + iocb->name, iocb->size, new_vid, iocb->nr_copies, new_snapid, + new->copy_policy, new->store_policy); ret = write_object(vid_to_vdi_oid(new_vid), (char *)new, sizeof(*new), 0, true); -- 1.7.1 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
