On Tue, Jul 1, 2014 at 10:51 AM, Ruoyu <lian...@ucweb.com> wrote: > How about this patch, regardless of the opcode? > > The SD_OP_OIDS_EXIST request requires a new kind of operation that both > reads and writes. Is there a better way to satisfy it?
This patch (including the new opcode) looks good to me. Could you rebase it on the latest master? Thanks, Hitoshi > > > On 2014年05月28日 17:48, Ruoyu wrote: >> >> Sometimes we want to quickly check whether some of the vdi objects >> or data objects have been lost due to an unexpected issue. >> >> Although vdi check will do, it takes a lot of time because of >> too many client-server round trips. And the probability of >> triggering automatic data repair is quite low since the write path >> is strongly consistent. >> >> Therefore, the new option -e (--exist) checks whether all the objects >> related to the vdi exist. It is fast because it submits >> the batched object ids only once per node. I think this is enough >> for the situation. >> >> Usage: dog vdi check -e <vdiname> >> >> Example: >> $ dog vdi check -e test >> test is fine, no object is missing. >> >> $ dog vdi check -e ucweb >> [127.0.0.1:7001] oid 80b8071d00000000 is missing. >> [127.0.0.1:7001] oid 00b8071d000000ee is missing. >> ucweb lost 2 object(s). 
>> >> Signed-off-by: Ruoyu <lian...@ucweb.com> >> --- >> dog/vdi.c | 114 >> ++++++++++++++++++++++++++++++++++++++++++++++- >> include/internal_proto.h | 9 ++++ >> include/sheep.h | 6 +++ >> include/sheepdog_proto.h | 1 + >> lib/net.c | 2 +- >> sheep/ops.c | 31 +++++++++++++ >> 6 files changed, 160 insertions(+), 3 deletions(-) >> >> diff --git a/dog/vdi.c b/dog/vdi.c >> index 866cb36..45fe6a9 100644 >> --- a/dog/vdi.c >> +++ b/dog/vdi.c >> @@ -21,6 +21,8 @@ >> #include "sha1.h" >> #include "fec.h" >> +struct rb_root oid_tree = RB_ROOT; >> + >> static struct sd_option vdi_options[] = { >> {'P', "prealloc", false, "preallocate all the data objects"}, >> {'n', "no-share", false, "share nothing with its parent"}, >> @@ -34,6 +36,7 @@ static struct sd_option vdi_options[] = { >> {'f', "force", false, "do operation forcibly"}, >> {'y', "hyper", false, "create a hyper volume"}, >> {'o', "oid", true, "specify the object id of the tracking >> object"}, >> + {'e', "exist", false, "check objects exist or not only, no >> repairing"}, >> { 0, NULL, false, NULL }, >> }; >> @@ -53,6 +56,7 @@ static struct vdi_cmd_data { >> uint8_t store_policy; >> uint64_t oid; >> bool no_share; >> + bool exist; >> } vdi_cmd_data = { ~0, }; >> struct get_vdi_info { >> @@ -875,6 +879,106 @@ out: >> return ret; >> } >> +#define OIDS_INIT_LENGTH 1024 >> + >> +static void save_oid(uint64_t oid, int copies) >> +{ >> + const struct sd_vnode *vnodes[SD_MAX_COPIES]; >> + struct oid_entry *entry; >> + >> + oid_to_vnodes(oid, &sd_vroot, copies, vnodes); >> + for (int i = 0; i < copies; i++) { >> + struct oid_entry key = { >> + .node = (struct sd_node *) vnodes[i]->node >> + }; >> + entry = rb_search(&oid_tree, &key, rb, oid_entry_cmp); >> + if (!entry) >> + panic("rb_search() failure."); >> + >> + if (entry->last >= entry->end) { >> + entry->end *= 2; >> + entry->oids = xrealloc(entry->oids, >> + sizeof(uint64_t) * entry->end); >> + } >> + entry->oids[entry->last] = oid; >> + entry->last++; >> + } >> +} 
>> + >> +static void build_oid_tree(const struct sd_inode *inode) >> +{ >> + uint32_t max_idx, vid; >> + uint64_t oid; >> + struct sd_node *node; >> + struct oid_entry *entry; >> + int copies = min((int)inode->nr_copies, sd_zones_nr); >> + >> + rb_for_each_entry(node, &sd_nroot, rb) { >> + entry = xmalloc(sizeof(*entry)); >> + entry->node = node; >> + entry->oids = xmalloc(sizeof(uint64_t) * >> OIDS_INIT_LENGTH); >> + entry->end = OIDS_INIT_LENGTH; >> + entry->last = 0; >> + rb_insert(&oid_tree, entry, rb, oid_entry_cmp); >> + } >> + >> + save_oid(vid_to_vdi_oid(inode->vdi_id), copies); >> + max_idx = count_data_objs(inode); >> + for (uint32_t idx = 0; idx < max_idx; idx++) { >> + vid = sd_inode_get_vid(inode, idx); >> + if (vid == 0) >> + continue; >> + oid = vid_to_data_oid(vid, idx); >> + save_oid(oid, copies); >> + } >> +} >> + >> +static void destroy_oid_tree(void) >> +{ >> + struct oid_entry *entry; >> + >> + rb_for_each_entry(entry, &oid_tree, rb) >> + free(entry->oids); >> + rb_destroy(&oid_tree, struct oid_entry, rb); >> +} >> + >> +static int do_obj_check(const struct sd_inode *inode) >> +{ >> + int total = 0; >> + struct oid_entry *entry; >> + struct sd_req hdr; >> + struct sd_rsp *rsp = (struct sd_rsp *)&hdr; >> + >> + build_oid_tree(inode); >> + >> + rb_for_each_entry(entry, &oid_tree, rb) { >> + sd_init_req(&hdr, SD_OP_OIDS_EXIST); >> + hdr.data_length = sizeof(uint64_t) * entry->last; >> + hdr.flags = SD_FLAG_CMD_WRITE | SD_FLAG_CMD_READ; >> + int ret = dog_exec_req(&entry->node->nid, &hdr, >> entry->oids); >> + if (ret < 0) >> + panic("dog_exec_req() failure."); >> + >> + int n = rsp->data_length / sizeof(uint64_t); >> + total += n; >> + for (int i = 0; i < n; i++) >> + printf("[%s] oid %016"PRIx64" is missing.\n", >> + addr_to_str(entry->node->nid.addr, >> + >> entry->node->nid.port), >> + entry->oids[i]); >> + } >> + >> + destroy_oid_tree(); >> + >> + if (total == 0) { >> + printf("%s is fine, no object is missing.\n", >> inode->name); >> + return 
EXIT_SUCCESS; >> + } else { >> + printf("%s lost %d object(s).\n", inode->name, total); >> + return EXIT_FAILURE; >> + } >> +} >> + >> static int do_track_object(uint64_t oid, uint8_t nr_copies) >> { >> int i, j, ret; >> @@ -1771,7 +1875,10 @@ static int vdi_check(int argc, char **argv) >> goto out; >> } >> - ret = do_vdi_check(inode); >> + if (vdi_cmd_data.exist) >> + ret = do_obj_check(inode); >> + else >> + ret = do_vdi_check(inode); >> out: >> free(inode); >> return ret; >> @@ -2359,7 +2466,7 @@ static int vdi_cache(int argc, char **argv) >> } >> static struct subcommand vdi_cmd[] = { >> - {"check", "<vdiname>", "saph", "check and repair image's >> consistency", >> + {"check", "<vdiname>", "seaph", "check and repair image's >> consistency", >> NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, >> vdi_check, vdi_options}, >> {"create", "<vdiname> <size>", "Pycaphrv", "create an image", >> @@ -2491,6 +2598,9 @@ static int vdi_parser(int ch, const char *opt) >> exit(EXIT_FAILURE); >> } >> break; >> + case 'e': >> + vdi_cmd_data.exist = true; >> + break; >> } >> return 0; >> diff --git a/include/internal_proto.h b/include/internal_proto.h >> index ada084f..9d3928e 100644 >> --- a/include/internal_proto.h >> +++ b/include/internal_proto.h >> @@ -100,6 +100,7 @@ >> #define SD_OP_NFS_DELETE 0xBC >> #define SD_OP_EXIST 0xBD >> #define SD_OP_CLUSTER_INFO 0xBE >> +#define SD_OP_OIDS_EXIST 0xBF >> #define SD_OP_ALTER_CLUSTER_COPY 0xC0 >> #define SD_OP_ALTER_VDI_COPY 0xC1 >> @@ -175,6 +176,14 @@ struct sd_node { >> #endif >> }; >> +struct oid_entry { >> + struct rb_node rb; >> + struct sd_node *node; /* key */ >> + uint64_t *oids; /* object id array */ >> + int end; /* idx to the end of the allocated oid array >> */ >> + int last; /* idx to the last element of the oid array >> */ >> +}; >> + >> /* >> * A joining sheep multicasts the local cluster info. Then, the >> existing nodes >> * reply the latest cluster info which is unique among all of the nodes. 
>> diff --git a/include/sheep.h b/include/sheep.h >> index 785883e..ef8958c 100644 >> --- a/include/sheep.h >> +++ b/include/sheep.h >> @@ -199,6 +199,12 @@ static inline int node_cmp(const struct sd_node >> *node1, >> return node_id_cmp(&node1->nid, &node2->nid); >> } >> +static inline int oid_entry_cmp(const struct oid_entry *entry1, >> + const struct oid_entry *entry2) >> +{ >> + return node_cmp(entry1->node, entry2->node); >> +} >> + >> static inline bool node_eq(const struct sd_node *a, const struct sd_node >> *b) >> { >> return node_cmp(a, b) == 0; >> diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h >> index 9361bad..c6e21b4 100644 >> --- a/include/sheepdog_proto.h >> +++ b/include/sheepdog_proto.h >> @@ -45,6 +45,7 @@ >> #define SD_FLAG_CMD_COW 0x02 >> #define SD_FLAG_CMD_CACHE 0x04 >> #define SD_FLAG_CMD_DIRECT 0x08 /* don't use object cache */ >> +#define SD_FLAG_CMD_READ 0x10 >> /* flags above 0x80 are sheepdog-internal */ >> #define SD_RES_SUCCESS 0x00 /* Success */ >> diff --git a/lib/net.c b/lib/net.c >> index b32e022..c2d86cb 100644 >> --- a/lib/net.c >> +++ b/lib/net.c >> @@ -333,7 +333,7 @@ int exec_req(int sockfd, struct sd_req *hdr, void >> *data, >> if (hdr->flags & SD_FLAG_CMD_WRITE) { >> wlen = hdr->data_length; >> - rlen = 0; >> + rlen = (hdr->flags & SD_FLAG_CMD_READ) ? 
hdr->data_length >> : 0; >> } else { >> wlen = 0; >> rlen = hdr->data_length; >> diff --git a/sheep/ops.c b/sheep/ops.c >> index 61eb37f..c54017e 100644 >> --- a/sheep/ops.c >> +++ b/sheep/ops.c >> @@ -1058,6 +1058,30 @@ static int local_oid_exist(struct request *req) >> return SD_RES_NO_OBJ; >> } >> +static int local_oids_exist(const struct sd_req *req, struct sd_rsp >> *rsp, >> + void *data) >> +{ >> + struct request *r = container_of(req, struct request, rq); >> + uint64_t *oids = (uint64_t *) data; >> + uint8_t ec_index; >> + int i, j, n = req->data_length / sizeof(uint64_t); >> + >> + for (i = 0, j = 0; i < n; i++) { >> + ec_index = local_ec_index(r->vinfo, oids[i]); >> + if (is_erasure_oid(oids[i]) && ec_index == SD_MAX_COPIES) >> + oids[j++] = oids[i]; >> + else if (!sd_store->exist(oids[i], ec_index)) >> + oids[j++] = oids[i]; >> + } >> + >> + if (j > 0) { >> + rsp->data_length = sizeof(uint64_t) * j; >> + return SD_RES_NO_OBJ; >> + } >> + >> + return SD_RES_SUCCESS; >> +} >> + >> static int local_cluster_info(const struct sd_req *req, struct sd_rsp >> *rsp, >> void *data) >> { >> @@ -1408,6 +1432,13 @@ static struct sd_op_template sd_ops[] = { >> .process_work = local_oid_exist, >> }, >> + [SD_OP_OIDS_EXIST] = { >> + .name = "OIDS_EXIST", >> + .type = SD_OP_TYPE_LOCAL, >> + .force = true, >> + .process_main = local_oids_exist, >> + }, >> + >> [SD_OP_CLUSTER_INFO] = { >> .name = "CLUSTER INFO", >> .type = SD_OP_TYPE_LOCAL, > > > > -- > sheepdog mailing list > sheepdog@lists.wpkg.org > http://lists.wpkg.org/mailman/listinfo/sheepdog -- sheepdog mailing list sheepdog@lists.wpkg.org http://lists.wpkg.org/mailman/listinfo/sheepdog