At Mon, 28 Jul 2014 18:11:42 +0800, Ruoyu wrote: > > Sometimes we want to quickly check whether some of the vdi objects > or data objects are lost due to an unexpected issue. > > Although vdi check will do, it takes a lot of time because of > too much client-server communication. And the probability of > triggering data auto fixing is quite low since the writing process > is strongly consistent. > > Therefore, the new option -e (--exist) checks whether all the objects > related to the vdi exist or not. It is fast because it submits > the batched object ids only once per node. I think this is enough > for the situation. > > Usage: dog vdi check -e <vdiname> > > Example: > $ dog vdi check -e test > test is fine, no object is missing. > > $ dog vdi check -e ucweb > [127.0.0.1:7001] oid 80b8071d00000000 is missing. > [127.0.0.1:7001] oid 00b8071d000000ee is missing. > ucweb lost 2 object(s). > > v4 is rebased on the latest master and > > 1. the help message is updated to tell the user that vdi check -e will > neither compare nor repair objects > 2. the function do_obj_check is renamed to do_vdi_check_exist > 3. a new command flag, SD_FLAG_CMD_FILTER, is introduced because > neither read nor write is quite appropriate > 4. the value of SD_FLAG_CMD_FILTER is changed because the original one > is already in use > > Signed-off-by: Ruoyu <lian...@ucweb.com> > --- > dog/vdi.c | 115 > ++++++++++++++++++++++++++++++++++++++++++++++- > include/internal_proto.h | 9 ++++ > include/sheep.h | 6 +++ > include/sheepdog_proto.h | 1 + > lib/net.c | 3 ++ > sheep/ops.c | 31 +++++++++++++ > 6 files changed, 163 insertions(+), 2 deletions(-)
Applied, thanks. Hitoshi > > diff --git a/dog/vdi.c b/dog/vdi.c > index 97ae63c..93ae763 100644 > --- a/dog/vdi.c > +++ b/dog/vdi.c > @@ -21,6 +21,8 @@ > #include "sha1.h" > #include "fec.h" > > +struct rb_root oid_tree = RB_ROOT; > + > static struct sd_option vdi_options[] = { > {'P', "prealloc", false, "preallocate all the data objects"}, > {'n', "no-share", false, "share nothing with its parent"}, > @@ -34,6 +36,8 @@ static struct sd_option vdi_options[] = { > {'f', "force", false, "do operation forcibly"}, > {'y', "hyper", false, "create a hyper volume"}, > {'o', "oid", true, "specify the object id of the tracking object"}, > + {'e', "exist", false, "only check objects exist or not,\n" > + " neither comparing nor repairing"}, > { 0, NULL, false, NULL }, > }; > > @@ -53,6 +57,7 @@ static struct vdi_cmd_data { > uint8_t store_policy; > uint64_t oid; > bool no_share; > + bool exist; > } vdi_cmd_data = { ~0, }; > > struct get_vdi_info { > @@ -985,6 +990,106 @@ out: > return ret; > } > > +#define OIDS_INIT_LENGTH 1024 > + > +static void save_oid(uint64_t oid, int copies) > +{ > + const struct sd_vnode *vnodes[SD_MAX_COPIES]; > + struct oid_entry *entry; > + > + oid_to_vnodes(oid, &sd_vroot, copies, vnodes); > + for (int i = 0; i < copies; i++) { > + struct oid_entry key = { > + .node = (struct sd_node *) vnodes[i]->node > + }; > + entry = rb_search(&oid_tree, &key, rb, oid_entry_cmp); > + if (!entry) > + panic("rb_search() failure."); > + > + if (entry->last >= entry->end) { > + entry->end *= 2; > + entry->oids = xrealloc(entry->oids, > + sizeof(uint64_t) * entry->end); > + } > + entry->oids[entry->last] = oid; > + entry->last++; > + } > +} > + > +static void build_oid_tree(const struct sd_inode *inode) > +{ > + uint32_t max_idx, vid; > + uint64_t oid; > + struct sd_node *node; > + struct oid_entry *entry; > + int copies = min((int)inode->nr_copies, sd_zones_nr); > + > + rb_for_each_entry(node, &sd_nroot, rb) { > + entry = xmalloc(sizeof(*entry)); > + entry->node = 
node; > + entry->oids = xmalloc(sizeof(uint64_t) * OIDS_INIT_LENGTH); > + entry->end = OIDS_INIT_LENGTH; > + entry->last = 0; > + rb_insert(&oid_tree, entry, rb, oid_entry_cmp); > + } > + > + save_oid(vid_to_vdi_oid(inode->vdi_id), copies); > + max_idx = count_data_objs(inode); > + for (uint32_t idx = 0; idx < max_idx; idx++) { > + vid = sd_inode_get_vid(inode, idx); > + if (vid == 0) > + continue; > + oid = vid_to_data_oid(vid, idx); > + save_oid(oid, copies); > + } > +} > + > +static void destroy_oid_tree(void) > +{ > + struct oid_entry *entry; > + > + rb_for_each_entry(entry, &oid_tree, rb) > + free(entry->oids); > + rb_destroy(&oid_tree, struct oid_entry, rb); > +} > + > +static int do_vdi_check_exist(const struct sd_inode *inode) > +{ > + int total = 0; > + struct oid_entry *entry; > + struct sd_req hdr; > + struct sd_rsp *rsp = (struct sd_rsp *)&hdr; > + > + build_oid_tree(inode); > + > + rb_for_each_entry(entry, &oid_tree, rb) { > + sd_init_req(&hdr, SD_OP_OIDS_EXIST); > + hdr.data_length = sizeof(uint64_t) * entry->last; > + hdr.flags = SD_FLAG_CMD_FILTER; > + int ret = dog_exec_req(&entry->node->nid, &hdr, entry->oids); > + if (ret < 0) > + panic("dog_exec_req() failure."); > + > + int n = rsp->data_length / sizeof(uint64_t); > + total += n; > + for (int i = 0; i < n; i++) > + printf("[%s] oid %016"PRIx64" is missing.\n", > + addr_to_str(entry->node->nid.addr, > + entry->node->nid.port), > + entry->oids[i]); > + } > + > + destroy_oid_tree(); > + > + if (total == 0) { > + printf("%s is fine, no object is missing.\n", inode->name); > + return EXIT_SUCCESS; > + } else { > + printf("%s lost %d object(s).\n", inode->name, total); > + return EXIT_FAILURE; > + } > +} > + > static int do_track_object(uint64_t oid, uint8_t nr_copies) > { > int i, j, ret; > @@ -1873,7 +1978,10 @@ static int vdi_check(int argc, char **argv) > goto out; > } > > - ret = do_vdi_check(inode); > + if (vdi_cmd_data.exist) > + ret = do_vdi_check_exist(inode); > + else > + ret = 
do_vdi_check(inode); > out: > free(inode); > return ret; > @@ -2591,7 +2699,7 @@ static int vdi_alter_copy(int argc, char **argv) > } > > static struct subcommand vdi_cmd[] = { > - {"check", "<vdiname>", "sapht", "check and repair image's consistency", > + {"check", "<vdiname>", "seapht", "check and repair image's consistency", > NULL, CMD_NEED_NODELIST|CMD_NEED_ARG, > vdi_check, vdi_options}, > {"create", "<vdiname> <size>", "Pycaphrvt", "create an image", > @@ -2735,6 +2843,9 @@ static int vdi_parser(int ch, const char *opt) > exit(EXIT_FAILURE); > } > break; > + case 'e': > + vdi_cmd_data.exist = true; > + break; > } > > return 0; > diff --git a/include/internal_proto.h b/include/internal_proto.h > index 2affc42..37afb46 100644 > --- a/include/internal_proto.h > +++ b/include/internal_proto.h > @@ -107,6 +107,7 @@ > #define SD_OP_PREVENT_INODE_UPDATE 0xC3 > #define SD_OP_ALLOW_INODE_UPDATE 0xC4 > #define SD_OP_REPAIR_REPLICA 0xC5 > +#define SD_OP_OIDS_EXIST 0xC6 > > /* internal flags for hdr.flags, must be above 0x80 */ > #define SD_FLAG_CMD_RECOVERY 0x0080 > @@ -180,6 +181,14 @@ struct sd_node { > #endif > }; > > +struct oid_entry { > + struct rb_node rb; > + struct sd_node *node; /* key */ > + uint64_t *oids; /* object id array */ > + int end; /* idx to the end of the allocated oid array */ > + int last; /* idx to the last element of the oid array */ > +}; > + > /* > * A joining sheep multicasts the local cluster info. Then, the existing > nodes > * reply the latest cluster info which is unique among all of the nodes. 
> diff --git a/include/sheep.h b/include/sheep.h > index e062372..5b136a8 100644 > --- a/include/sheep.h > +++ b/include/sheep.h > @@ -199,6 +199,12 @@ static inline int node_cmp(const struct sd_node *node1, > return node_id_cmp(&node1->nid, &node2->nid); > } > > +static inline int oid_entry_cmp(const struct oid_entry *entry1, > + const struct oid_entry *entry2) > +{ > + return node_cmp(entry1->node, entry2->node); > +} > + > static inline bool node_eq(const struct sd_node *a, const struct sd_node *b) > { > return node_cmp(a, b) == 0; > diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h > index d6a8d35..b4e1e13 100644 > --- a/include/sheepdog_proto.h > +++ b/include/sheepdog_proto.h > @@ -50,6 +50,7 @@ > #define SD_FLAG_CMD_COW 0x02 > #define SD_FLAG_CMD_CACHE 0x04 > #define SD_FLAG_CMD_DIRECT 0x08 /* don't use object cache */ > +#define SD_FLAG_CMD_FILTER 0x11 /* write & read, output is subset of input > */ > /* flags above 0x80 are sheepdog-internal */ > > #define SD_RES_SUCCESS 0x00 /* Success */ > diff --git a/lib/net.c b/lib/net.c > index b32e022..552e945 100644 > --- a/lib/net.c > +++ b/lib/net.c > @@ -334,6 +334,9 @@ int exec_req(int sockfd, struct sd_req *hdr, void *data, > if (hdr->flags & SD_FLAG_CMD_WRITE) { > wlen = hdr->data_length; > rlen = 0; > + } else if (hdr->flags & SD_FLAG_CMD_FILTER) { > + wlen = hdr->data_length; > + rlen = hdr->data_length; > } else { > wlen = 0; > rlen = hdr->data_length; > diff --git a/sheep/ops.c b/sheep/ops.c > index dc10f0f..3d20c7d 100644 > --- a/sheep/ops.c > +++ b/sheep/ops.c > @@ -1056,6 +1056,30 @@ static int local_oid_exist(struct request *req) > return SD_RES_NO_OBJ; > } > > +static int local_oids_exist(const struct sd_req *req, struct sd_rsp *rsp, > + void *data) > +{ > + struct request *r = container_of(req, struct request, rq); > + uint64_t *oids = (uint64_t *) data; > + uint8_t ec_index; > + int i, j, n = req->data_length / sizeof(uint64_t); > + > + for (i = 0, j = 0; i < n; i++) { > + ec_index = 
local_ec_index(r->vinfo, oids[i]); > + if (is_erasure_oid(oids[i]) && ec_index == SD_MAX_COPIES) > + oids[j++] = oids[i]; > + else if (!sd_store->exist(oids[i], ec_index)) > + oids[j++] = oids[i]; > + } > + > + if (j > 0) { > + rsp->data_length = sizeof(uint64_t) * j; > + return SD_RES_NO_OBJ; > + } > + > + return SD_RES_SUCCESS; > +} > + > static int local_cluster_info(const struct sd_req *req, struct sd_rsp *rsp, > void *data) > { > @@ -1594,6 +1618,13 @@ static struct sd_op_template sd_ops[] = { > .process_work = local_oid_exist, > }, > > + [SD_OP_OIDS_EXIST] = { > + .name = "OIDS_EXIST", > + .type = SD_OP_TYPE_LOCAL, > + .force = true, > + .process_main = local_oids_exist, > + }, > + > [SD_OP_CLUSTER_INFO] = { > .name = "CLUSTER INFO", > .type = SD_OP_TYPE_LOCAL, > -- > 1.8.3.2 > > > -- > sheepdog mailing list > sheepdog@lists.wpkg.org > http://lists.wpkg.org/mailman/listinfo/sheepdog -- sheepdog mailing list sheepdog@lists.wpkg.org http://lists.wpkg.org/mailman/listinfo/sheepdog