Currently, "dog vdi check" does not work correctly when an inode object is lost. This patch lets the subcommand handle that case correctly.
Signed-off-by: Hitoshi Mitake <[email protected]> --- dog/common.c | 12 +++-- dog/dog.h | 2 + dog/vdi.c | 114 +++++++++++++++++++++++++++++++++++++++++++++-- tests/functional/077 | 15 +++++++ tests/functional/077.out | 2 + 5 files changed, 139 insertions(+), 6 deletions(-) diff --git a/dog/common.c b/dog/common.c index 59d38dc..649f303 100644 --- a/dog/common.c +++ b/dog/common.c @@ -55,8 +55,8 @@ char *strnumber(uint64_t size) return strnumber_raw(size, raw_output); } -int sd_read_object(uint64_t oid, void *data, unsigned int datalen, - uint64_t offset, bool direct) +int do_sd_read_object(struct node_id *nid, uint64_t oid, void *data, + unsigned int datalen, uint64_t offset, bool direct) { struct sd_req hdr; struct sd_rsp *rsp = (struct sd_rsp *)&hdr; @@ -70,7 +70,7 @@ int sd_read_object(uint64_t oid, void *data, unsigned int datalen, if (direct) hdr.flags |= SD_FLAG_CMD_DIRECT; - ret = dog_exec_req(&sd_nid, &hdr, data); + ret = dog_exec_req(nid, &hdr, data); if (ret < 0) { sd_err("Failed to read object %" PRIx64, oid); return SD_RES_EIO; @@ -85,6 +85,12 @@ int sd_read_object(uint64_t oid, void *data, unsigned int datalen, return SD_RES_SUCCESS; } +int sd_read_object(uint64_t oid, void *data, unsigned int datalen, + uint64_t offset, bool direct) +{ + return do_sd_read_object(&sd_nid, oid, data, datalen, offset, direct); +} + int sd_write_object(uint64_t oid, uint64_t cow_oid, void *data, unsigned int datalen, uint64_t offset, uint32_t flags, uint8_t copies, uint8_t copy_policy, bool create, diff --git a/dog/dog.h b/dog/dog.h index af3cefa..f46297b 100644 --- a/dog/dog.h +++ b/dog/dog.h @@ -71,6 +71,8 @@ typedef void (*vdi_parser_func_t)(uint32_t vid, const char *name, uint32_t flags, const struct sd_inode *i, void *data); int parse_vdi(vdi_parser_func_t func, size_t size, void *data); +int do_sd_read_object(struct node_id *nid, uint64_t oid, void *data, + unsigned int datalen, uint64_t offset, bool direct); int sd_read_object(uint64_t oid, void *data, unsigned int 
datalen, uint64_t offset, bool direct); int sd_write_object(uint64_t oid, uint64_t cow_oid, void *data, diff --git a/dog/vdi.c b/dog/vdi.c index bfee11f..909de35 100644 --- a/dog/vdi.c +++ b/dog/vdi.c @@ -532,6 +532,114 @@ static int read_vdi_obj(const char *vdiname, int snapid, const char *tag, return EXIT_SUCCESS; } +static int read_majority_vdi_obj(const char *vdiname, int snapid, + const char *tag, uint32_t *pvid, + struct sd_inode *inode, + size_t size) +{ + int ret; + uint32_t vid; + uint64_t oid; + const struct sd_vnode *vnodes[SD_MAX_COPIES]; + /* + * FIXME: we should handle a case of + * inode->nr_copies != SD_DEFAULT_COPIES + */ + + struct { + bool found; + uint8_t digest[20]; + } results[SD_DEFAULT_COPIES]; + + int count = 0, nr_live_copies = 0; + int majority_idx = -1; + + ret = find_vdi_name(vdiname, snapid, tag, &vid, 0); + if (ret < 0) { + sd_err("Failed to open VDI %s", vdiname); + return EXIT_FAILURE; + } + + oid = vid_to_vdi_oid(vid); + oid_to_vnodes(oid, &sd_vroot, SD_DEFAULT_COPIES, vnodes); + + for (int i = 0; i < SD_DEFAULT_COPIES; i++) { + struct sd_req hdr; + struct sd_rsp *rsp = (struct sd_rsp *)&hdr; + + sd_init_req(&hdr, SD_OP_GET_HASH); + + hdr.obj.oid = oid; + hdr.obj.tgt_epoch = sd_epoch; + + ret = dog_exec_req(&vnodes[i]->node->nid, &hdr, NULL); + if (ret < 0) + exit(EXIT_SYSFAIL); + + switch (rsp->result) { + case SD_RES_SUCCESS: + results[i].found = true; + memcpy(results[i].digest, rsp->hash.digest, + sizeof(results[i].digest)); + break; + case SD_RES_NO_OBJ: + results[i].found = false; + break; + default: + sd_err("failed to read %" PRIx64 " from %s, %s", oid, + addr_to_str(vnodes[i]->node->nid.addr, + vnodes[i]->node->nid.port), + sd_strerror(rsp->result)); + exit(EXIT_FAILURE); + } + + } + + /* Boyer Moore MJRTY */ + for (int i = 0; i < SD_DEFAULT_COPIES; i++) { + if (!results[i].found) + continue; + nr_live_copies++; + + if (!count) + majority_idx = i; + + if (!memcmp(results[majority_idx].digest, results[i].digest, + 
sizeof(results[majority_idx].digest))) + count++; + else + count--; + } + + if (majority_idx == -1) { + sd_err("no inode object (%" PRIx64 ") found", oid); + return EXIT_FAILURE; + } else if (count < nr_live_copies / 2) { + sd_err("no majority inode object (%" PRIx64 ") found", oid); + return EXIT_FAILURE; + } + + ret = do_sd_read_object((struct node_id *) + &vnodes[majority_idx]->node->nid, oid, inode, + size, 0, true); + if (ret != SD_RES_SUCCESS) { + if (snapid) { + sd_err("Failed to read a snapshot %s:%d", vdiname, + snapid); + } else if (tag && tag[0]) { + sd_err("Failed to read a snapshot %s:%s", vdiname, tag); + } else { + sd_err("Failed to read a vdi %s", vdiname); + } + return EXIT_FAILURE; + } + + if (pvid) + *pvid = vid; + + return EXIT_SUCCESS; +} + int do_vdi_create(const char *vdiname, int64_t vdi_size, uint32_t base_vid, uint32_t *vdi_id, bool snapshot, uint8_t nr_copies, uint8_t copy_policy, uint8_t store_policy) @@ -1880,9 +1988,9 @@ static int vdi_check(int argc, char **argv) int ret; struct sd_inode *inode = xmalloc(sizeof(*inode)); - ret = read_vdi_obj(vdiname, vdi_cmd_data.snapshot_id, - vdi_cmd_data.snapshot_tag, NULL, inode, - SD_INODE_SIZE); + ret = read_majority_vdi_obj(vdiname, vdi_cmd_data.snapshot_id, + vdi_cmd_data.snapshot_tag, NULL, inode, + SD_INODE_SIZE); if (ret != EXIT_SUCCESS) { sd_err("FATAL: no inode objects"); return ret; diff --git a/tests/functional/077 b/tests/functional/077 index f2c2211..7f14b8c 100755 --- a/tests/functional/077 +++ b/tests/functional/077 @@ -68,6 +68,20 @@ $DOG vdi read test 0 14 $DOG cluster shutdown +# single inode object lost + +rm $STORE/0/obj/807c2b2500000000 + +for i in `seq 0 2`; do + _start_sheep $i +done + +_wait_for_sheep 3 + +$DOG vdi check test + +$DOG cluster shutdown + # single object lost, single broken object. 
no majority rm $STORE/0/obj/007c2b2500000000 @@ -82,3 +96,4 @@ _wait_for_sheep 3 $DOG vdi check test $DOG cluster shutdown + diff --git a/tests/functional/077.out b/tests/functional/077.out index 5bddb9b..e76b1ee 100644 --- a/tests/functional/077.out +++ b/tests/functional/077.out @@ -12,5 +12,7 @@ fixed missing 7c2b2500000000 fixed missing 7c2b2500000000 finish check&repair test original data +fixed missing 807c2b2500000000 +finish check&repair test no majority of 7c2b2500000000 finish check&repair test -- 1.8.1.2 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
