This patch provides sheepdog with live snapshot support. NOTE: For this patch to work correctly, the patch that adds vm_clock_nsec and vm_state_size to sd_inode must also be applied to collie.
Signed-off-by: OZAWA Tsuyoshi <ozawa.tsuyo...@lab.ntt.co.jp>
---
 block/sheepdog.c |  281 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 277 insertions(+), 4 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 18ecd22..ded7c75 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -76,6 +76,9 @@
 #define SD_RES_VDI_NOT_LOCKED 0x17 /* Vdi is not locked */
 #define SD_RES_SHUTDOWN 0x18 /* Sheepdog is shutting down */
 
+#define VDI_SNAPSHOT_MASK 0x7fffffff00000000ULL
+#define VDI_SNAPSHOT_BIT 0x4000000000000000ULL
+
 /* should be configurable? */
 #define MAX_RETRIES 6
 
@@ -209,7 +212,9 @@ struct sd_inode {
     uint64_t oid;
     uint64_t ctime;
     uint64_t snap_ctime;
+    uint64_t vm_clock_nsec;
     uint64_t vdi_size;
+    uint64_t vm_state_size;
     uint16_t copy_policy;
     uint8_t nr_copies;
     uint8_t block_size_shift;
@@ -1043,6 +1048,17 @@ static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
     }
 }
 
+/*
+ * Locate the data object that holds byte 'pos' of the vmstate stream.
+ * Stores the object index in *vdi_index and the byte offset inside that
+ * object in *offset.
+ */
+static void set_vdi_index_and_offset(int *vdi_index, int *offset, int64_t pos)
+{
+    *vdi_index = pos / SD_DATA_OBJ_SIZE;
+    *offset = pos % SD_DATA_OBJ_SIZE;
+}
+
 /* TODO: error cleanups */
 static int sd_open(BlockDriverState *bs, const char *filename, int flags)
 {
@@ -1572,9 +1588,15 @@ static BlockDriverAIOCB *sd_aio_readv(BlockDriverState *bs,
 static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 {
     struct bdrv_sd_state *s = bs->opaque;
-    int ret;
+    int ret, fd = -1;
+    struct sd_obj_req hdr;
+    unsigned int rlen, wlen;
+    uint64_t new_oid;
+    struct sd_inode *inode = NULL;
 
-    eprintf("%s %s %s %d %d\n", sn_info->name, sn_info->id_str,
+    sd_release(bs);
+    eprintf("sn_info: name %s id_str %s s: name %s vm_state_size %d "
+            "is_current %d\n", sn_info->name, sn_info->id_str,
             s->name, sn_info->vm_state_size, s->is_current);
 
     if (!s->is_current) {
@@ -1587,12 +1609,167 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 
     dprintf("%s %s\n", sn_info->name, sn_info->id_str);
 
-    ret = do_sd_create(s->name, sn_info->name, s->inode.vdi_size >> 9,
-                       s->inode.oid, NULL, 1);
+    s->inode.vm_state_size = sn_info->vm_state_size;
+    s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
+
+    /* write the updated inode object back before creating the snapshot */
+    fd = connect_to_vost();
+    if (fd < 0) {
+        ret = -EIO;
+        goto cleanup;
+    }
+
+    memset(&hdr, 0, sizeof(hdr));
+    hdr.opcode = SD_OP_WRITE_OBJ;
+
+    hdr.oid = s->inode.oid;
+    hdr.copies = s->inode.nr_copies;
+
+    hdr.flags |= SD_FLAG_CMD_WRITE;
+    hdr.data_length = SD_INODE_SIZE;
+    hdr.offset = 0;
+    wlen = SD_INODE_SIZE;
+    rlen = 0;
+
+    ret = do_req(fd, (struct sd_req *)&hdr, &s->inode, &wlen, &rlen);
+    if (ret < 0) {
+        eprintf("do_req write\n");
+        ret = -EIO;
+        goto cleanup;
+    }
+
+    ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9,
+                       s->inode.oid, &new_oid, 1);
+    if (ret < 0) {
+        eprintf("do_sd_create %m");
+        ret = -EIO;
+        goto cleanup;
+    }
+
+    /* Use a scratch buffer so s->inode survives a failed read. */
+    inode = malloc(sizeof(*inode));
+    if (!inode) {
+        eprintf("malloc %m");
+        ret = -ENOMEM;
+        goto cleanup;
+    }
+
+    memset(&hdr, 0, sizeof(hdr));
+
+    hdr.opcode = SD_OP_READ_OBJ;
+    hdr.oid = new_oid;
+    hdr.data_length = SD_INODE_SIZE;
+    hdr.offset = 0;
+
+    wlen = 0;
+    rlen = SD_INODE_SIZE;
+
+    ret = do_req(fd, (struct sd_req *)&hdr, inode, &wlen, &rlen);
+    if (ret < 0) {
+        eprintf("do_req read\n");
+        ret = -EIO;
+        goto cleanup;
+    }
+
+    memcpy(&s->inode, inode, sizeof(struct sd_inode));
+    eprintf("s->inode: name %s snap_id %x oid %llx\n",
+            s->inode.name, s->inode.snap_id, (unsigned long long)s->inode.oid);
+
+cleanup:
+    free(inode);
+    if (fd >= 0) {
+        close(fd);
+    }
 
     return ret;
 }
 
+/*
+ * Switch the driver state to the snapshot whose id is given in decimal
+ * (the format sd_snapshot_list() stores in id_str).  On any failure the
+ * previous bdrv_sd_state is restored and a negative errno is returned.
+ */
+static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
+{
+    struct bdrv_sd_state *s = bs->opaque;
+    struct bdrv_sd_state *old_s;
+    char vdi[256];
+    char *buf = NULL;
+    uint64_t oid;
+    uint32_t snapid;
+    int ret, dummy;
+
+    old_s = malloc(sizeof(*old_s));
+    if (!old_s) {
+        eprintf("malloc");
+        /* nothing has been modified yet, so there is nothing to restore */
+        return -ENOMEM;
+    }
+
+    memcpy(old_s, s, sizeof(*old_s));
+    sd_release(bs);
+
+    /* sd_snapshot_list() formats id_str with "%u", so parse base 10 */
+    snapid = strtoul(snapshot_id, NULL, 10);
+    if (!snapid) {
+        eprintf("Invalid snapshot_id\n");
+        ret = -ENOENT;
+        goto out;
+    }
+
+    buf = malloc(SD_INODE_SIZE);
+    if (!buf) {
+        eprintf("Failed to allocate memory\n");
+        ret = -ENOMEM;
+        goto out;
+    }
+
+    /* bounded copy: s->name must not overrun the local buffer */
+    snprintf(vdi, sizeof(vdi), "%s", s->name);
+    ret = find_vdi_name(s, vdi, snapid, &oid);
+    if (ret) {
+        eprintf("Failed to find_vdi_name\n");
+        ret = -ENOENT;
+        goto out;
+    }
+
+    ret = read_vdi_obj(buf, oid, &dummy);
+    if (ret) {
+        ret = -ENOENT;
+        goto out;
+    }
+
+    memcpy(&s->inode, buf, sizeof(s->inode));
+
+    if (!s->inode.vm_state_size) {
+        eprintf("Invalid snapshot\n");
+        ret = -ENOENT;
+        goto out;
+    }
+
+    s->is_current = 0;
+
+    free(buf);
+    free(old_s);
+
+    return 0;
+out:
+    /* recover bdrv_sd_state */
+    memcpy(s, old_s, sizeof(*old_s));
+    free(buf);
+    free(old_s);
+
+    eprintf("failed to open. recover old bdrv_sd_state.\n");
+
+    return ret;
+}
+
+static int sd_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+    /* FIXME: Delete snapshot specified by snapshot_id */
+    return 0;
+}
+
 struct sd_so_req {
     uint8_t proto_ver;
     uint8_t opcode;
@@ -1696,6 +1873,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
         if (!strcmp(inode.name, s->name) && inode.snap_ctime) {
             sn_tab[found].date_sec = inode.snap_ctime >> 32;
             sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
+            sn_tab[found].vm_state_size = inode.vm_state_size;
+            sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
 
             snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
                      inode.snap_id);
@@ -1710,6 +1889,94 @@ out:
     return found;
 }
 
+/*
+ * Transfer 'size' bytes of VM state starting at stream position 'pos'.
+ * The request is split at data object boundaries so that each byte of the
+ * stream maps to exactly one (object, offset) pair.  Returns the number of
+ * bytes transferred, or -EIO on failure.
+ */
+static int sd_vmstate_io(BlockDriverState *bs, uint8_t *data, int64_t pos,
+                         int size, int is_write)
+{
+    struct bdrv_sd_state *s = bs->opaque;
+    struct sd_obj_req hdr;
+    unsigned int rlen, wlen;
+    int vdi_index, offset, len;
+    int done = 0;
+    int fd, ret;
+
+    fd = connect_to_vost();
+    if (fd < 0) {
+        return -EIO;
+    }
+
+    while (done < size) {
+        set_vdi_index_and_offset(&vdi_index, &offset, pos + done);
+
+        /* never let a single request cross into the next object */
+        len = size - done;
+        if (len > SD_DATA_OBJ_SIZE - offset) {
+            len = SD_DATA_OBJ_SIZE - offset;
+        }
+
+        memset(&hdr, 0, sizeof(hdr));
+        hdr.oid = (VDI_SNAPSHOT_MASK & s->inode.oid) | VDI_SNAPSHOT_BIT |
+                  vdi_index;
+        hdr.data_length = len;
+        hdr.offset = offset;
+
+        if (is_write) {
+            /* a write that starts at offset 0 also creates the object */
+            hdr.opcode = offset ? SD_OP_WRITE_OBJ : SD_OP_CREATE_AND_WRITE_OBJ;
+            hdr.cow_oid = 0;
+            hdr.copies = s->inode.nr_copies;
+            hdr.flags |= SD_FLAG_CMD_WRITE;
+            wlen = len;
+            rlen = 0;
+        } else {
+            hdr.opcode = SD_OP_READ_OBJ;
+            wlen = 0;
+            rlen = len;
+        }
+
+        ret = do_req(fd, (struct sd_req *)&hdr, data + done, &wlen, &rlen);
+        if (ret < 0) {
+            eprintf("do_req %m");
+            ret = -EIO;
+            goto cleanup;
+        }
+
+        if (!is_write && rlen < (unsigned int)len) {
+            /* short read (assumes do_req() updates rlen): report and stop */
+            done += rlen;
+            break;
+        }
+        done += len;
+    }
+    ret = done;
+
+cleanup:
+    close(fd);
+    return ret;
+}
+
+/* Write VM state to the snapshot objects; returns 0 or a negative errno. */
+static int sd_save_vmstate(BlockDriverState *bs, const uint8_t *data,
+                           int64_t pos, int size)
+{
+    /* the write path only reads from 'data', so dropping const is safe */
+    int ret = sd_vmstate_io(bs, (uint8_t *)data, pos, size, 1);
+
+    return ret < 0 ? ret : 0;
+}
+
+/* Read VM state back; returns the byte count read or a negative errno. */
+static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
+                           int64_t pos, int size)
+{
+    return sd_vmstate_io(bs, data, pos, size, 0);
+}
+
 static QEMUOptionParameter sd_create_options[] = {
     {
         .name = BLOCK_OPT_SIZE,
@@ -1738,8 +2005,14 @@ BlockDriver bdrv_sheepdog = {
     .bdrv_aio_writev = sd_aio_writev,
 
     .bdrv_snapshot_create = sd_snapshot_create,
+    .bdrv_snapshot_goto = sd_snapshot_goto,
+    .bdrv_snapshot_delete = sd_snapshot_delete,
     .bdrv_snapshot_list = sd_snapshot_list,
+
+    .bdrv_save_vmstate = sd_save_vmstate,
+    .bdrv_load_vmstate = sd_load_vmstate,
+
     .create_options = sd_create_options,
 };
 
 static void bdrv_sheepdog_init(void)
-- 
1.6.5

-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog