This patch provides sheepdog with live snapshot support.

NOTE: For this patch to work correctly, the collie patch that adds
vm_clock_nsec and vm_state_size to sd_inode must also be applied.

Signed-off-by: OZAWA Tsuyoshi <ozawa.tsuyo...@lab.ntt.co.jp>
---
 block/sheepdog.c |  284 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 280 insertions(+), 4 deletions(-)

diff --git a/block/sheepdog.c b/block/sheepdog.c
index 18ecd22..ded7c75 100644
--- a/block/sheepdog.c
+++ b/block/sheepdog.c
@@ -76,6 +76,9 @@
 #define SD_RES_VDI_NOT_LOCKED   0x17 /* Vdi is not locked */
 #define SD_RES_SHUTDOWN      0x18 /* Sheepdog is shutting down */
 
+#define VDI_SNAPSHOT_MASK      0x7fffffff00000000ULL /* oid bits kept in vmstate oids */
+#define VDI_SNAPSHOT_BIT       0x4000000000000000ULL /* set in every vmstate oid */
+
 /* should be configurable? */
 #define MAX_RETRIES 6
 
@@ -209,7 +212,9 @@ struct sd_inode {
        uint64_t oid;
        uint64_t ctime;
        uint64_t snap_ctime;
+       uint64_t vm_clock_nsec;
        uint64_t vdi_size;
+       uint64_t vm_state_size;
        uint16_t copy_policy;
        uint8_t  nr_copies;
        uint8_t  block_size_shift;
@@ -1043,6 +1048,23 @@ static int read_vdi_obj(char *buf, uint64_t oid, int *copies)
        }
 }
 
+/* Split pos into an object index and an in-object offset.  A request whose
+ * end offset reaches SD_DATA_OBJ_SIZE or more, and is not a multiple of it,
+ * is redirected to offset 0 of the following object. */
+static void set_vdi_index_and_offset(int *vdi_index, int *offset,
+                               int64_t pos, int size)
+{
+       int end, spill;
+       *vdi_index = pos / SD_DATA_OBJ_SIZE;
+       *offset = pos % SD_DATA_OBJ_SIZE;
+       end = *offset + size;
+       spill = end / SD_DATA_OBJ_SIZE;
+       if (spill && (end % SD_DATA_OBJ_SIZE != 0)) {
+               (*vdi_index)++;
+               *offset = 0;
+       }
+}
+
 /* TODO: error cleanups */
 static int sd_open(BlockDriverState *bs, const char *filename, int flags)
 {
@@ -1572,9 +1594,15 @@ static BlockDriverAIOCB *sd_aio_readv(BlockDriverState *bs,
 static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 {
        struct bdrv_sd_state *s = bs->opaque;
-       int ret;
+       int ret, fd;
+       struct sd_obj_req hdr;
+       unsigned int rlen, wlen;
+       uint64_t new_oid;
+       struct sd_inode *inode;
 
-       eprintf("%s %s %s %d %d\n", sn_info->name, sn_info->id_str,
+       sd_release(bs);
+       eprintf("sn_info: name %s id_str %s s: name %s vm_state_size %d"
+               "is_current %d\n", sn_info->name, sn_info->id_str,
                s->name, sn_info->vm_state_size, s->is_current);
 
        if (!s->is_current) {
@@ -1587,12 +1615,156 @@ static int sd_snapshot_create(BlockDriverState *bs, QEMUSnapshotInfo *sn_info)
 
        dprintf("%s %s\n", sn_info->name, sn_info->id_str);
 
-       ret = do_sd_create(s->name, sn_info->name, s->inode.vdi_size >> 9,
-                          s->inode.oid, NULL, 1);
+       s->inode.vm_state_size = sn_info->vm_state_size;
+       s->inode.vm_clock_nsec = sn_info->vm_clock_nsec;
+
+       /* refresh inode. */
+       fd = connect_to_vost();
+       if (fd < 0) {
+               ret = -EIO;
+               goto cleanup;
+       }
+
+       memset(&hdr, 0, sizeof(hdr));
+       hdr.opcode = SD_OP_WRITE_OBJ;
+
+       hdr.oid = s->inode.oid;
+       hdr.copies = s->inode.nr_copies;
+
+       hdr.flags |= SD_FLAG_CMD_WRITE;
+       hdr.data_length = SD_INODE_SIZE;
+       hdr.offset = 0;
+       wlen = SD_INODE_SIZE;
+       rlen = 0;
+
+       ret = do_req(fd, (struct sd_req *)&hdr, &s->inode, &wlen, &rlen);
+       if (ret < 0) {
+               eprintf("do_req write\n");
+               ret = -EIO;
+               goto cleanup;
+       }
+
+       if (fd < 0) {
+               ret = -EIO;
+               goto cleanup;
+       }
+
+       ret = do_sd_create(s->name, NULL, s->inode.vdi_size >> 9,
+                          s->inode.oid, &new_oid, 1);
+       if (ret < 0) {
+               eprintf("do_sd_create %m");
+               ret = -EIO;
+               goto cleanup;
+       }
+
+       inode = (struct sd_inode *)malloc(sizeof(struct sd_inode));
+       if (!inode) {
+               eprintf("malloc %m");
+               goto cleanup;
+       }
+
+       memset(&hdr, 0, sizeof(hdr));
+
+       hdr.opcode = SD_OP_READ_OBJ;
+       hdr.oid = new_oid;
+       hdr.data_length = SD_INODE_SIZE;
+       hdr.offset = 0;
+
+       wlen = 0;
+       rlen = SD_INODE_SIZE;
+
+       ret = do_req(fd, (struct sd_req *)&hdr, inode, &wlen, &rlen);
+       if (ret < 0) {
+               eprintf("do_req read\n");
+               ret = -EIO;
+               goto cleanup;
+       }
+
+       memcpy(&s->inode, inode, sizeof(struct sd_inode));
+       eprintf("s->inode: name %s snap_id %x oid %lxn",
+               s->inode.name, s->inode.snap_id, s->inode.oid);
+
+cleanup:
+       close(fd);
+       return ret;
+}
+
+/*
+ * Switch the device state to the snapshot identified by snapshot_id.
+ *
+ * snapshot_id is parsed as a decimal number, matching the "%u" id_str
+ * written by sd_snapshot_list().  Its inode is looked up and read into
+ * s->inode, and s->is_current is cleared.  Returns 0 on success; on
+ * failure returns a negative errno value and, once a backup copy exists,
+ * restores the previous bdrv_sd_state from it.
+ */
+static int sd_snapshot_goto(BlockDriverState *bs, const char *snapshot_id)
+{
+       struct bdrv_sd_state *s = bs->opaque;
+       struct bdrv_sd_state *old_s;
+       char vdi[256];
+       char *buf = NULL;
+       uint64_t oid;
+       uint32_t snapid;
+       int ret, dummy;
+
+       old_s = malloc(sizeof(*old_s));
+       if (!old_s) {
+               /* nothing has been modified yet, so nothing to recover */
+               eprintf("malloc");
+               return -ENOMEM;
+       }
+       memcpy(old_s, s, sizeof(*old_s));
+       sd_release(bs);
+
+       ret = -ENOENT;
+       snapid = strtoul(snapshot_id, NULL, 10);
+       if (!snapid) {
+               eprintf("Invalid snapshot_id\n");
+               goto out;
+       }
+
+       buf = malloc(SD_INODE_SIZE);
+       if (!buf) {
+               eprintf("Failed to allocate memory\n");
+               ret = -ENOMEM;
+               goto out;
+       }
+       /* bounded copy: s->name may be longer than the local buffer */
+       strncpy(vdi, s->name, sizeof(vdi) - 1);
+       vdi[sizeof(vdi) - 1] = '\0';
+       if (find_vdi_name(s, vdi, snapid, &oid)) {
+               eprintf("Failed to find_vdi_name\n");
+               goto out;
+       }
+       if (read_vdi_obj(buf, oid, &dummy))
+               goto out;
+
+       memcpy(&s->inode, buf, sizeof(s->inode));
+       if (!s->inode.vm_state_size) {
+               eprintf("Invalid snapshot\n");
+               goto out;
+       }
+       s->is_current = 0;
+       free(buf);
+       free(old_s);
+       return 0;
+out:
+       /* recover bdrv_sd_state */
+       memcpy(s, old_s, sizeof(struct bdrv_sd_state));
+       free(buf);
+       free(old_s);
+       eprintf("failed to open. recover old bdrv_sd_state.\n");
 
        return ret;
 }
 
+static int sd_snapshot_delete(BlockDriverState *bs, const char *snapshot_id)
+{
+       /* FIXME: stub -- nothing is deleted for snapshot_id, yet 0 is returned */
+       return 0;
+}
+
 struct sd_so_req {
        uint8_t         proto_ver;
        uint8_t         opcode;
@@ -1696,6 +1868,8 @@ static int sd_snapshot_list(BlockDriverState *bs, QEMUSnapshotInfo **psn_tab)
                if (!strcmp(inode.name, s->name) && inode.snap_ctime) {
                        sn_tab[found].date_sec = inode.snap_ctime >> 32;
                        sn_tab[found].date_nsec = inode.snap_ctime & 0xffffffff;
+                       sn_tab[found].vm_state_size = inode.vm_state_size;
+                       sn_tab[found].vm_clock_nsec = inode.vm_clock_nsec;
 
                        snprintf(sn_tab[found].id_str, sizeof(sn_tab[found].id_str), "%u",
                                 inode.snap_id);
@@ -1710,6 +1884,100 @@ out:
        return found;
 }
 
+/*
+ * Write size bytes of VM state from data at logical position pos.
+ *
+ * The object id combines the VDI_SNAPSHOT_MASK bits of s->inode.oid,
+ * VDI_SNAPSHOT_BIT and the object index chosen for pos.  An in-object
+ * offset of 0 creates the object; any other offset writes into it.
+ *
+ * Returns the non-negative do_req() result on success, or -EIO when the
+ * connection or the request fails.
+ */
+static int sd_save_vmstate(BlockDriverState *bs, const uint8_t *data,
+                               int64_t pos, int size)
+{
+       struct bdrv_sd_state *s = bs->opaque;
+       struct sd_obj_req hdr;
+       unsigned int rlen = 0, wlen = size;
+       uint64_t sn_oid;
+       int vdi_index, offset;
+       int fd, ret;
+
+       set_vdi_index_and_offset(&vdi_index, &offset, pos, size);
+
+       sn_oid = VDI_SNAPSHOT_MASK & s->inode.oid;
+       sn_oid |= VDI_SNAPSHOT_BIT;
+       sn_oid |= vdi_index;
+
+       fd = connect_to_vost();
+       if (fd < 0) {
+               /* nothing was opened, so there is nothing to close */
+               return -EIO;
+       }
+
+       memset(&hdr, 0, sizeof(hdr));
+       hdr.opcode = offset ? SD_OP_WRITE_OBJ : SD_OP_CREATE_AND_WRITE_OBJ;
+       hdr.oid = sn_oid;
+       hdr.copies = s->inode.nr_copies;
+       hdr.flags |= SD_FLAG_CMD_WRITE;
+       hdr.data_length = size;
+       hdr.offset = offset;
+
+       ret = do_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen);
+       if (ret < 0) {
+               eprintf("do_req %m");
+               ret = -EIO;
+       }
+
+       close(fd);
+       return ret;
+}
+
+/*
+ * Read size bytes of VM state at logical position pos into data.
+ *
+ * Returns rlen as left by do_req() (presumably the byte count read -- to be
+ * confirmed against do_req), or -EIO when the connection or request fails.
+ */
+static int sd_load_vmstate(BlockDriverState *bs, uint8_t *data,
+                               int64_t pos, int size)
+{
+       struct bdrv_sd_state *s = bs->opaque;
+       struct sd_obj_req hdr;
+       unsigned int rlen = size, wlen = 0;
+       uint64_t sn_oid;
+       int vdi_index, offset;
+       int fd, ret;
+
+       set_vdi_index_and_offset(&vdi_index, &offset, pos, size);
+
+       sn_oid = VDI_SNAPSHOT_MASK & s->inode.oid;
+       sn_oid |= VDI_SNAPSHOT_BIT;
+       sn_oid |= vdi_index;
+
+       fd = connect_to_vost();
+       if (fd < 0)
+               return -EIO;
+
+       memset(&hdr, 0, sizeof(hdr));
+       hdr.opcode = SD_OP_READ_OBJ;
+       hdr.oid = sn_oid;
+       hdr.data_length = size;
+       hdr.offset = offset;
+
+       ret = do_req(fd, (struct sd_req *)&hdr, data, &wlen, &rlen);
+       if (ret < 0) {
+               eprintf("do_req %m");
+               ret = -EIO;
+       } else {
+               ret = rlen;
+       }
+       close(fd);
+       return ret;
+}
+
+
 static QEMUOptionParameter sd_create_options[] = {
        {
                .name = BLOCK_OPT_SIZE,
@@ -1738,8 +2006,16 @@ BlockDriver bdrv_sheepdog = {
        .bdrv_aio_writev = sd_aio_writev,
 
        .bdrv_snapshot_create = sd_snapshot_create,
+       .bdrv_snapshot_goto = sd_snapshot_goto,
+       .bdrv_snapshot_delete   = sd_snapshot_delete,
        .bdrv_snapshot_list = sd_snapshot_list,
+
+
+       .bdrv_save_vmstate    = sd_save_vmstate,
+       .bdrv_load_vmstate    = sd_load_vmstate,
+
        .create_options = sd_create_options,
+
 };
 
 static void bdrv_sheepdog_init(void)
-- 
1.6.5

-- 
sheepdog mailing list
sheepdog@lists.wpkg.org
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to