restore support

tao.peng Thu, 29 Dec 2011 22:51:09 -0800

> -----Original Message-----
> From: [email protected] 
> [mailto:[email protected]] On Behalf Of Liu Yuan
> Sent: Sunday, December 25, 2011 11:43 PM
> To: [email protected]
> Cc: [email protected]
> Subject: [Sheepdog] [PATCH v4 11/12] sheep: add cluster snapshot/restore 
> support
> 
> From: Liu Yuan <[email protected]>
> 
> This kind of snapshot is supposed to be triggered by the user, _not_ by the
> recovery code. I don't think we need to restore to the state at the beginning
> of the recovery. So this work only permits us to restore the cluster to a
> snapshot initiated by end users, though it would be quite easy to also
> support restoring to the snapshots forcibly taken by the recovery path.
> 
> TODO:
> - check the nodes state consistency besides object recovery.
> 
> Signed-off-by: Liu Yuan <[email protected]>
> ---
>  include/sheep.h    |    5 ++-
>  sheep/farm/farm.c  |  134 
> ++++++++++++++++++++++++++++++++++++++++++++++++++++
>  sheep/ops.c        |   58 ++++++++++++++++++++++
>  sheep/sheep_priv.h |    4 ++
>  4 files changed, 200 insertions(+), 1 deletions(-)
> 
> diff --git a/include/sheep.h b/include/sheep.h
> index 906c1f5..f90b968 100644
> --- a/include/sheep.h
> +++ b/include/sheep.h
> @@ -37,7 +37,10 @@
>  #define SD_OP_STAT_CLUSTER   0x87
>  #define SD_OP_KILL_NODE      0x88
>  #define SD_OP_GET_VDI_ATTR   0x89
> -#define SD_OP_RECOVER             0x8A
> +#define SD_OP_RECOVER        0x8a
> +#define SD_OP_SNAPSHOT       0x90
> +#define SD_OP_RESTORE        0x91
> +#define SD_OP_SNAP_FILE      0x92
> 
>  #define SD_FLAG_CMD_IO_LOCAL   0x0010
>  #define SD_FLAG_CMD_RECOVERY 0x0020
> diff --git a/sheep/farm/farm.c b/sheep/farm/farm.c
> index 813e389..1c26d76 100644
> --- a/sheep/farm/farm.c
> +++ b/sheep/farm/farm.c
> @@ -410,6 +410,137 @@ out:
>       return ret;
>  }
> 
> +static int farm_snapshot(struct siocb *iocb)
> +{
> +     unsigned char snap_sha1[SHA1_LEN];
> +     void *buffer;
> +     int log_nr, ret = SD_RES_EIO, epoch;
> +
> +     buffer = snap_log_read(&log_nr, 1);
> +     if (!buffer)
> +             goto out;
> +
> +     epoch = log_nr + 1;
> +     dprintf("user epoch %d\n", epoch);
Will there be concurrent snapshot creators? If so, two of them could read the 
same log_nr here and end up using the same epoch.


> +     if (snap_file_write(epoch, snap_sha1, 1) < 0)
> +             goto out;
> +
> +     if (snap_log_write(epoch, snap_sha1, 1) < 0)
> +             goto out;
> +
> +     ret = SD_RES_SUCCESS;
> +out:
> +     free(buffer);
> +     return ret;
> +}
> +
> +static int cleanup_working_dir(void)
> +{
> +     DIR *dir;
> +     struct dirent *d;
> +
> +     dprintf("try clean up working dir\n");
> +     dir = opendir(obj_path);
> +     if (!dir)
> +             return -1;
> +
> +     while ((d = readdir(dir))) {
> +             char p[PATH_MAX];
> +             if (!strncmp(d->d_name, ".", 1))
> +                     continue;
> +             snprintf(p, sizeof(p), "%s%s", obj_path, d->d_name);
> +             if (unlink(p) < 0) {
> +                     eprintf("%s:%m\n", p);
> +                     continue;
> +             }
> +             dprintf("remove file %s\n", d->d_name);
> +     }
> +     closedir(dir);
> +     return 0;
> +}
> +
> +static int restore_objects_from_snap(int epoch)
> +{
> +     struct sha1_file_hdr hdr;
> +     struct trunk_entry *trunk_buf, *trunk_free = NULL;
> +     unsigned char trunk_sha1[SHA1_LEN];
> +     uint64_t nr_trunks, i;
> +     int ret = SD_RES_EIO;
> +
> +     if (get_trunk_sha1(epoch, trunk_sha1, 1) < 0)
> +             goto out;
> +
> +     trunk_free = trunk_buf = trunk_file_read(trunk_sha1, &hdr);
> +     if (!trunk_buf)
> +             goto out;
> +
> +     nr_trunks = hdr.priv;
> +     for (i = 0; i < nr_trunks; i++, trunk_buf++) {
> +             struct sha1_file_hdr h;
> +             struct siocb io = { 0 };
> +             uint64_t oid;
> +             void *buffer = NULL;
> +
> +             oid = trunk_buf->oid;
> +             buffer = sha1_file_read(trunk_buf->sha1, &h);
> +             if (!buffer) {
> +                     eprintf("oid %"PRIx64" not restored\n", oid);
> +                     goto out;
> +             }
> +             io.length = h.size;
> +             io.buf = buffer;
> +             ret = farm_atomic_put(oid, &io);
> +             if (ret != SD_RES_SUCCESS) {
> +                     eprintf("oid %"PRIx64" not restored\n", oid);
> +                     goto out;
> +             } else
> +                     dprintf("oid %"PRIx64" restored\n", oid);
nit: if one branch of an "if...else" has braces, it is better to add them to 
the other branch as well :)

> +
> +             free(buffer);
> +     }
> +out:
> +     free(trunk_free);
> +     return ret;
> +}
> +
> +static int farm_restore(struct siocb *iocb)
> +{
> +     int ret = SD_RES_EIO, epoch = iocb->epoch;
> +
> +     dprintf("try recover user epoch %d\n", epoch);
> +
> +     if (cleanup_working_dir() < 0) {
> +             eprintf("failed to clean up the working dir %m\n");
> +             goto out;
> +     }
> +
> +     ret = restore_objects_from_snap(epoch);
> +     if (ret != SD_RES_SUCCESS)
> +             goto out;
> +out:
> +     return ret;
> +}
> +
> +static int farm_get_snap_file(struct siocb *iocb)
> +{
> +     int ret = SD_RES_EIO;
> +     void *buffer = NULL;
> +     size_t size;
> +     int nr;
> +
> +     dprintf("try get snap file\n");
> +     buffer = snap_log_read(&nr, 1);
> +     if (!buffer)
> +             goto out;
> +     size = nr * sizeof(struct snap_log);
> +     memcpy(iocb->buf, buffer, size);
> +     iocb->length = size;
> +     ret = SD_RES_SUCCESS;
> +out:
> +     free(buffer);
> +     return ret;
> +}
> +
>  struct store_driver farm = {
>       .driver_name = "farm",
>       .init = farm_init,
> @@ -422,4 +553,7 @@ struct store_driver farm = {
>       .atomic_put = farm_atomic_put,
>       .begin_recover = farm_begin_recover,
>       .end_recover = farm_end_recover,
> +     .snapshot = farm_snapshot,
> +     .restore = farm_restore,
> +     .get_snap_file = farm_get_snap_file,
>  };
> diff --git a/sheep/ops.c b/sheep/ops.c
> index 13ecdf2..5300039 100644
> --- a/sheep/ops.c
> +++ b/sheep/ops.c
> @@ -13,6 +13,8 @@
> 
>  #include "sheep_priv.h"
> 
> +extern struct store_driver store;
> +
>  enum sd_op_type {
>       SD_OP_TYPE_CLUSTER = 1, /* cluster operations */
>       SD_OP_TYPE_LOCAL,       /* local operations */
> @@ -383,6 +385,45 @@ out:
>       return ret;
>  }
> 
> +static int cluster_snapshot(const struct sd_req *req, struct sd_rsp *rsp,
> +                         void *data)
> +{
> +     int ret = SD_RES_SUCCESS;
It would be better to return an error instead of success if snapshot is not 
supported by the store driver.

> +     struct siocb iocb = { 0 };
> +
> +     if (store.snapshot)
> +             ret = store.snapshot(&iocb);
How does the above become a cluster-wide snapshot? From what I read, 
farm.snapshot only ensures that a single node's dirty objects are snapshotted. 
Or am I missing something?

> +
> +     return ret;
> +}
> +
> +static int cluster_restore(const struct sd_req *req, struct sd_rsp *rsp,
> +                        void *data)
> +{
> +     const struct sd_vdi_req *hdr = (const struct sd_vdi_req *)req;
> +     int ret = SD_RES_SUCCESS;
Ditto: better to return an error instead of success if restore is not 
supported.

> +     struct siocb iocb = { .epoch = hdr->epoch };
> +
> +     if (store.restore)
> +             ret = store.restore(&iocb);
Same here: farm.restore looks to be single-node only...

Cheers,
Tao
-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Re: [Sheepdog] [PATCH v4 11/12] sheep: add cluster snapshot/restore support

Reply via email to