This patch adds two new internal sheep operations, SD_OP_SYNC_VDI and
SD_OP_FLUSH_PEER, for implementing writeback cache semantics in backend stores.

If writeback cache semantics are used in backend stores, an explicit
flush on all sheep is required when the gateway sheep receives SD_OP_FLUSH_VDI.

After applying this patch, SD_OP_SYNC_VDI will be queued as a gateway
request when sheep receives SD_OP_FLUSH_VDI. SD_OP_SYNC_VDI forwards
SD_OP_FLUSH_PEER to all other sheeps. After receiving the
SD_OP_FLUSH_PEER, sheeps flush their cache of backend stores.

This patch also modifies the -w command line option of sheep. -w was used
for enabling the object cache and specifying its size. After applying this
patch, -w is also used for enabling writeback cache semantics in
backend stores. Examples of the new -w syntax:
-w disk ... enable writeback cache semantics of disks
-w disk,object:50 ... enable writeback cache semantics of disks, and
enable object cache with 50MB memory
-w object:50 ... enable object cache with 50MB memory

Cc: MORITA Kazutaka <[email protected]>
Cc: Liu Yuan <[email protected]>
Signed-off-by: Hitoshi Mitake <[email protected]>
---
 include/internal_proto.h |    2 +
 sheep/gateway.c          |    5 ++
 sheep/ops.c              |   36 ++++++++++++++++-
 sheep/plain_store.c      |    2 +
 sheep/sheep.c            |   97 ++++++++++++++++++++++++++++++++++++---------
 sheep/sheep_priv.h       |    7 +++-
 sheep/store.c            |    5 +-
 7 files changed, 127 insertions(+), 27 deletions(-)

diff --git a/include/internal_proto.h b/include/internal_proto.h
index c1d116a..6c09af8 100644
--- a/include/internal_proto.h
+++ b/include/internal_proto.h
@@ -65,6 +65,8 @@
 #define SD_OP_INFO_RECOVER 0xAA
 #define SD_OP_GET_VDI_COPIES 0xAB
 #define SD_OP_COMPLETE_RECOVERY 0xAC
+#define SD_OP_SYNC_VDI 0xAD
+#define SD_OP_FLUSH_PEER 0xAE
 
 /* internal flags for hdr.flags, must be above 0x80 */
 #define SD_FLAG_CMD_RECOVERY 0x0080
diff --git a/sheep/gateway.c b/sheep/gateway.c
index 98ba079..88ec3a3 100644
--- a/sheep/gateway.c
+++ b/sheep/gateway.c
@@ -334,3 +334,8 @@ int gateway_remove_obj(struct request *req)
 {
        return gateway_forward_request(req, 0);
 }
+
+int gateway_sync_vdi(struct request *req)
+{
+       return gateway_forward_request(req, 1);
+}
diff --git a/sheep/ops.c b/sheep/ops.c
index ccb1c5e..bb96625 100644
--- a/sheep/ops.c
+++ b/sheep/ops.c
@@ -647,9 +647,22 @@ static int local_get_snap_file(struct request *req)
 
 static int local_flush_vdi(struct request *req)
 {
-       if (!sys->enable_write_cache)
-               return SD_RES_SUCCESS;
-       return object_cache_flush_vdi(req);
+       int ret = SD_RES_SUCCESS;
+
+       if (sys->enable_write_cache) {
+               ret = object_cache_flush_vdi(req);
+               if (ret != SD_RES_SUCCESS)
+                       return ret;
+       }
+
+       if (sys->store_writeback) {
+               struct sd_req hdr;
+
+               sd_init_req(&hdr, SD_OP_SYNC_VDI);
+               return exec_local_req(&hdr, NULL);
+       }
+
+       return ret;
 }
 
 static int local_flush_and_del(struct request *req)
@@ -904,6 +917,11 @@ out:
        return ret;
 }
 
+int peer_flush(struct request *req)
+{
+       return sd_store->flush();
+}
+
 static struct sd_op_template sd_ops[] = {
 
        /* cluster operations */
@@ -1170,6 +1188,17 @@ static struct sd_op_template sd_ops[] = {
                .type = SD_OP_TYPE_LOCAL,
                .process_main = local_info_recover,
        },
+
+       [SD_OP_FLUSH_PEER] = {
+               .name = "FLUSH_PEER",
+               .type = SD_OP_TYPE_PEER,
+               .process_work = peer_flush,
+       },
+       [SD_OP_SYNC_VDI] = {
+               .name = "SYNC_VDI",
+               .type = SD_OP_TYPE_GATEWAY,
+               .process_work = gateway_sync_vdi,
+       },
 };
 
 struct sd_op_template *get_sd_op(uint8_t opcode)
@@ -1255,6 +1284,7 @@ static int map_table[] = {
        [SD_OP_READ_OBJ] = SD_OP_READ_PEER,
        [SD_OP_WRITE_OBJ] = SD_OP_WRITE_PEER,
        [SD_OP_REMOVE_OBJ] = SD_OP_REMOVE_PEER,
+       [SD_OP_SYNC_VDI] = SD_OP_FLUSH_PEER,
 };
 
 int gateway_to_peer_opcode(int opcode)
diff --git a/sheep/plain_store.c b/sheep/plain_store.c
index fa3cf60..6152348 100644
--- a/sheep/plain_store.c
+++ b/sheep/plain_store.c
@@ -128,6 +128,8 @@ int default_write(uint64_t oid, struct siocb *iocb, int create)
        }
 
        get_obj_path(oid, path);
+       if (iocb->flags & SD_FLAG_CMD_CACHE && sys->store_writeback)
+               flags &= ~O_DSYNC;
        fd = open(path, flags, def_fmode);
        if (fd < 0)
                return err_to_sderr(oid, errno);
diff --git a/sheep/sheep.c b/sheep/sheep.c
index 45064da..b244370 100644
--- a/sheep/sheep.c
+++ b/sheep/sheep.c
@@ -49,9 +49,9 @@ static struct option const long_options[] = {
        {"stdout", no_argument, NULL, 'o'},
        {"port", required_argument, NULL, 'p'},
        {"disk-space", required_argument, NULL, 's'},
-       {"enable-cache", required_argument, NULL, 'w'},
        {"zone", required_argument, NULL, 'z'},
        {"pidfile", required_argument, NULL, 'P'},
+       {"cache", required_argument, NULL, 'w'},
        {NULL, 0, NULL, 0},
 };
 
@@ -78,9 +78,9 @@ Options:\n\
   -p, --port              specify the TCP port on which to listen\n\
   -P, --pidfile           create a pid file\n\
   -s, --disk-space        specify the free disk space in megabytes\n\
-  -w, --enable-cache      enable object cache and specify the max size (M) and mode\n\
   -y, --myaddr            specify the address advertised to other sheep\n\
   -z, --zone              specify the zone id\n\
+  -w, --cache             specify the cache type\n\
 ", PACKAGE_VERSION, program_name);
        exit(status);
 }
@@ -178,6 +178,77 @@ static int init_signal(void)
 static struct cluster_info __sys;
 struct cluster_info *sys = &__sys;
 
+static void object_cache_set(char *s)
+{
+       const char *header = "object:";
+       int len = strlen(header);
+       char *size, *p;
+       int64_t cache_size;
+
+       if (strncmp(s, header, len))
+               goto err;
+
+       size = s + len;
+       cache_size = strtol(size, &p, 10);
+       if (size == p || cache_size < 0 || UINT64_MAX < cache_size)
+               goto err;
+
+       sys->enable_write_cache = 1;
+       sys->cache_size = cache_size * 1024 * 1024;
+
+       return;
+err:
+       fprintf(stderr, "Invalid object cache option '%s': "
+               "size must be an integer between 0 and %lu\n",
+               s, UINT64_MAX);
+       exit(1);
+}
+
+static void disk_cache_set(char *s)
+{
+       if (strcmp(s, "disk")) {
+               fprintf(stderr, "invalid disk cache option: %s\n", s);
+               exit(1);
+       }
+
+       sys->store_writeback = 1;
+}
+
+static void do_cache_mode(char *s)
+{
+       int i;
+       struct cache_mode {
+               const char *name;
+               void (*set)(char *);
+       };
+
+       struct cache_mode cache_mode_array[] = {
+               { "object", object_cache_set },
+               { "disk", disk_cache_set },
+               { NULL, NULL },
+       };
+
+       for (i = 0; cache_mode_array[i].name; i++) {
+               const char *n = cache_mode_array[i].name;
+
+               if (!strncmp(s, n, strlen(n))) {
+                       cache_mode_array[i].set(s);
+                       return;
+               }
+       }
+
+       fprintf(stderr, "invalid cache mode: %s\n", s);
+       exit(1);
+}
+
+static void init_cache_mode(char *mode)
+{
+       char *s = strtok(mode, ",");
+       do {
+               do_cache_mode(s);
+       } while ((s = strtok(NULL, ",")));
+}
+
 int main(int argc, char **argv)
 {
        int ch, longindex;
@@ -188,14 +259,12 @@ int main(int argc, char **argv)
        int log_level = SDOG_INFO;
        char path[PATH_MAX];
        int64_t zone = -1;
-       int64_t cache_size = 0;
        int64_t free_space = 0;
        int nr_vnodes = SD_DEFAULT_VNODES;
        bool explicit_addr = false;
        int af;
        char *p;
        struct cluster_driver *cdrv;
-       int enable_object_cache = 0; /* disabled by default */
        char *pid_file = NULL;
 
        signal(SIGPIPE, SIG_IGN);
@@ -263,21 +332,6 @@ int main(int argc, char **argv)
                        }
                        sys->this_node.zone = zone;
                        break;
-               case 'w':
-                       enable_object_cache = 1;
-                       cache_size = strtol(optarg, &p, 10);
-                       if (optarg == p || cache_size < 0 ||
-                           UINT64_MAX < cache_size) {
-                               fprintf(stderr, "Invalid cache size '%s': "
-                                       "must be an integer between 0 and %lu\n",
-                                       optarg, UINT64_MAX);
-                               exit(1);
-                       }
-                       sys->cache_size = cache_size * 1024 * 1024;
-
-                       fprintf(stdout, "enable write cache, "
-                               "max cache size %" PRIu64 "M\n", cache_size);
-                       break;
                case 's':
                        free_space = strtoll(optarg, &p, 10);
                        if (optarg == p || free_space <= 0 ||
@@ -303,6 +357,9 @@ int main(int argc, char **argv)
 
                        sys->cdrv_option = get_cdrv_option(sys->cdrv, optarg);
                        break;
+               case 'w':
+                       init_cache_mode(optarg);
+                       break;
                case 'h':
                        usage(0);
                        break;
@@ -334,7 +391,7 @@ int main(int argc, char **argv)
        if (ret)
                exit(1);
 
-       ret = init_store(dir, enable_object_cache);
+       ret = init_store(dir);
        if (ret)
                exit(1);
 
diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h
index 26a46e4..6e74876 100644
--- a/sheep/sheep_priv.h
+++ b/sheep/sheep_priv.h
@@ -118,6 +118,8 @@ struct cluster_info {
        struct work_queue *block_wqueue;
        struct work_queue *sockfd_wqueue;
        struct work_queue *reclaim_wqueue;
+
+       int store_writeback;
 };
 
 struct siocb {
@@ -211,7 +213,7 @@ static inline uint32_t sys_epoch(void)
 
 int create_listen_port(int port, void *data);
 
-int init_store(const char *dir, int enable_write_cache);
+int init_store(const char *dir);
 int init_base_path(const char *dir);
 
 int fill_vdi_copy_list(void *data);
@@ -353,12 +355,15 @@ int gateway_read_obj(struct request *req);
 int gateway_write_obj(struct request *req);
 int gateway_create_and_write_obj(struct request *req);
 int gateway_remove_obj(struct request *req);
+int gateway_sync_vdi(struct request *req);
 
 /* backend store */
 int peer_read_obj(struct request *req);
 int peer_write_obj(struct request *req);
 int peer_create_and_write_obj(struct request *req);
 int peer_remove_obj(struct request *req);
+int peer_flush(struct request *req);
+
 int default_flush(void);
 
 /* object_cache */
diff --git a/sheep/store.c b/sheep/store.c
index 8326156..e1f1abe 100644
--- a/sheep/store.c
+++ b/sheep/store.c
@@ -480,7 +480,7 @@ out:
        return ret;
 }
 
-int init_store(const char *d, int enable_write_cache)
+int init_store(const char *d)
 {
        int ret;
 
@@ -514,8 +514,7 @@ int init_store(const char *d, int enable_write_cache)
                        return ret;
        }
 
-       if (enable_write_cache) {
-               sys->enable_write_cache = 1;
+       if (sys->enable_write_cache) {
                ret = object_cache_init(d);
                if (ret)
                        return 1;
-- 
1.7.5.1

-- 
sheepdog mailing list
[email protected]
http://lists.wpkg.org/mailman/listinfo/sheepdog

Reply via email to