This patch adds two new internal sheep operations, SD_OP_SYNC_VDI and SD_OP_FLUSH_PEER, for implementing writeback cache semantics in backend stores.
If writeback cache semantics are used in backend stores, explicit flushing on all sheep is required when the gateway sheep receives SD_OP_FLUSH_VDI. After applying this patch, SD_OP_SYNC_VDI will be queued as a gateway request when a sheep receives SD_OP_FLUSH_VDI. SD_OP_SYNC_VDI forwards SD_OP_FLUSH_PEER to all other sheep. After receiving SD_OP_FLUSH_PEER, the sheep flush the caches of their backend stores. This patch also modifies the -w command-line option of sheep. Previously, -w was used for enabling the object cache and specifying its size. After applying this patch, -w is also used for enabling writeback cache semantics in backend stores. Examples of the new -w syntax: -w disk ... enable writeback cache semantics of disks -w disk,object:50 ... enable writeback cache semantics of disks, and enable object cache with 50MB memory -w object:50 ... enable object cache with 50MB memory Cc: MORITA Kazutaka <[email protected]> Cc: Liu Yuan <[email protected]> Signed-off-by: Hitoshi Mitake <[email protected]> --- include/internal_proto.h | 2 + sheep/gateway.c | 5 ++ sheep/ops.c | 36 ++++++++++++++++- sheep/plain_store.c | 2 + sheep/sheep.c | 97 ++++++++++++++++++++++++++++++++++++--------- sheep/sheep_priv.h | 7 +++- sheep/store.c | 5 +- 7 files changed, 127 insertions(+), 27 deletions(-) diff --git a/include/internal_proto.h b/include/internal_proto.h index c1d116a..6c09af8 100644 --- a/include/internal_proto.h +++ b/include/internal_proto.h @@ -65,6 +65,8 @@ #define SD_OP_INFO_RECOVER 0xAA #define SD_OP_GET_VDI_COPIES 0xAB #define SD_OP_COMPLETE_RECOVERY 0xAC +#define SD_OP_SYNC_VDI 0xAD +#define SD_OP_FLUSH_PEER 0xAE /* internal flags for hdr.flags, must be above 0x80 */ #define SD_FLAG_CMD_RECOVERY 0x0080 diff --git a/sheep/gateway.c b/sheep/gateway.c index 98ba079..88ec3a3 100644 --- a/sheep/gateway.c +++ b/sheep/gateway.c @@ -334,3 +334,8 @@ int gateway_remove_obj(struct request *req) { return gateway_forward_request(req, 0); } + +int gateway_sync_vdi(struct request *req) +{ + return 
gateway_forward_request(req, 1); +} diff --git a/sheep/ops.c b/sheep/ops.c index ccb1c5e..bb96625 100644 --- a/sheep/ops.c +++ b/sheep/ops.c @@ -647,9 +647,22 @@ static int local_get_snap_file(struct request *req) static int local_flush_vdi(struct request *req) { - if (!sys->enable_write_cache) - return SD_RES_SUCCESS; - return object_cache_flush_vdi(req); + int ret = SD_RES_SUCCESS; + + if (sys->enable_write_cache) { + ret = object_cache_flush_vdi(req); + if (ret != SD_RES_SUCCESS) + return ret; + } + + if (sys->store_writeback) { + struct sd_req hdr; + + sd_init_req(&hdr, SD_OP_SYNC_VDI); + return exec_local_req(&hdr, NULL); + } + + return ret; } static int local_flush_and_del(struct request *req) @@ -904,6 +917,11 @@ out: return ret; } +int peer_flush(struct request *req) +{ + return sd_store->flush(); +} + static struct sd_op_template sd_ops[] = { /* cluster operations */ @@ -1170,6 +1188,17 @@ static struct sd_op_template sd_ops[] = { .type = SD_OP_TYPE_LOCAL, .process_main = local_info_recover, }, + + [SD_OP_FLUSH_PEER] = { + .name = "FLUSH_PEER", + .type = SD_OP_TYPE_PEER, + .process_work = peer_flush, + }, + [SD_OP_SYNC_VDI] = { + .name = "SYNC_VDI", + .type = SD_OP_TYPE_GATEWAY, + .process_work = gateway_sync_vdi, + }, }; struct sd_op_template *get_sd_op(uint8_t opcode) @@ -1255,6 +1284,7 @@ static int map_table[] = { [SD_OP_READ_OBJ] = SD_OP_READ_PEER, [SD_OP_WRITE_OBJ] = SD_OP_WRITE_PEER, [SD_OP_REMOVE_OBJ] = SD_OP_REMOVE_PEER, + [SD_OP_SYNC_VDI] = SD_OP_FLUSH_PEER, }; int gateway_to_peer_opcode(int opcode) diff --git a/sheep/plain_store.c b/sheep/plain_store.c index fa3cf60..6152348 100644 --- a/sheep/plain_store.c +++ b/sheep/plain_store.c @@ -128,6 +128,8 @@ int default_write(uint64_t oid, struct siocb *iocb, int create) } get_obj_path(oid, path); + if (iocb->flags & SD_FLAG_CMD_CACHE && sys->store_writeback) + flags &= ~O_DSYNC; fd = open(path, flags, def_fmode); if (fd < 0) return err_to_sderr(oid, errno); diff --git a/sheep/sheep.c b/sheep/sheep.c 
index 45064da..b244370 100644 --- a/sheep/sheep.c +++ b/sheep/sheep.c @@ -49,9 +49,9 @@ static struct option const long_options[] = { {"stdout", no_argument, NULL, 'o'}, {"port", required_argument, NULL, 'p'}, {"disk-space", required_argument, NULL, 's'}, - {"enable-cache", required_argument, NULL, 'w'}, {"zone", required_argument, NULL, 'z'}, {"pidfile", required_argument, NULL, 'P'}, + {"cache", required_argument, NULL, 'w'}, {NULL, 0, NULL, 0}, }; @@ -78,9 +78,9 @@ Options:\n\ -p, --port specify the TCP port on which to listen\n\ -P, --pidfile create a pid file\n\ -s, --disk-space specify the free disk space in megabytes\n\ - -w, --enable-cache enable object cache and specify the max size (M) and mode\n\ -y, --myaddr specify the address advertised to other sheep\n\ -z, --zone specify the zone id\n\ + -w, --cache specify the cache type\n\ ", PACKAGE_VERSION, program_name); exit(status); } @@ -178,6 +178,77 @@ static int init_signal(void) static struct cluster_info __sys; struct cluster_info *sys = &__sys; +static void object_cache_set(char *s) +{ + const char *header = "object:"; + int len = strlen(header); + char *size, *p; + int64_t cache_size; + + if (strncmp(s, header, len)) + goto err; + + size = s + len; + cache_size = strtol(size, &p, 10); + if (size == p || cache_size < 0 || UINT64_MAX < cache_size) + goto err; + + sys->enable_write_cache = 1; + sys->cache_size = cache_size * 1024 * 1024; + + return; +err: + fprintf(stderr, "Invalid object cache option '%s': " + "size must be an integer between 0 and %lu\n", + s, UINT64_MAX); + exit(1); +} + +static void disk_cache_set(char *s) +{ + if (strcmp(s, "disk")) { + fprintf(stderr, "invalid disk cache option: %s\n", s); + exit(1); + } + + sys->store_writeback = 1; +} + +static void do_cache_mode(char *s) +{ + int i; + struct cache_mode { + const char *name; + void (*set)(char *); + }; + + struct cache_mode cache_mode_array[] = { + { "object", object_cache_set }, + { "disk", disk_cache_set }, + { NULL, NULL }, + 
}; + + for (i = 0; cache_mode_array[i].name; i++) { + const char *n = cache_mode_array[i].name; + + if (!strncmp(s, n, strlen(n))) { + cache_mode_array[i].set(s); + return; + } + } + + fprintf(stderr, "invalid cache mode: %s\n", s); + exit(1); +} + +static void init_cache_mode(char *mode) +{ + char *s = strtok(mode, ","); + do { + do_cache_mode(s); + } while ((s = strtok(NULL, ","))); +} + int main(int argc, char **argv) { int ch, longindex; @@ -188,14 +259,12 @@ int main(int argc, char **argv) int log_level = SDOG_INFO; char path[PATH_MAX]; int64_t zone = -1; - int64_t cache_size = 0; int64_t free_space = 0; int nr_vnodes = SD_DEFAULT_VNODES; bool explicit_addr = false; int af; char *p; struct cluster_driver *cdrv; - int enable_object_cache = 0; /* disabled by default */ char *pid_file = NULL; signal(SIGPIPE, SIG_IGN); @@ -263,21 +332,6 @@ int main(int argc, char **argv) } sys->this_node.zone = zone; break; - case 'w': - enable_object_cache = 1; - cache_size = strtol(optarg, &p, 10); - if (optarg == p || cache_size < 0 || - UINT64_MAX < cache_size) { - fprintf(stderr, "Invalid cache size '%s': " - "must be an integer between 0 and %lu\n", - optarg, UINT64_MAX); - exit(1); - } - sys->cache_size = cache_size * 1024 * 1024; - - fprintf(stdout, "enable write cache, " - "max cache size %" PRIu64 "M\n", cache_size); - break; case 's': free_space = strtoll(optarg, &p, 10); if (optarg == p || free_space <= 0 || @@ -303,6 +357,9 @@ int main(int argc, char **argv) sys->cdrv_option = get_cdrv_option(sys->cdrv, optarg); break; + case 'w': + init_cache_mode(optarg); + break; case 'h': usage(0); break; @@ -334,7 +391,7 @@ int main(int argc, char **argv) if (ret) exit(1); - ret = init_store(dir, enable_object_cache); + ret = init_store(dir); if (ret) exit(1); diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h index 26a46e4..6e74876 100644 --- a/sheep/sheep_priv.h +++ b/sheep/sheep_priv.h @@ -118,6 +118,8 @@ struct cluster_info { struct work_queue *block_wqueue; struct 
work_queue *sockfd_wqueue; struct work_queue *reclaim_wqueue; + + int store_writeback; }; struct siocb { @@ -211,7 +213,7 @@ static inline uint32_t sys_epoch(void) int create_listen_port(int port, void *data); -int init_store(const char *dir, int enable_write_cache); +int init_store(const char *dir); int init_base_path(const char *dir); int fill_vdi_copy_list(void *data); @@ -353,12 +355,15 @@ int gateway_read_obj(struct request *req); int gateway_write_obj(struct request *req); int gateway_create_and_write_obj(struct request *req); int gateway_remove_obj(struct request *req); +int gateway_sync_vdi(struct request *req); /* backend store */ int peer_read_obj(struct request *req); int peer_write_obj(struct request *req); int peer_create_and_write_obj(struct request *req); int peer_remove_obj(struct request *req); +int peer_flush(struct request *req); + int default_flush(void); /* object_cache */ diff --git a/sheep/store.c b/sheep/store.c index 8326156..e1f1abe 100644 --- a/sheep/store.c +++ b/sheep/store.c @@ -480,7 +480,7 @@ out: return ret; } -int init_store(const char *d, int enable_write_cache) +int init_store(const char *d) { int ret; @@ -514,8 +514,7 @@ int init_store(const char *d, int enable_write_cache) return ret; } - if (enable_write_cache) { - sys->enable_write_cache = 1; + if (sys->enable_write_cache) { ret = object_cache_init(d); if (ret) return 1; -- 1.7.5.1 -- sheepdog mailing list [email protected] http://lists.wpkg.org/mailman/listinfo/sheepdog
