At Fri, 19 Dec 2014 11:33:15 +0900, FUKUDA Yasuhito wrote: > > On Thu, 18 Dec 2014 13:57:39 +0900 > Hitoshi Mitake <[email protected]> wrote: > > > The change seems good, but the current master requires a rebase of this > > patch. Could you rebase on the latest master? > > Hi, Mitake-san > > I've re-created the patch on the latest master. > Regards
Applied, thanks. From next time, could you send a new patch in a new thread on the ML? e.g. [PATCH v2] sheep, dog: add speed throttling of auto-recovery Thanks, Hitoshi > > --------- > Currently, auto recovery consumes as much of the resources of a > sheepdog node as possible. > > So, this patch is intended to allow speed throttling of auto-recovery. > Speed throttling reduces the resource consumption during auto recovery. > > Add new options to the sheep and dog commands. > The options are "interval" and "object processing number". > > See the examples below. > > ex) sheep -R max=5,interval=1000 /var/lib/sheepdog > dog node recovery set-throttle 5 1000 > dog node recovery get-throttle > > Signed-off-by: Yasuhito Fukuda <[email protected]> > --- > dog/node.c | 146 > +++++++++++++++++++++++++++++++++++++++++++++- > include/internal_proto.h | 2 + > include/sheepdog_proto.h | 6 ++ > sheep/ops.c | 38 ++++++++++++ > sheep/recovery.c | 139 ++++++++++++++++++++++++++++++++++++++++++- > sheep/sheep.c | 41 +++++++++++++ > sheep/sheep_priv.h | 4 + > 7 files changed, 370 insertions(+), 6 deletions(-) > > diff --git a/dog/node.c b/dog/node.c > index a4e9142..d4c8fe7 100644 > --- a/dog/node.c > +++ b/dog/node.c > @@ -183,7 +183,7 @@ static int node_recovery_progress(void) > return result < 0 ? 
EXIT_SYSFAIL : EXIT_SUCCESS; > } > > -static int node_recovery(int argc, char **argv) > +static int node_recovery_info(int argc, char **argv) > { > struct sd_node *n; > int ret, i = 0; > @@ -235,6 +235,120 @@ static int node_recovery(int argc, char **argv) > return EXIT_SUCCESS; > } > > +static int node_recovery_set(int argc, char **argv) > +{ > + char *p; > + struct recovery_throttling *rthrottling; > + > + rthrottling = xmalloc(sizeof(struct recovery_throttling)); > + > + if (!argv[optind] || !argv[optind + 1]) { > + sd_err("Invalid interval max (%s), interval (%s)", > + argv[optind], argv[optind + 1]); > + exit(EXIT_USAGE); > + } > + > + rthrottling->max_exec_count = strtoul(argv[optind], &p, 10); > + if (argv[optind] == p || rthrottling->max_exec_count < 0 || > + UINT32_MAX <= rthrottling->max_exec_count || errno != 0 || > + *p != '\0') { > + sd_err("Invalid max (%s)", argv[optind]); > + exit(EXIT_USAGE); > + } > + > + optind++; > + > + rthrottling->queue_work_interval = strtoull(argv[optind], &p, 10); > + if (argv[optind] == p || rthrottling->queue_work_interval < 0 || > + UINT64_MAX <= rthrottling->queue_work_interval || errno != 0 || > + *p != '\0') { > + sd_err("Invalid interval (%s)", argv[optind]); > + exit(EXIT_USAGE); > + } > + > + if ((rthrottling->max_exec_count == 0 && > + rthrottling->queue_work_interval != 0) || > + (rthrottling->max_exec_count != 0 && > + rthrottling->queue_work_interval == 0)) { > + sd_err("Invalid interval max (%"PRIu32"), interval (%"PRIu64")", > + rthrottling->max_exec_count, rthrottling->queue_work_interval); > + exit(EXIT_USAGE); > + } > + > + int ret = 0; > + struct sd_req req; > + struct sd_rsp *rsp = (struct sd_rsp *)&req; > + > + sd_init_req(&req, SD_OP_SET_RECOVERY); > + req.flags = SD_FLAG_CMD_WRITE; > + req.data_length = sizeof(struct recovery_throttling); > + ret = dog_exec_req(&sd_nid, &req, rthrottling); > + > + if (ret < 0) > + ret = EXIT_SYSFAIL; > + > + if (rsp->result == SD_RES_SUCCESS) > + ret = EXIT_SUCCESS; 
> + else > + ret = EXIT_FAILURE; > + > + switch (ret) { > + case EXIT_FAILURE: > + case EXIT_SYSFAIL: > + sd_err("Failed to execute request"); > + ret = -1; > + break; > + case EXIT_SUCCESS: > + /* do nothing */ > + break; > + default: > + sd_err("unknown return code: %d", ret); > + ret = -1; > + break; > + } > + > + free(rthrottling); > + return ret; > +} > + > +static int node_recovery_get(int argc, char **argv) > +{ > + struct recovery_throttling rthrottling; > + int ret = 0; > + > + struct sd_req req; > + struct sd_rsp *rsp = (struct sd_rsp *)&req; > + > + sd_init_req(&req, SD_OP_GET_RECOVERY); > + req.data_length = sizeof(rthrottling); > + > + ret = dog_exec_req(&sd_nid, &req, &rthrottling); > + if (ret < 0) > + ret = EXIT_SYSFAIL; > + > + if (rsp->result == SD_RES_SUCCESS) > + ret = EXIT_SUCCESS; > + else > + ret = EXIT_FAILURE; > + > + switch (ret) { > + case EXIT_FAILURE: > + case EXIT_SYSFAIL: > + sd_err("Failed to execute request"); > + ret = -1; > + break; > + case EXIT_SUCCESS: > + sd_info("max (%"PRIu32"), interval (%"PRIu64")", > + rthrottling.max_exec_count, rthrottling.queue_work_interval); > + break; > + default: > + sd_err("unknown return code: %d", ret); > + ret = -1; > + break; > + } > + return ret; > +} > + > static struct sd_node *idx_to_node(struct rb_root *nroot, int idx) > { > struct sd_node *n = rb_entry(rb_first(nroot), struct sd_node, rb); > @@ -538,6 +652,31 @@ static struct sd_option node_options[] = { > { 0, NULL, false, NULL }, > }; > > +static struct subcommand node_recovery_cmd[] = { > + {"info", NULL, "aphPrT", "show recovery information of nodes (default)", > + NULL, CMD_NEED_NODELIST, node_recovery_info, node_options}, > + {"set-throttle", "<max> <interval>", NULL, "set new throttling", NULL, > + CMD_NEED_ARG|CMD_NEED_NODELIST, node_recovery_set, node_options}, > + {"get-throttle", NULL, NULL, "get current throttling", NULL, > + CMD_NEED_NODELIST, node_recovery_get, node_options}, > + {NULL}, > +}; > + > +static int 
node_recovery(int argc, char **argv) > +{ > + int ret; > + if (argc == optind) { > + ret = update_node_list(SD_MAX_NODES); > + if (ret < 0) { > + sd_err("Failed to get node list"); > + exit(EXIT_SYSFAIL); > + } > + return node_recovery_info(argc, argv); > + } > + > + return do_generic_subcommand(node_recovery_cmd, argc, argv); > +} > + > static int node_log_level_set(int argc, char **argv) > { > int ret = 0; > @@ -632,8 +771,9 @@ static struct subcommand node_cmd[] = { > CMD_NEED_NODELIST, node_list}, > {"info", NULL, "aprhT", "show information about each node", NULL, > CMD_NEED_NODELIST, node_info}, > - {"recovery", NULL, "aphPrT", "show recovery information of nodes", NULL, > - CMD_NEED_NODELIST, node_recovery, node_options}, > + {"recovery", "<max> <interval>", "aphPrT", > + "show recovery information or set/get recovery speed throttling of > nodes", > + node_recovery_cmd, 0, node_recovery, node_options}, > {"md", "[disks]", "aprAfhT", "See 'dog node md' for more information", > node_md_cmd, CMD_NEED_ARG, node_md, node_options}, > {"stat", NULL, "aprwhT", "show stat information about the node", NULL, > diff --git a/include/internal_proto.h b/include/internal_proto.h > index 3f5d77f..f6ba18e 100644 > --- a/include/internal_proto.h > +++ b/include/internal_proto.h > @@ -111,6 +111,8 @@ > #define SD_OP_VDI_STATE_SNAPSHOT_CTL 0xC7 > #define SD_OP_INODE_COHERENCE 0xC8 > #define SD_OP_READ_DEL_VDIS 0xC9 > +#define SD_OP_GET_RECOVERY 0xCA > +#define SD_OP_SET_RECOVERY 0xCB > > /* internal flags for hdr.flags, must be above 0x80 */ > #define SD_FLAG_CMD_RECOVERY 0x0080 > diff --git a/include/sheepdog_proto.h b/include/sheepdog_proto.h > index 4f0c48c..5f6d157 100644 > --- a/include/sheepdog_proto.h > +++ b/include/sheepdog_proto.h > @@ -272,6 +272,12 @@ struct generation_reference { > int32_t count; > }; > > +struct recovery_throttling { > + uint32_t max_exec_count; > + uint64_t queue_work_interval; > + bool throttling; > +}; > + > struct sd_inode { > char 
name[SD_MAX_VDI_LEN]; > char tag[SD_MAX_VDI_TAG_LEN]; > diff --git a/sheep/ops.c b/sheep/ops.c > index b384aed..b0aee58 100644 > --- a/sheep/ops.c > +++ b/sheep/ops.c > @@ -1445,6 +1445,30 @@ static int cluster_inode_coherence(const struct sd_req > *req, > !!req->inode_coherence.validate, &sender->nid); > } > > +static int local_get_recovery(struct request *req) > +{ > + struct recovery_throttling rthrottling; > + > + rthrottling = get_recovery(); > + memcpy(req->data, &rthrottling, sizeof(rthrottling)); > + req->rp.data_length = sizeof(rthrottling); > + > + return SD_RES_SUCCESS; > +} > + > +static int local_set_recovery(struct request *req) > +{ > + struct recovery_throttling *rthrottling; > + > + rthrottling = xmalloc(sizeof(struct recovery_throttling)); > + > + memcpy(rthrottling, req->data, sizeof(struct recovery_throttling)); > + set_recovery(rthrottling); > + > + free(rthrottling); > + return SD_RES_SUCCESS; > +} > + > static struct sd_op_template sd_ops[] = { > > /* cluster operations */ > @@ -1892,6 +1916,20 @@ static struct sd_op_template sd_ops[] = { > .type = SD_OP_TYPE_PEER, > .process_work = peer_decref_object, > }, > + > + [SD_OP_GET_RECOVERY] = { > + .name = "GET_RECOVERY", > + .type = SD_OP_TYPE_LOCAL, > + .force = true, > + .process_work = local_get_recovery, > + }, > + > + [SD_OP_SET_RECOVERY] = { > + .name = "SET_RECOVERY", > + .type = SD_OP_TYPE_LOCAL, > + .force = true, > + .process_work = local_set_recovery, > + }, > }; > > const struct sd_op_template *get_sd_op(uint8_t opcode) > diff --git a/sheep/recovery.c b/sheep/recovery.c > index 85dad21..325122b 100644 > --- a/sheep/recovery.c > +++ b/sheep/recovery.c > @@ -78,6 +78,15 @@ struct recovery_info { > struct sd_mutex vinfo_lock; > > struct sd_node *excluded; > + > + uint32_t max_exec_count; > + uint64_t queue_work_interval; > + bool throttling; > +}; > + > +struct recovery_timer { > + void (*callback)(void *); > + void *data; > }; > > static struct recovery_info *next_rinfo; > @@ -900,6 
+909,91 @@ void resume_suspended_recovery(void) > } > } > > +static void recovery_timer_handler(int fd, int events, void *data) > +{ > + struct recovery_timer *t = data; > + uint64_t val; > + > + if (read(fd, &val, sizeof(val)) < 0) > + return; > + t->callback(t->data); > + unregister_event(fd); > + close(fd); > +} > + > +static void add_recovery_timer(struct recovery_timer *t, unsigned int > mseconds) > +{ > + struct itimerspec it; > + int tfd; > + > + tfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK); > + if (tfd < 0) { > + sd_err("timerfd_create: %m"); > + return; > + } > + > + memset(&it, 0, sizeof(it)); > + it.it_value.tv_sec = mseconds / 1000; > + it.it_value.tv_nsec = (mseconds % 1000) * 1000000; > + > + if (timerfd_settime(tfd, 0, &it, NULL) < 0) { > + sd_err("timerfd_settime: %m"); > + return; > + } > + > + if (register_event(tfd, recovery_timer_handler, t) < 0) > + sd_err("failed to register timer fd"); > +} > + > +static void recover_next_object_delay(void *arg) > +{ > + struct recovery_info *rinfo = main_thread_get(current_rinfo); > + uint32_t nr_threads = md_nr_disks() * 2; > + double thread_unit_exec = 0; > + double mod = 0; > + > + if (!rinfo) > + return; > + > + thread_unit_exec = (double) rinfo->max_exec_count / nr_threads; > + mod = rinfo->max_exec_count % nr_threads; > + > + if (rinfo->max_exec_count <= nr_threads || mod != 0) { > + if (rand() % 100 + 1 <= (mod / nr_threads) * 100) > + thread_unit_exec = ceil(thread_unit_exec); > + else > + thread_unit_exec = floor(thread_unit_exec); > + } > + > + for (int i = 0; i < thread_unit_exec; i++) { > + rinfo = main_thread_get(current_rinfo); > + > + if (!rinfo) > + return; > + > + if (rinfo->next - rinfo->done > rinfo->max_exec_count) > + break; > + > + recover_next_object(rinfo); > + } > + > + if (rinfo->throttling != sys->rthrottling.throttling) { > + rinfo->max_exec_count = sys->rthrottling.max_exec_count; > + rinfo->queue_work_interval = > + sys->rthrottling.queue_work_interval; > + 
rinfo->throttling = sys->rthrottling.throttling; > + } > + > + if (rinfo->throttling) { > + static struct recovery_timer rt = { > + .callback = recover_next_object_delay, > + .data = &rt, > + }; > + add_recovery_timer(&rt, rinfo->queue_work_interval); > + } else > + recover_next_object(rinfo); > +} > + > static void recover_object_main(struct work *work) > { > struct recovery_work *rw = container_of(work, struct recovery_work, > @@ -935,7 +1029,16 @@ static void recover_object_main(struct work *work) > if (rinfo->done >= rinfo->count) > goto finish_recovery; > > - recover_next_object(rinfo); > + if (!rinfo->throttling && !sys->rthrottling.throttling) > + recover_next_object(rinfo); > + else if (!rinfo->throttling && sys->rthrottling.throttling) { > + static struct recovery_timer rt = { > + .callback = recover_next_object_delay, > + .data = &rt, > + }; > + add_recovery_timer(&rt, sys->rthrottling.queue_work_interval); > + } > + > free_recovery_obj_work(row); > return; > finish_recovery: > @@ -982,8 +1085,17 @@ static void finish_object_list(struct work *work) > return; > } > > - for (uint32_t i = 0; i < nr_threads; i++) > - recover_next_object(rinfo); > + for (uint32_t i = 0; i < nr_threads; i++) { > + if (rinfo->throttling) { > + static struct recovery_timer rt = { > + .callback = recover_next_object_delay, > + .data = &rt, > + }; > + add_recovery_timer(&rt, rinfo->queue_work_interval); > + } else > + recover_next_object(rinfo); > + } > + > return; > } > > @@ -1143,6 +1255,9 @@ int start_recovery(struct vnode_info *cur_vinfo, struct > vnode_info *old_vinfo, > rinfo->max_epoch = sys->cinfo.epoch; > rinfo->vinfo_array = xzalloc(sizeof(struct vnode_info *) * > rinfo->max_epoch); > + rinfo->max_exec_count = sys->rthrottling.max_exec_count; > + rinfo->queue_work_interval = sys->rthrottling.queue_work_interval; > + rinfo->throttling = sys->rthrottling.throttling; > sd_init_mutex(&rinfo->vinfo_lock); > if (epoch_lifted) > rinfo->notify_complete = true; /* Reweight or node 
recovery */ > @@ -1236,3 +1351,21 @@ void get_recovery_state(struct recovery_state *state) > state->nr_finished = rinfo->done; > state->nr_total = rinfo->count; > } > + > +void set_recovery(struct recovery_throttling *rthrottling) > +{ > + sys->rthrottling.max_exec_count = rthrottling->max_exec_count; > + sys->rthrottling.queue_work_interval = > + rthrottling->queue_work_interval; > + if (rthrottling->max_exec_count > 0 && > + rthrottling->queue_work_interval > 0) > + sys->rthrottling.throttling = true; > + else > + sys->rthrottling.throttling = false; > +} > + > +struct recovery_throttling get_recovery(void) > +{ > + return sys->rthrottling; > +} > + > diff --git a/sheep/sheep.c b/sheep/sheep.c > index ef45a33..9fc7610 100644 > --- a/sheep/sheep.c > +++ b/sheep/sheep.c > @@ -115,6 +115,12 @@ static const char log_help[] = > " syslog syslog of the system\n" > " stdout standard output\n"; > > +static const char recovery_help[] = > +"Available arguments:\n" > +"\tmax=: object recovery process maximum count of each interval\n" > +"\tinterval=: object recovery interval time (millisec)\n" > +"Example:\n\t$ sheep -R max=50,interval=1000 ...\n"; > + > static struct sd_option sheep_options[] = { > {'b', "bindaddr", true, "specify IP address of interface to listen on", > bind_help}, > @@ -137,6 +143,8 @@ static struct sd_option sheep_options[] = { > {'P', "pidfile", true, "create a pid file"}, > {'r', "http", true, "enable http service. 
(default: disabled)", > http_help}, > + {'R', "recovery", true, "specify the recovery speed throttling", > + recovery_help}, > {'u', "upgrade", false, "upgrade to the latest data layout"}, > {'v', "version", false, "show the version"}, > {'w', "cache", true, "enable object cache", cache_help}, > @@ -424,6 +432,26 @@ static struct option_parser journal_parsers[] = { > { NULL, NULL }, > }; > > +static uint32_t max_exec_count; > +static uint64_t queue_work_interval; > +static int max_exec_count_parser(const char *s) > +{ > + max_exec_count = strtol(s, NULL, 10); > + return 0; > +} > + > +static int queue_work_interval_parser(const char *s) > +{ > + queue_work_interval = strtol(s, NULL, 10); > + return 0; > +} > + > +static struct option_parser recovery_parsers[] = { > + { "max=", max_exec_count_parser }, > + { "interval=", queue_work_interval_parser }, > + { NULL, NULL }, > +}; > + > static size_t get_nr_nodes(void) > { > struct vnode_info *vinfo; > @@ -633,6 +661,10 @@ int main(int argc, char **argv) > > sys->node_status = SD_NODE_STATUS_INITIALIZATION; > > + sys->rthrottling.max_exec_count = 0; > + sys->rthrottling.queue_work_interval = 0; > + sys->rthrottling.throttling = false; > + > install_crash_handler(crash_handler); > signal(SIGPIPE, SIG_IGN); > > @@ -751,6 +783,15 @@ int main(int argc, char **argv) > case 'h': > usage(0); > break; > + case 'R': > + if (option_parse(optarg, ",", recovery_parsers) < 0) > + exit(1); > + sys->rthrottling.max_exec_count = max_exec_count; > + sys->rthrottling.queue_work_interval > + = queue_work_interval; > + if (max_exec_count > 0 && queue_work_interval > 0) > + sys->rthrottling.throttling = true; > + break; > case 'v': > fprintf(stdout, "Sheepdog daemon version %s\n", > PACKAGE_VERSION); > diff --git a/sheep/sheep_priv.h b/sheep/sheep_priv.h > index f782044..9ad31a9 100644 > --- a/sheep/sheep_priv.h > +++ b/sheep/sheep_priv.h > @@ -143,6 +143,8 @@ struct system_info { > bool gateway_only; > bool nosync; > > + struct 
recovery_throttling rthrottling; > + > struct work_queue *net_wqueue; > struct work_queue *gateway_wqueue; > struct work_queue *io_wqueue; > @@ -429,6 +431,8 @@ int start_recovery(struct vnode_info *cur_vinfo, struct > vnode_info *, bool, > bool oid_in_recovery(uint64_t oid); > bool node_in_recovery(void); > void get_recovery_state(struct recovery_state *state); > +void set_recovery(struct recovery_throttling *rthrottling); > +struct recovery_throttling get_recovery(void); > > int read_backend_object(uint64_t oid, char *data, unsigned int datalen, > uint64_t offset); > -- > 1.7.1 > > > > -- > NTTソフトウェア株式会社 > クラウド事業部 第一事業ユニット(C一BU) > 福田康人(FUKUDA Yasuhito) > E-mail:[email protected] > 〒220-0012 横浜市西区みなとみらい4-4-5 > 横浜アイマークプレイス13階 > TEL:045-212-7393/FAX:045-662-7856 > > > -- > sheepdog mailing list > [email protected] > http://lists.wpkg.org/mailman/listinfo/sheepdog -- sheepdog mailing list [email protected] https://lists.wpkg.org/mailman/listinfo/sheepdog
