[patch 3/3 v2] raid5: only wakeup necessary threads

If there are not enough stripes to handle, we'd better not always queue all
available work_structs. If one worker can only handle a small number of
stripes, or even none, it will impact request merging and create lock
contention.

With this patch, the number of running work_structs depends on the number
of pending stripes.

Note: some statistics info used in the patch is accessed without locking
protection. This shouldn't matter; we just try our best to avoid queuing
unnecessary work_structs.

Signed-off-by: Shaohua Li

---
 drivers/md/raid5.c |   50 ++++++++++++++++++++++++++++++++++++++++++++------
 drivers/md/raid5.h |    4 ++++
 2 files changed, 48 insertions(+), 6 deletions(-)

Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c	2013-08-09 09:50:25.754255596 +0800
+++ linux/drivers/md/raid5.c	2013-08-09 10:03:45.016208049 +0800
@@ -77,6 +77,7 @@ static struct workqueue_struct *raid5_wq
 #define BYPASS_THRESHOLD	1
 #define NR_HASH			(PAGE_SIZE / sizeof(struct hlist_head))
 #define HASH_MASK		(NR_HASH - 1)
+#define MAX_STRIPE_BATCH	8
 
 static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
 {
@@ -209,6 +210,7 @@ static void raid5_wakeup_stripe_thread(s
 {
 	struct r5conf *conf = sh->raid_conf;
 	struct r5worker_group *group;
+	int thread_cnt;
 	int i;
 
 	if (conf->worker_cnt_per_group == 0) {
@@ -218,8 +220,26 @@ static void raid5_wakeup_stripe_thread(s
 
 	group = conf->worker_groups + cpu_to_group(sh->cpu);
 
-	for (i = 0; i < conf->worker_cnt_per_group; i++)
-		queue_work_on(sh->cpu, raid5_wq, &group->workers[i].work);
+	group->workers[0].working = true;
+	/* at least one worker should run to avoid race */
+	queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work);
+
+	thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1;
+	/* wakeup more workers */
+	for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) {
+		if (group->workers[i].working == false) {
+			group->workers[i].working = true;
+			queue_work_on(sh->cpu, raid5_wq,
+				      &group->workers[i].work);
+			thread_cnt--;
+		} else if (group->workers[i].working_cnt <=
+			   MAX_STRIPE_BATCH / 2)
+			/*
+			 * If a worker has no enough stripes handling, assume
+			 * it will fetch more stripes soon.
+			 */
+			thread_cnt--;
+	}
 }
 
 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
@@ -248,6 +268,8 @@ static void do_release_stripe(struct r5c
 			struct r5worker_group *group;
 			group = conf->worker_groups + cpu_to_group(cpu);
 			list_add_tail(&sh->lru, &group->handle_list);
+			group->stripes_cnt++;
+			sh->group = group;
 		}
 		raid5_wakeup_stripe_thread(sh);
 		return;
@@ -573,6 +595,10 @@ get_active_stripe(struct r5conf *conf, s
 				    !test_bit(STRIPE_EXPANDING, &sh->state))
 					BUG();
 				list_del_init(&sh->lru);
+				if (sh->group) {
+					sh->group->stripes_cnt--;
+					sh->group = NULL;
+				}
 			}
 		}
 	} while (sh == NULL);
@@ -4143,6 +4169,7 @@ static struct stripe_head *__get_priorit
 {
 	struct stripe_head *sh = NULL, *tmp;
 	struct list_head *handle_list = NULL;
+	struct r5worker_group *wg = NULL;
 
 	if (conf->worker_cnt_per_group == 0) {
 		handle_list = &conf->handle_list;
@@ -4150,12 +4177,14 @@ static struct stripe_head *__get_priorit
 		handle_list = NULL;
 	} else if (group != ANY_GROUP) {
 		handle_list = &conf->worker_groups[group].handle_list;
+		wg = &conf->worker_groups[group];
 		if (list_empty(handle_list))
 			handle_list = NULL;
 	} else {
 		int i;
 		for (i = 0; i < conf->group_cnt; i++) {
 			handle_list = &conf->worker_groups[i].handle_list;
+			wg = &conf->worker_groups[i];
 			if (!list_empty(handle_list))
 				break;
 		}
@@ -4204,11 +4233,16 @@ static struct stripe_head *__get_priorit
 			if (conf->bypass_count < 0)
 				conf->bypass_count = 0;
 		}
+		wg = NULL;
 	}
 
 	if (!sh)
[patch 3/3 v2] raid5: only wakeup necessary threads

If there are not enough stripes to handle, we'd better not always queue all
available work_structs. If one worker can only handle a small number of
stripes, or even none, it will impact request merging and create lock
contention.

With this patch, the number of running work_structs depends on the number
of pending stripes.

Note: some statistics info used in the patch is accessed without locking
protection. This shouldn't matter; we just try our best to avoid queuing
unnecessary work_structs.

Signed-off-by: Shaohua Li <s...@fusionio.com>

---
 drivers/md/raid5.c |   50 ++++++++++++++++++++++++++++++++++++++++++++------
 drivers/md/raid5.h |    4 ++++
 2 files changed, 48 insertions(+), 6 deletions(-)

Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c	2013-08-09 09:50:25.754255596 +0800
+++ linux/drivers/md/raid5.c	2013-08-09 10:03:45.016208049 +0800
@@ -77,6 +77,7 @@ static struct workqueue_struct *raid5_wq
 #define BYPASS_THRESHOLD	1
 #define NR_HASH			(PAGE_SIZE / sizeof(struct hlist_head))
 #define HASH_MASK		(NR_HASH - 1)
+#define MAX_STRIPE_BATCH	8
 
 static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
 {
@@ -209,6 +210,7 @@ static void raid5_wakeup_stripe_thread(s
 {
 	struct r5conf *conf = sh->raid_conf;
 	struct r5worker_group *group;
+	int thread_cnt;
 	int i;
 
 	if (conf->worker_cnt_per_group == 0) {
@@ -218,8 +220,26 @@ static void raid5_wakeup_stripe_thread(s
 
 	group = conf->worker_groups + cpu_to_group(sh->cpu);
 
-	for (i = 0; i < conf->worker_cnt_per_group; i++)
-		queue_work_on(sh->cpu, raid5_wq, &group->workers[i].work);
+	group->workers[0].working = true;
+	/* at least one worker should run to avoid race */
+	queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work);
+
+	thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1;
+	/* wakeup more workers */
+	for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) {
+		if (group->workers[i].working == false) {
+			group->workers[i].working = true;
+			queue_work_on(sh->cpu, raid5_wq,
+				      &group->workers[i].work);
+			thread_cnt--;
+		} else if (group->workers[i].working_cnt <=
+			   MAX_STRIPE_BATCH / 2)
+			/*
+			 * If a worker has no enough stripes handling, assume
+			 * it will fetch more stripes soon.
+			 */
+			thread_cnt--;
+	}
 }
 
 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
@@ -248,6 +268,8 @@ static void do_release_stripe(struct r5c
 			struct r5worker_group *group;
 			group = conf->worker_groups + cpu_to_group(cpu);
 			list_add_tail(&sh->lru, &group->handle_list);
+			group->stripes_cnt++;
+			sh->group = group;
 		}
 		raid5_wakeup_stripe_thread(sh);
 		return;
@@ -573,6 +595,10 @@ get_active_stripe(struct r5conf *conf, s
 				    !test_bit(STRIPE_EXPANDING, &sh->state))
 					BUG();
 				list_del_init(&sh->lru);
+				if (sh->group) {
+					sh->group->stripes_cnt--;
+					sh->group = NULL;
+				}
 			}
 		}
 	} while (sh == NULL);
@@ -4143,6 +4169,7 @@ static struct stripe_head *__get_priorit
 {
 	struct stripe_head *sh = NULL, *tmp;
 	struct list_head *handle_list = NULL;
+	struct r5worker_group *wg = NULL;
 
 	if (conf->worker_cnt_per_group == 0) {
 		handle_list = &conf->handle_list;
@@ -4150,12 +4177,14 @@ static struct stripe_head *__get_priorit
 		handle_list = NULL;
 	} else if (group != ANY_GROUP) {
 		handle_list = &conf->worker_groups[group].handle_list;
+		wg = &conf->worker_groups[group];
 		if (list_empty(handle_list))
 			handle_list = NULL;
 	} else {
 		int i;
 		for (i = 0; i < conf->group_cnt; i++) {
 			handle_list = &conf->worker_groups[i].handle_list;
+			wg = &conf->worker_groups[i];
 			if (!list_empty(handle_list))
 				break;
 		}
@@ -4204,11 +4233,16 @@ static struct stripe_head *__get_priorit
 			if (conf->bypass_count < 0)
 				conf->bypass_count = 0;
 		}
+		wg = NULL;