[patch 3/3 v2] raid5: only wakeup necessary threads

2013-08-11, Shaohua Li
If there are not enough stripes to handle, we'd better not always queue all
available work_structs. If a worker can handle only a few stripes, or even
none, it will hurt request merging and create lock contention.

With this patch, the number of running work_structs depends on the number of
pending stripes. Note: some statistics used in the patch are accessed without
locking protection. This shouldn't matter; we just do our best to avoid
queuing unnecessary work_structs.

Signed-off-by: Shaohua Li <s...@fusionio.com>
---
 drivers/md/raid5.c |   50 ++++++++++++++++++++++++++++++++++++++++++++------
 drivers/md/raid5.h |    4 ++++
 2 files changed, 48 insertions(+), 6 deletions(-)
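
As a rough illustration of the heuristic this patch adds: one worker is
always queued, and roughly one extra worker is queued for every
MAX_STRIPE_BATCH pending stripes. Below is a minimal userspace sketch of
that logic; the struct layout, the fixed group size of 4 and the test
harness are illustrative assumptions, only the thread_cnt arithmetic
mirrors the diff that follows.

#include <stdbool.h>
#include <stdio.h>

#define MAX_STRIPE_BATCH 8

struct worker {
	bool working;		/* queued and not yet finished */
	int working_cnt;	/* stripes currently being handled */
};

/* Model of the wake-up decision: how many workers get queued? */
static int workers_to_wake(struct worker *w, int worker_cnt, int stripes_cnt)
{
	int queued = 1;		/* worker 0 always runs to avoid a race */
	int thread_cnt = stripes_cnt / MAX_STRIPE_BATCH - 1;
	int i;

	w[0].working = true;
	for (i = 1; i < worker_cnt && thread_cnt > 0; i++) {
		if (!w[i].working) {
			w[i].working = true;
			queued++;
			thread_cnt--;
		} else if (w[i].working_cnt <= MAX_STRIPE_BATCH / 2) {
			/* a lightly loaded worker will fetch more soon */
			thread_cnt--;
		}
	}
	return queued;
}

int main(void)
{
	int stripes;

	for (stripes = 0; stripes <= 40; stripes += 8) {
		struct worker w[4] = { { false, 0 } };

		printf("stripes_cnt=%2d -> workers queued: %d\n",
		       stripes, workers_to_wake(w, 4, stripes));
	}
	return 0;
}

With all workers idle this prints 1 queued worker for 0 or 8 pending
stripes, 2 for 16, 3 for 24, and so on, capped by the group size.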

Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c	2013-08-09 09:50:25.754255596 +0800
+++ linux/drivers/md/raid5.c	2013-08-09 10:03:45.016208049 +0800
@@ -77,6 +77,7 @@ static struct workqueue_struct *raid5_wq
 #define BYPASS_THRESHOLD   1
 #define NR_HASH	(PAGE_SIZE / sizeof(struct hlist_head))
 #define HASH_MASK  (NR_HASH - 1)
+#define MAX_STRIPE_BATCH   8
 
static inline struct hlist_head *stripe_hash(struct r5conf *conf, sector_t sect)
 {
@@ -209,6 +210,7 @@ static void raid5_wakeup_stripe_thread(s
 {
struct r5conf *conf = sh->raid_conf;
struct r5worker_group *group;
+   int thread_cnt;
int i;
 
if (conf->worker_cnt_per_group == 0) {
@@ -218,8 +220,26 @@ static void raid5_wakeup_stripe_thread(s
 
group = conf->worker_groups + cpu_to_group(sh->cpu);
 
-   for (i = 0; i < conf->worker_cnt_per_group; i++)
-   queue_work_on(sh->cpu, raid5_wq, &group->workers[i].work);
+   group->workers[0].working = true;
+   /* at least one worker should run to avoid race */
+   queue_work_on(sh->cpu, raid5_wq, &group->workers[0].work);
+
+   thread_cnt = group->stripes_cnt / MAX_STRIPE_BATCH - 1;
+   /* wakeup more workers */
+   for (i = 1; i < conf->worker_cnt_per_group && thread_cnt > 0; i++) {
+   if (group->workers[i].working == false) {
+   group->workers[i].working = true;
+   queue_work_on(sh->cpu, raid5_wq,
+ &group->workers[i].work);
+   thread_cnt--;
+   } else if (group->workers[i].working_cnt <=
+  MAX_STRIPE_BATCH / 2)
+   /*
+    * If a worker is not handling enough stripes, assume
+    * it will fetch more stripes soon.
+    */
+   }
 }
 
 static void do_release_stripe(struct r5conf *conf, struct stripe_head *sh)
@@ -248,6 +268,8 @@ static void do_release_stripe(struct r5c
struct r5worker_group *group;
group = conf->worker_groups + cpu_to_group(cpu);
list_add_tail(&sh->lru, &group->handle_list);
+   group->stripes_cnt++;
+   sh->group = group;
}
raid5_wakeup_stripe_thread(sh);
return;
@@ -573,6 +595,10 @@ get_active_stripe(struct r5conf *conf, s
!test_bit(STRIPE_EXPANDING, &sh->state))
BUG();
list_del_init(&sh->lru);
+   if (sh->group) {
+   sh->group->stripes_cnt--;
+   sh->group = NULL;
+   }
}
}
} while (sh == NULL);
@@ -4143,6 +4169,7 @@ static struct stripe_head *__get_priorit
 {
struct stripe_head *sh = NULL, *tmp;
struct list_head *handle_list = NULL;
+   struct r5worker_group *wg = NULL;
 
if (conf->worker_cnt_per_group == 0) {
handle_list = &conf->handle_list;
@@ -4150,12 +4177,14 @@ static struct stripe_head *__get_priorit
handle_list = NULL;
} else if (group != ANY_GROUP) {
handle_list = &conf->worker_groups[group].handle_list;
+   wg = &conf->worker_groups[group];
if (list_empty(handle_list))
handle_list = NULL;
} else {
int i;
for (i = 0; i < conf->group_cnt; i++) {
handle_list = &conf->worker_groups[i].handle_list;
+   wg = &conf->worker_groups[i];
if (!list_empty(handle_list))
break;
}
@@ -4204,11 +4233,16 @@ static struct stripe_head *__get_priorit
if (conf->bypass_count < 0)
conf->bypass_count = 0;
}
+   wg = NULL;
}
 
if (!sh)