try_to_grab_pending() leaves the LINKED tagalong works in the delayed
queue when it deletes a work. A later cwq_activate_first_delayed()
call will then wrongly increase ->nr_active, which may freeze the
whole cwq.

example:

state: cwq->max_active = 1, cwq->nr_active = 1
       one work in cwq->pool, many in cwq->delayed_works.

step1: try_to_grab_pending() removes a work from delayed_works
       but leaves its tagalong behind.
step2: when the work in cwq->pool is finished,
       cwq_activate_first_delayed() moves the tagalong to cwq->pool
       and increases ->nr_active.

current state: cwq->nr_active = 1, but the works of the cwq
               in cwq->pool are all NO_COLOR, so even when
               these works finish, cwq->nr_active will
               not be decreased and no work will be moved from
               cwq->delayed_works. The whole cwq is frozen.

Fix it by moving the work to cwq->pool before deleting it
in try_to_grab_pending(), so that the tagalong is left in
cwq->pool, just as when grabbing a non-delayed work.

Signed-off-by: Lai Jiangshan <[email protected]>
---
 kernel/workqueue.c |   26 +++++++++++++++++++++++---
 1 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 7b91332..834aa62 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -973,10 +973,9 @@ static void move_linked_works(struct work_struct *work, 
struct list_head *head,
                *nextp = n;
 }
 
-static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
+static void cwq_activate_delayed_work(struct work_struct *work)
 {
-       struct work_struct *work = list_first_entry(&cwq->delayed_works,
-                                                   struct work_struct, entry);
+       struct cpu_workqueue_struct *cwq = get_work_cwq(work);
 
        trace_workqueue_activate_work(work);
        move_linked_works(work, &cwq->pool->worklist, NULL);
@@ -984,6 +983,14 @@ static void cwq_activate_first_delayed(struct 
cpu_workqueue_struct *cwq)
        cwq->nr_active++;
 }
 
+static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
+{
+       struct work_struct *work = list_first_entry(&cwq->delayed_works,
+                                                   struct work_struct, entry);
+
+       cwq_activate_delayed_work(work);
+}
+
 /**
  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
  * @cwq: cwq of interest
@@ -1102,6 +1109,19 @@ static int try_to_grab_pending(struct work_struct *work, 
bool is_dwork,
                smp_rmb();
                if (gcwq == get_work_gcwq(work)) {
                        debug_work_deactivate(work);
+
+                       /*
+                        * A delayed work cannot be deleted directly.
+                        * Doing so would leave its LINKED tagalong
+                        * works (if any) on ->delayed_works; a later
+                        * cwq_activate_first_delayed() would move those
+                        * tagalongs (which are all NO_COLOR) to
+                        * cwq->pool and increase ->nr_active, which
+                        * may freeze the whole cwq.
+                        */
+                       if (*work_data_bits(work) & WORK_STRUCT_DELAYED)
+                               cwq_activate_delayed_work(work);
+
                        list_del_init(&work->entry);
                        cwq_dec_nr_in_flight(get_work_cwq(work),
                                get_work_color(work),
-- 
1.7.4.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to