Add per-CPU tracking of tasks blocked in RCU read-side critical
sections. Each rcu_data gets a blkd_list protected by blkd_lock,
mirroring the rcu_node blkd_tasks list at per-CPU granularity.

Tasks are added to the per-CPU list when they are preempted within an
RCU read-side critical section and removed when they report their
deferred quiescent state on the rcu_read_unlock() path. A
WARN_ON_ONCE() in rcu_gp_init() verifies that each leaf rcu_node's
blkd_tasks list never holds fewer tasks than the per-CPU lists of the
CPUs that rcu_node covers.
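
For reference, a condensed sketch of the ordering the new lists rely on
(illustration only, not part of the patch; rcu_preempt_ctxt_queue() is
the existing helper that queues the task on rnp->blkd_tasks and drops
the rnp lock):

    /* Preemption in a reader: both list additions run under the rnp lock. */
    raw_spin_lock_rcu_node(rnp);
    raw_spin_lock(&rdp->blkd_lock);
    list_add(&t->rcu_rdp_entry, &rdp->blkd_list);  /* per-CPU list */
    raw_spin_unlock(&rdp->blkd_lock);
    rcu_preempt_ctxt_queue(rnp, rdp);              /* rnp->blkd_tasks */

    /*
     * Deferred-QS (rcu_read_unlock) path: the per-CPU removal happens
     * before the rnp lock is taken, so a task can briefly be on
     * rnp->blkd_tasks while on no rdp->blkd_list.  Hence rcu_gp_init()
     * checks rnp_count >= rdp_total rather than equality.
     */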

Signed-off-by: Joel Fernandes <[email protected]>
---
 include/linux/sched.h    |  4 ++++
 kernel/fork.c            |  4 ++++
 kernel/rcu/Kconfig       | 12 ++++++++++++
 kernel/rcu/tree.c        | 32 ++++++++++++++++++++++++++++++++
 kernel/rcu/tree.h        |  6 ++++++
 kernel/rcu/tree_plugin.h | 21 +++++++++++++++++++++
 6 files changed, 79 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d395f2810fac..90ce501a568e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -931,6 +931,10 @@ struct task_struct {
        union rcu_special               rcu_read_unlock_special;
        struct list_head                rcu_node_entry;
        struct rcu_node                 *rcu_blocked_node;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+       struct list_head                rcu_rdp_entry;
+       int                             rcu_blocked_cpu;
+#endif
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 
 #ifdef CONFIG_TASKS_RCU
diff --git a/kernel/fork.c b/kernel/fork.c
index b1f3915d5f8e..7a5ba2d2c1b5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1819,6 +1819,10 @@ static inline void rcu_copy_process(struct task_struct *p)
        p->rcu_read_unlock_special.s = 0;
        p->rcu_blocked_node = NULL;
        INIT_LIST_HEAD(&p->rcu_node_entry);
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+       INIT_LIST_HEAD(&p->rcu_rdp_entry);
+       p->rcu_blocked_cpu = -1;
+#endif
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 #ifdef CONFIG_TASKS_RCU
        p->rcu_tasks_holdout = false;
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 4d9b21f69eaa..4bb12f1fed09 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -248,6 +248,18 @@ config RCU_EXP_KTHREAD
 
          Accept the default if unsure.
 
+config RCU_PER_CPU_BLOCKED_LISTS
+       bool "Use per-CPU blocked task lists in PREEMPT_RCU"
+       depends on PREEMPT_RCU
+       default n
+       help
+         Enable per-CPU tracking of tasks blocked in RCU read-side
+         critical sections. This Kconfig option makes the feature easy to
+         toggle; it will eventually be removed in favor of always keeping
+         the optimization enabled.
+
+         Accept the default if unsure.
+
 config RCU_NOCB_CPU
        bool "Offload RCU callback processing from boot-selected CPUs"
        depends on TREE_RCU
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 293bbd9ac3f4..e2b6a4579086 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1809,6 +1809,14 @@ static noinline_for_stack bool rcu_gp_init(void)
        struct rcu_node *rnp = rcu_get_root();
        bool start_new_poll;
        unsigned long old_gp_seq;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+       struct task_struct *t_verify;
+       int cpu_verify;
+       int rnp_count;
+       int rdp_total;
+       struct rcu_data *rdp_cpu;
+       struct task_struct *t_rdp;
+#endif
 
        WRITE_ONCE(rcu_state.gp_activity, jiffies);
        raw_spin_lock_irq_rcu_node(rnp);
@@ -1891,6 +1899,26 @@ static noinline_for_stack bool rcu_gp_init(void)
                 */
                arch_spin_lock(&rcu_state.ofl_lock);
                raw_spin_lock_rcu_node(rnp);
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+               /*
+                * Verify that the rdp lists are consistent with the rnp list.
+                * The unlock path removes a task from the rdp list before the
+                * rnp list, so a task may be on rnp but not on any rdp (it is
+                * mid-removal); the expected invariant is rnp_count >= rdp_total.
+                */
+               rnp_count = 0;
+               rdp_total = 0;
+               list_for_each_entry(t_verify, &rnp->blkd_tasks, rcu_node_entry)
+                       rnp_count++;
+               for (cpu_verify = rnp->grplo; cpu_verify <= rnp->grphi; cpu_verify++) {
+                       rdp_cpu = per_cpu_ptr(&rcu_data, cpu_verify);
+                       raw_spin_lock(&rdp_cpu->blkd_lock);
+                       list_for_each_entry(t_rdp, &rdp_cpu->blkd_list, rcu_rdp_entry)
+                               rdp_total++;
+                       raw_spin_unlock(&rdp_cpu->blkd_lock);
+               }
+               WARN_ON_ONCE(rnp_count < rdp_total);
+#endif
                if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
                    !rnp->wait_blkd_tasks) {
                        /* Nothing to do on this leaf rcu_node structure. */
@@ -4143,6 +4171,10 @@ rcu_boot_init_percpu_data(int cpu)
        rdp->rcu_onl_gp_state = RCU_GP_CLEANED;
        rdp->last_sched_clock = jiffies;
        rdp->cpu = cpu;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+       raw_spin_lock_init(&rdp->blkd_lock);
+       INIT_LIST_HEAD(&rdp->blkd_list);
+#endif
        rcu_boot_init_nocb_percpu_data(rdp);
 }
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b8bbe7960cda..13d5649a80fb 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -294,6 +294,12 @@ struct rcu_data {
 
        long lazy_len;                  /* Length of buffered lazy callbacks. */
        int cpu;
+
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+       /* 8) Per-CPU blocked task tracking. */
+       raw_spinlock_t blkd_lock;       /* Protects blkd_list. */
+       struct list_head blkd_list;     /* Tasks blocked on this CPU. */
+#endif
 };
 
 /* Values for nocb_defer_wakeup field in struct rcu_data. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 73ba5f4a968d..5d2bde19131a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -338,6 +338,12 @@ void rcu_note_context_switch(bool preempt)
                raw_spin_lock_rcu_node(rnp);
                t->rcu_read_unlock_special.b.blocked = true;
                t->rcu_blocked_node = rnp;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+               t->rcu_blocked_cpu = rdp->cpu;
+               raw_spin_lock(&rdp->blkd_lock);
+               list_add(&t->rcu_rdp_entry, &rdp->blkd_list);
+               raw_spin_unlock(&rdp->blkd_lock);
+#endif
 
                /*
                 * Verify the CPU's sanity, trace the preemption, and
@@ -485,6 +491,10 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
        struct rcu_data *rdp;
        struct rcu_node *rnp;
        union rcu_special special;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+       int blocked_cpu;
+       struct rcu_data *blocked_rdp;
+#endif
 
        rdp = this_cpu_ptr(&rcu_data);
        if (rdp->defer_qs_iw_pending == DEFER_QS_PENDING)
@@ -530,6 +540,17 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
                 * to loop.  Retain a WARN_ON_ONCE() out of sheer paranoia.
                 */
                rnp = t->rcu_blocked_node;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+               /* Remove from per-CPU list if task was added to it. */
+               blocked_cpu = t->rcu_blocked_cpu;
+               if (blocked_cpu != -1) {
+                       blocked_rdp = per_cpu_ptr(&rcu_data, blocked_cpu);
+                       raw_spin_lock(&blocked_rdp->blkd_lock);
+                       list_del_init(&t->rcu_rdp_entry);
+                       t->rcu_blocked_cpu = -1;
+                       raw_spin_unlock(&blocked_rdp->blkd_lock);
+               }
+#endif
                raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
                WARN_ON_ONCE(rnp != t->rcu_blocked_node);
                WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
-- 
2.34.1

