Add per-CPU tracking of tasks blocked in RCU read-side critical sections. Each rcu_data structure gets a ->blkd_list protected by ->blkd_lock, mirroring the rcu_node ->blkd_tasks list at per-CPU granularity.
Tasks are added on preemption and removed on rcu_read_unlock(). A
WARN_ON_ONCE() in rcu_gp_init() verifies that the per-CPU lists stay
consistent with the corresponding rcu_node list.

Signed-off-by: Joel Fernandes <[email protected]>
---
 include/linux/sched.h    |  4 ++++
 kernel/fork.c            |  4 ++++
 kernel/rcu/Kconfig       | 12 ++++++++++++
 kernel/rcu/tree.c        | 32 ++++++++++++++++++++++++++++++++
 kernel/rcu/tree.h        |  6 ++++++
 kernel/rcu/tree_plugin.h | 21 +++++++++++++++++++++
 6 files changed, 79 insertions(+)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index d395f2810fac..90ce501a568e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -931,6 +931,10 @@ struct task_struct {
 	union rcu_special		rcu_read_unlock_special;
 	struct list_head		rcu_node_entry;
 	struct rcu_node			*rcu_blocked_node;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+	struct list_head		rcu_rdp_entry;
+	int				rcu_blocked_cpu;
+#endif
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 
 #ifdef CONFIG_TASKS_RCU
diff --git a/kernel/fork.c b/kernel/fork.c
index b1f3915d5f8e..7a5ba2d2c1b5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1819,6 +1819,10 @@ static inline void rcu_copy_process(struct task_struct *p)
 	p->rcu_read_unlock_special.s = 0;
 	p->rcu_blocked_node = NULL;
 	INIT_LIST_HEAD(&p->rcu_node_entry);
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+	INIT_LIST_HEAD(&p->rcu_rdp_entry);
+	p->rcu_blocked_cpu = -1;
+#endif
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 #ifdef CONFIG_TASKS_RCU
 	p->rcu_tasks_holdout = false;
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 4d9b21f69eaa..4bb12f1fed09 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -248,6 +248,18 @@ config RCU_EXP_KTHREAD
 
 	  Accept the default if unsure.
 
+config RCU_PER_CPU_BLOCKED_LISTS
+	bool "Use per-CPU blocked task lists in PREEMPT_RCU"
+	depends on PREEMPT_RCU
+	default n
+	help
+	  Enable per-CPU tracking of tasks blocked in RCU read-side
+	  critical sections.  This makes it easy to toggle the feature;
+	  eventually this Kconfig option will be removed in favor of
+	  always keeping the optimization enabled.
+
+	  Accept the default if unsure.
+
 config RCU_NOCB_CPU
 	bool "Offload RCU callback processing from boot-selected CPUs"
 	depends on TREE_RCU
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 293bbd9ac3f4..e2b6a4579086 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1809,6 +1809,14 @@ static noinline_for_stack bool rcu_gp_init(void)
 	struct rcu_node *rnp = rcu_get_root();
 	bool start_new_poll;
 	unsigned long old_gp_seq;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+	struct task_struct *t_verify;
+	int cpu_verify;
+	int rnp_count;
+	int rdp_total;
+	struct rcu_data *rdp_cpu;
+	struct task_struct *t_rdp;
+#endif
 
 	WRITE_ONCE(rcu_state.gp_activity, jiffies);
 	raw_spin_lock_irq_rcu_node(rnp);
@@ -1891,6 +1899,26 @@ static noinline_for_stack bool rcu_gp_init(void)
 		 */
 		arch_spin_lock(&rcu_state.ofl_lock);
 		raw_spin_lock_rcu_node(rnp);
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+		/*
+		 * Verify that the rdp lists are consistent with the rnp
+		 * list.  The unlock path removes a task from its rdp list
+		 * before the rnp list, so tasks may be on the rnp list but
+		 * not on any rdp list; rnp_count >= rdp_total must hold.
+		 */
+		rnp_count = 0;
+		rdp_total = 0;
+		list_for_each_entry(t_verify, &rnp->blkd_tasks, rcu_node_entry)
+			rnp_count++;
+		for (cpu_verify = rnp->grplo; cpu_verify <= rnp->grphi; cpu_verify++) {
+			rdp_cpu = per_cpu_ptr(&rcu_data, cpu_verify);
+			raw_spin_lock(&rdp_cpu->blkd_lock);
+			list_for_each_entry(t_rdp, &rdp_cpu->blkd_list, rcu_rdp_entry)
+				rdp_total++;
+			raw_spin_unlock(&rdp_cpu->blkd_lock);
+		}
+		WARN_ON_ONCE(rnp_count < rdp_total);
+#endif
 		if (rnp->qsmaskinit == rnp->qsmaskinitnext &&
 		    !rnp->wait_blkd_tasks) {
 			/* Nothing to do on this leaf rcu_node structure. */
@@ -4143,6 +4171,10 @@ rcu_boot_init_percpu_data(int cpu)
 	rdp->rcu_onl_gp_state = RCU_GP_CLEANED;
 	rdp->last_sched_clock = jiffies;
 	rdp->cpu = cpu;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+	raw_spin_lock_init(&rdp->blkd_lock);
+	INIT_LIST_HEAD(&rdp->blkd_list);
+#endif
 	rcu_boot_init_nocb_percpu_data(rdp);
 }
 
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index b8bbe7960cda..13d5649a80fb 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -294,6 +294,12 @@ struct rcu_data {
 
 	long lazy_len;			/* Length of buffered lazy callbacks. */
 	int cpu;
+
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+	/* 8) Per-CPU blocked task tracking. */
+	raw_spinlock_t blkd_lock;	/* Protects blkd_list. */
+	struct list_head blkd_list;	/* Tasks blocked on this CPU. */
+#endif
 };
 
 /* Values for nocb_defer_wakeup field in struct rcu_data. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 73ba5f4a968d..5d2bde19131a 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -338,6 +338,12 @@ void rcu_note_context_switch(bool preempt)
 		raw_spin_lock_rcu_node(rnp);
 		t->rcu_read_unlock_special.b.blocked = true;
 		t->rcu_blocked_node = rnp;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+		t->rcu_blocked_cpu = rdp->cpu;
+		raw_spin_lock(&rdp->blkd_lock);
+		list_add(&t->rcu_rdp_entry, &rdp->blkd_list);
+		raw_spin_unlock(&rdp->blkd_lock);
+#endif
 
 		/*
 		 * Verify the CPU's sanity, trace the preemption, and
@@ -485,6 +491,10 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 	struct rcu_data *rdp;
 	struct rcu_node *rnp;
 	union rcu_special special;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+	int blocked_cpu;
+	struct rcu_data *blocked_rdp;
+#endif
 
 	rdp = this_cpu_ptr(&rcu_data);
 	if (rdp->defer_qs_iw_pending == DEFER_QS_PENDING)
@@ -530,6 +540,17 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags)
 		 * to loop.  Retain a WARN_ON_ONCE() out of sheer paranoia.
 		 */
 		rnp = t->rcu_blocked_node;
+#ifdef CONFIG_RCU_PER_CPU_BLOCKED_LISTS
+		/* Remove from the per-CPU list if the task was added to it. */
+		blocked_cpu = t->rcu_blocked_cpu;
+		if (blocked_cpu != -1) {
+			blocked_rdp = per_cpu_ptr(&rcu_data, blocked_cpu);
+			raw_spin_lock(&blocked_rdp->blkd_lock);
+			list_del_init(&t->rcu_rdp_entry);
+			t->rcu_blocked_cpu = -1;
+			raw_spin_unlock(&blocked_rdp->blkd_lock);
+		}
+#endif
 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
 		WARN_ON_ONCE(rnp != t->rcu_blocked_node);
 		WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
-- 
2.34.1
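As an illustration of how the new per-CPU list is meant to be consumed, here is a minimal sketch of a diagnostic helper that walks one CPU's ->blkd_list under ->blkd_lock. The helper name rcu_dump_cpu_blkd_tasks() and its pr_info() output are hypothetical and not part of this patch; the sketch assumes it lives in kernel/rcu/tree_plugin.h so that rcu_data and the new fields are visible, and it uses the irqsave lock variant so it can be called from any context (the acquisitions added above all run with interrupts disabled).

/* Illustrative only, not part of this patch. */
static void rcu_dump_cpu_blkd_tasks(int cpu)
{
	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
	struct task_struct *t;
	unsigned long flags;

	/* Exclude the preemption and rcu_read_unlock() paths above. */
	raw_spin_lock_irqsave(&rdp->blkd_lock, flags);
	list_for_each_entry(t, &rdp->blkd_list, rcu_rdp_entry)
		pr_info("CPU %d: task %s/%d blocked in RCU read-side critical section\n",
			cpu, t->comm, t->pid);
	raw_spin_unlock_irqrestore(&rdp->blkd_lock, flags);
}

Note on lock ordering: in the hunks above, ->blkd_lock is only ever acquired either while already holding the leaf rcu_node ->lock (the preemption and rcu_gp_init() paths) or on its own before that lock is taken (the unlock path), so it nests as a leaf lock. A helper like the sketch takes no other lock and therefore cannot invert that ordering.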

