On Tue, Sep 23, 2025 at 7:22 AM Paul E. McKenney <paul...@kernel.org> wrote:
>
> This commit saves more than 500 lines of RCU code by re-implementing
> RCU Tasks Trace in terms of SRCU-fast.  Follow-up work will remove
> more code that does not cause problems by its presence, but that is no
> longer required.
>
> This variant places smp_mb() in rcu_read_{,un}lock_trace(), which will
> be removed on common-case architectures in a later commit.
>
> [ paulmck: Apply kernel test robot, Boqun Feng, and Zqiang feedback. ]
>
> Signed-off-by: Paul E. McKenney <paul...@kernel.org>
> Tested-by: kernel test robot <oliver.s...@intel.com>
> Cc: Andrii Nakryiko <and...@kernel.org>
> Cc: Alexei Starovoitov <a...@kernel.org>
> Cc: Peter Zijlstra <pet...@infradead.org>
> Cc: <b...@vger.kernel.org>
> ---
>  include/linux/rcupdate_trace.h | 107 ++++--
>  include/linux/sched.h          |   1 +
>  kernel/rcu/srcutiny.c          |  13 +-
>  kernel/rcu/tasks.h             | 617 +--------------------------------
>  4 files changed, 104 insertions(+), 634 deletions(-)
>
makes sense to me overall, but I had a few questions below

> diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
> index e6c44eb428ab63..3f46cbe6700038 100644
> --- a/include/linux/rcupdate_trace.h
> +++ b/include/linux/rcupdate_trace.h
> @@ -12,28 +12,28 @@
>  #include <linux/rcupdate.h>
>  #include <linux/cleanup.h>
>
> -extern struct lockdep_map rcu_trace_lock_map;
> +#ifdef CONFIG_TASKS_TRACE_RCU
> +extern struct srcu_struct rcu_tasks_trace_srcu_struct;
> +#endif // #ifdef CONFIG_TASKS_TRACE_RCU
>
> -#ifdef CONFIG_DEBUG_LOCK_ALLOC
> +#if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)
>
>  static inline int rcu_read_lock_trace_held(void)
>  {
> -        return lock_is_held(&rcu_trace_lock_map);
> +        return srcu_read_lock_held(&rcu_tasks_trace_srcu_struct);
>  }
>
> -#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
> +#else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)
>
>  static inline int rcu_read_lock_trace_held(void)
>  {
>          return 1;
>  }
>
> -#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
> +#endif // #else // #if defined(CONFIG_DEBUG_LOCK_ALLOC) && defined(CONFIG_TASKS_TRACE_RCU)

nit: // #else // #if... looks very unconventional

>
>  #ifdef CONFIG_TASKS_TRACE_RCU
>
> -void rcu_read_unlock_trace_special(struct task_struct *t);
> -
>  /**
>   * rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
>   *
> @@ -50,12 +50,14 @@ static inline void rcu_read_lock_trace(void)
>  {
>          struct task_struct *t = current;
>
> -        WRITE_ONCE(t->trc_reader_nesting, READ_ONCE(t->trc_reader_nesting) + 1);
> -        barrier();
> -        if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB) &&
> -            t->trc_reader_special.b.need_mb)
> -                smp_mb(); // Pairs with update-side barriers
> -        rcu_lock_acquire(&rcu_trace_lock_map);
> +        if (t->trc_reader_nesting++) {
> +                // In case we interrupted a Tasks Trace RCU reader.
> +                rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map);

why is this a "try_lock" variant instead of a no-try "lock_acquire" one?
Some lockdep special treatment for nested locking?

> +                return;
> +        }
> +        barrier(); // nesting before scp to protect against interrupt handler.
> +        t->trc_reader_scp = srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
> +        smp_mb(); // Placeholder for more selective ordering
>  }
>
>  /**

[...]

> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index 2b272382673d62..89d3646155525f 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -939,6 +939,7 @@ struct task_struct {
>
>  #ifdef CONFIG_TASKS_TRACE_RCU
>          int                             trc_reader_nesting;
> +        struct srcu_ctr __percpu        *trc_reader_scp;
>          int                             trc_ipi_to_cpu;
>          union rcu_special               trc_reader_special;
>          struct list_head                trc_holdout_list;
> diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
> index e3b64a5e0ec7e1..3450c3751ef7ad 100644
> --- a/kernel/rcu/srcutiny.c
> +++ b/kernel/rcu/srcutiny.c
> @@ -106,15 +106,15 @@ void __srcu_read_unlock(struct srcu_struct *ssp, int idx)
>          newval = READ_ONCE(ssp->srcu_lock_nesting[idx]) - 1;
>          WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval);
>          preempt_enable();
> -        if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task())
> +        if (!newval && READ_ONCE(ssp->srcu_gp_waiting) && in_task() && !irqs_disabled())

this seems like something that probably should be done in a separate
patch with an explanation on why?
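To check my own understanding of why the check is there: combined with the
srcu_drive_gp() polling further down, this looks like it lets the unlock path
skip the wakeup for a reader whose unlock runs with interrupts disabled,
roughly like the sketch below (the function name is made up for illustration;
only srcu_read_lock()/srcu_read_unlock() and the new irqs_disabled() check
come from the kernel and this patch):

#include <linux/irqflags.h>
#include <linux/srcu.h>

/* Hypothetical Tiny SRCU reader whose unlock runs with interrupts disabled. */
static void example_irqoff_reader(struct srcu_struct *ssp)
{
        unsigned long flags;
        int idx;

        local_irq_save(flags);
        idx = srcu_read_lock(ssp);
        /* ... read-side critical section ... */
        srcu_read_unlock(ssp, idx);     /* with this change, skips swake_up_one()
                                         * because irqs_disabled() is true
                                         */
        local_irq_restore(flags);
}

If that is the scenario, then the HZ/10 polling added to srcu_drive_gp()
below would be what eventually notices the end of such a reader instead of
the wakeup, which would be good to spell out in the changelog.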
>                  swake_up_one(&ssp->srcu_wq);
>  }
>  EXPORT_SYMBOL_GPL(__srcu_read_unlock);
>
>  /*
>   * Workqueue handler to drive one grace period and invoke any callbacks
> - * that become ready as a result. Single-CPU and !PREEMPTION operation
> - * means that we get away with murder on synchronization. ;-)
> + * that become ready as a result. Single-CPU operation and preemption
> + * disabling mean that we get away with murder on synchronization. ;-)
>   */
>  void srcu_drive_gp(struct work_struct *wp)
>  {
> @@ -141,7 +141,12 @@ void srcu_drive_gp(struct work_struct *wp)
>          WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);
>          WRITE_ONCE(ssp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */
>          preempt_enable();
> -        swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx]));
> +        do {
> +                // Deadlock issues prevent __srcu_read_unlock() from
> +                // doing an unconditional wakeup, so polling is required.
> +                swait_event_timeout_exclusive(ssp->srcu_wq,
> +                                              !READ_ONCE(ssp->srcu_lock_nesting[idx]), HZ / 10);
> +        } while (READ_ONCE(ssp->srcu_lock_nesting[idx]));

ditto, generic srcu change, driven by RCU Tasks Trace transformation,
but probably worth calling it out separately?

>          preempt_disable(); // Needed for PREEMPT_LAZY
>          WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */
>          WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1);

[...]
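And one more question on the reader side, mostly to confirm the mental model
behind my lockdep question above: as I read the new rcu_read_lock_trace(),
only the outermost reader stores into trc_reader_scp via srcu_read_lock_fast(),
and a reader that interrupts an existing reader only bumps trc_reader_nesting
and does the lockdep acquire. Something like the sketch below, where both
functions are made up for illustration and I am assuming the (unquoted)
unlock path mirrors the lock path:

#include <linux/rcupdate_trace.h>

/* Hypothetical outermost (task-context) Tasks Trace reader. */
static void example_task_reader(void)
{
        rcu_read_lock_trace();          /* nesting 0 -> 1, stores trc_reader_scp
                                         * via srcu_read_lock_fast()
                                         */
        /* ... an interrupt arriving here runs example_irq_reader() ... */
        rcu_read_unlock_trace();        /* nesting 1 -> 0, presumably
                                         * srcu_read_unlock_fast() on the saved scp
                                         */
}

/* Hypothetical nested reader running from an interrupt handler. */
static void example_irq_reader(void)
{
        rcu_read_lock_trace();          /* sees nesting != 0: lockdep acquire only,
                                         * trc_reader_scp left untouched
                                         */
        rcu_read_unlock_trace();        /* nesting back to 1, no SRCU unlock */
}

If that picture is right, the barrier() ordering nesting before scp makes
sense to me; it was only the try-acquire annotation in the nested case that
I could not map to anything.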