The rcu_read_{,un}lock_{,tasks_}trace() functions need to use smp_mb() only if invoked where RCU is not watching, that is, from locations where a call to rcu_is_watching() would return false. In architectures that define the ARCH_WANTS_NO_INSTR Kconfig option, use of noinstr and friends ensures that tracing happens only where RCU is watching, so those architectures can dispense entirely with the read-side calls to smp_mb().
Other architectures include these read-side calls by default, but in many installations there might be either larger than average tolerance for risk, prohibition of removing tracing on a running system, or careful review and approval of removal of tracing. Such installations can build their kernels with CONFIG_TASKS_TRACE_RCU_NO_MB=y to avoid those read-side calls to smp_mb(), thus accepting responsibility for run-time removal of tracing from code regions that RCU is not watching. Those wishing to disable read-side memory barriers for an entire architecture can select this TASKS_TRACE_RCU_NO_MB Kconfig option, hence the polarity. Signed-off-by: Paul E. McKenney <paul...@kernel.org> Cc: Andrii Nakryiko <and...@kernel.org> Cc: Alexei Starovoitov <a...@kernel.org> Cc: Peter Zijlstra <pet...@infradead.org> Cc: <b...@vger.kernel.org> --- include/linux/rcupdate_trace.h | 32 ++++++++++++++++++-------------- kernel/rcu/Kconfig | 23 +++++++++++++++++++++++ kernel/rcu/tasks.h | 7 ++++++- 3 files changed, 47 insertions(+), 15 deletions(-) diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h index b87151e6b23881..7f7977fb56aca5 100644 --- a/include/linux/rcupdate_trace.h +++ b/include/linux/rcupdate_trace.h @@ -48,10 +48,11 @@ static inline int rcu_read_lock_trace_held(void) */ static inline struct srcu_ctr __percpu *rcu_read_lock_tasks_trace(void) { - struct srcu_ctr __percpu *ret = srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); + struct srcu_ctr __percpu *ret = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); - if (IS_ENABLED(CONFIG_ARCH_WANTS_NO_INSTR)) - smp_mb(); + rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map); + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) + smp_mb(); // Provide ordering on noinstr-incomplete architectures. 
return ret; } @@ -66,9 +67,10 @@ static inline struct srcu_ctr __percpu *rcu_read_lock_tasks_trace(void) */ static inline void rcu_read_unlock_tasks_trace(struct srcu_ctr __percpu *scp) { - if (!IS_ENABLED(CONFIG_ARCH_WANTS_NO_INSTR)) - smp_mb(); - srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) + smp_mb(); // Provide ordering on noinstr-incomplete architectures. + __srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); + srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map); } /** @@ -87,14 +89,15 @@ static inline void rcu_read_lock_trace(void) { struct task_struct *t = current; + rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map); if (t->trc_reader_nesting++) { // In case we interrupted a Tasks Trace RCU reader. - rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map); return; } barrier(); // nesting before scp to protect against interrupt handler. - t->trc_reader_scp = srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); - smp_mb(); // Placeholder for more selective ordering + t->trc_reader_scp = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct); + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) + smp_mb(); // Placeholder for more selective ordering } /** @@ -111,13 +114,14 @@ static inline void rcu_read_unlock_trace(void) struct srcu_ctr __percpu *scp; struct task_struct *t = current; - smp_mb(); // Placeholder for more selective ordering scp = t->trc_reader_scp; barrier(); // scp before nesting to protect against interrupt handler. 
- if (!--t->trc_reader_nesting) - srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); - else - srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map); + if (!--t->trc_reader_nesting) { + if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB)) + smp_mb(); // Placeholder for more selective ordering + __srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp); + } + srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map); } /** diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig index 73a6cc364628b5..6a319e2926589f 100644 --- a/kernel/rcu/Kconfig +++ b/kernel/rcu/Kconfig @@ -142,6 +142,29 @@ config TASKS_TRACE_RCU default n select IRQ_WORK +config TASKS_TRACE_RCU_NO_MB + bool "Override RCU Tasks Trace inclusion of read-side memory barriers" + depends on RCU_EXPERT && TASKS_TRACE_RCU + default ARCH_WANTS_NO_INSTR + help + This option prevents the use of read-side memory barriers in + rcu_read_lock_tasks_trace() and rcu_read_unlock_tasks_trace() + even in kernels built with CONFIG_ARCH_WANTS_NO_INSTR=n, that is, + in kernels that do not have noinstr set up in entry/exit code. + By setting this option, you are promising to carefully review + use of ftrace, BPF, and friends to ensure that no tracing + operation is attached to a function that runs in that portion + of the entry/exit code that RCU does not watch, that is, + where rcu_is_watching() returns false. Alternatively, you + might choose to never remove traces except by rebooting. + + Those wishing to disable read-side memory barriers for an entire + architecture can select this Kconfig option, hence the polarity. + + Say Y here if you need speed and will review use of tracing. + Say N here for certain esoteric testing of RCU itself. + Take the default if you are unsure. 
+ config RCU_STALL_COMMON def_bool TREE_RCU help diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 833e180db744f2..bf1226834c9423 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -1600,8 +1600,13 @@ static inline void rcu_tasks_bootup_oddness(void) {} // Tracing variant of Tasks RCU. This variant is designed to be used // to protect tracing hooks, including those of BPF. This variant // is implemented via a straightforward mapping onto SRCU-fast. +// DEFINE_SRCU_FAST() is required because rcu_read_lock_trace() must +// use __srcu_read_lock_fast() in order to bypass the rcu_is_watching() +// checks in kernels built with CONFIG_TASKS_TRACE_RCU_NO_MB=n, which also +// bypasses the srcu_check_read_flavor_force() that would otherwise mark +// rcu_tasks_trace_srcu_struct as needing SRCU-fast readers. -DEFINE_SRCU(rcu_tasks_trace_srcu_struct); +DEFINE_SRCU_FAST(rcu_tasks_trace_srcu_struct); EXPORT_SYMBOL_GPL(rcu_tasks_trace_srcu_struct); #endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */ -- 2.40.1