The rcu_read_{,un}lock_{,tasks_}trace() functions need to use smp_mb()
only if invoked where RCU is not watching, that is, from locations where
a call to rcu_is_watching() would return false.  In architectures that
define the ARCH_WANTS_NO_INSTR Kconfig option, use of noinstr and friends
ensures that tracing happens only where RCU is watching, so those
architectures can dispense entirely with the read-side calls to smp_mb().

Other architectures include these read-side calls by default, but in many
installations there might be either larger than average tolerance for
risk, prohibition of removing tracing on a running system, or careful
review and approval of removal of tracing.  Such installations can
build their kernels with CONFIG_TASKS_TRACE_RCU_NO_MB=y to avoid those
read-side calls to smp_mb(), thus accepting responsibility for run-time
removal of tracing from code regions that RCU is not watching.

Those wishing to disable read-side memory barriers for an entire
architecture can select this TASKS_TRACE_RCU_NO_MB Kconfig option,
hence the polarity.

Signed-off-by: Paul E. McKenney <paul...@kernel.org>
Cc: Andrii Nakryiko <and...@kernel.org>
Cc: Alexei Starovoitov <a...@kernel.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: <b...@vger.kernel.org>
---
 include/linux/rcupdate_trace.h | 32 ++++++++++++++++++--------------
 kernel/rcu/Kconfig             | 23 +++++++++++++++++++++++
 kernel/rcu/tasks.h             |  7 ++++++-
 3 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index b87151e6b23881..7f7977fb56aca5 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -48,10 +48,11 @@ static inline int rcu_read_lock_trace_held(void)
  */
 static inline struct srcu_ctr __percpu *rcu_read_lock_tasks_trace(void)
 {
-       struct srcu_ctr __percpu *ret = srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
+       struct srcu_ctr __percpu *ret = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
 
-       if (IS_ENABLED(CONFIG_ARCH_WANTS_NO_INSTR))
-               smp_mb();
+       rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map);
+       if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB))
+               smp_mb(); // Provide ordering on noinstr-incomplete architectures.
        return ret;
 }
 
@@ -66,9 +67,10 @@ static inline struct srcu_ctr __percpu *rcu_read_lock_tasks_trace(void)
  */
 static inline void rcu_read_unlock_tasks_trace(struct srcu_ctr __percpu *scp)
 {
-       if (!IS_ENABLED(CONFIG_ARCH_WANTS_NO_INSTR))
-               smp_mb();
-       srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp);
+       if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB))
+               smp_mb(); // Provide ordering on noinstr-incomplete architectures.
+       __srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp);
+       srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map);
 }
 
 /**
@@ -87,14 +89,15 @@ static inline void rcu_read_lock_trace(void)
 {
        struct task_struct *t = current;
 
+       rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map);
        if (t->trc_reader_nesting++) {
                // In case we interrupted a Tasks Trace RCU reader.
-               rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map);
                return;
        }
        barrier();  // nesting before scp to protect against interrupt handler.
-       t->trc_reader_scp = srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
-       smp_mb(); // Placeholder for more selective ordering
+       t->trc_reader_scp = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
+       if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB))
+               smp_mb(); // Placeholder for more selective ordering
 }
 
 /**
@@ -111,13 +114,14 @@ static inline void rcu_read_unlock_trace(void)
        struct srcu_ctr __percpu *scp;
        struct task_struct *t = current;
 
-       smp_mb(); // Placeholder for more selective ordering
        scp = t->trc_reader_scp;
        barrier();  // scp before nesting to protect against interrupt handler.
-       if (!--t->trc_reader_nesting)
-               srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp);
-       else
-               srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map);
+       if (!--t->trc_reader_nesting) {
+               if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB))
+                       smp_mb(); // Placeholder for more selective ordering
+               __srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp);
+       }
+       srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map);
 }
 
 /**
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 73a6cc364628b5..6a319e2926589f 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -142,6 +142,29 @@ config TASKS_TRACE_RCU
        default n
        select IRQ_WORK
 
+config TASKS_TRACE_RCU_NO_MB
+       bool "Override RCU Tasks Trace inclusion of read-side memory barriers"
+       depends on RCU_EXPERT && TASKS_TRACE_RCU
+       default ARCH_WANTS_NO_INSTR
+       help
+         This option prevents the use of read-side memory barriers in
+         rcu_read_lock_tasks_trace() and rcu_read_unlock_tasks_trace()
+         even in kernels built with CONFIG_ARCH_WANTS_NO_INSTR=n, that is,
+         in kernels that do not have noinstr set up in entry/exit code.
+         By setting this option, you are promising to carefully review
+         use of ftrace, BPF, and friends to ensure that no tracing
+         operation is attached to a function that runs in that portion
+         of the entry/exit code that RCU does not watch, that is,
+         where rcu_is_watching() returns false.  Alternatively, you
+         might choose to never remove traces except by rebooting.
+
+         Those wishing to disable read-side memory barriers for an entire
+         architecture can select this Kconfig option, hence the polarity.
+
+         Say Y here if you need speed and will review use of tracing.
+         Say N here for certain esoteric testing of RCU itself.
+         Take the default if you are unsure.
+
 config RCU_STALL_COMMON
        def_bool TREE_RCU
        help
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 833e180db744f2..bf1226834c9423 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -1600,8 +1600,13 @@ static inline void rcu_tasks_bootup_oddness(void) {}
 // Tracing variant of Tasks RCU.  This variant is designed to be used
 // to protect tracing hooks, including those of BPF.  This variant
 // is implemented via a straightforward mapping onto SRCU-fast.
+// DEFINE_SRCU_FAST() is required because rcu_read_lock_trace() must
+// use __srcu_read_lock_fast() in order to bypass the rcu_is_watching()
+// checks in kernels built with CONFIG_TASKS_TRACE_RCU_NO_MB=n, which also
+// bypasses the srcu_check_read_flavor_force() that would otherwise mark
+// rcu_tasks_trace_srcu_struct as needing SRCU-fast readers.
 
-DEFINE_SRCU(rcu_tasks_trace_srcu_struct);
+DEFINE_SRCU_FAST(rcu_tasks_trace_srcu_struct);
 EXPORT_SYMBOL_GPL(rcu_tasks_trace_srcu_struct);
 
 #endif /* #else #ifdef CONFIG_TASKS_TRACE_RCU */
-- 
2.40.1


Reply via email to