On Thu,  3 Oct 2024 11:16:35 -0400
Mathieu Desnoyers <[email protected]> wrote:

> Use Tasks Trace RCU to protect iteration of system call enter/exit
> tracepoint probes to allow those probes to handle page faults.
> 
> In preparation for this change, all tracers registering to system call
> enter/exit tracepoints should expect those to be called with preemption
> enabled.
> 
> This allows tracers to fault-in userspace system call arguments such as
> path strings within their probe callbacks.
> 
> Signed-off-by: Mathieu Desnoyers <[email protected]>
> Cc: Michael Jeanson <[email protected]>
> Cc: Steven Rostedt <[email protected]>
> Cc: Masami Hiramatsu <[email protected]>
> Cc: Peter Zijlstra <[email protected]>
> Cc: Alexei Starovoitov <[email protected]>
> Cc: Yonghong Song <[email protected]>
> Cc: Paul E. McKenney <[email protected]>
> Cc: Ingo Molnar <[email protected]>
> Cc: Arnaldo Carvalho de Melo <[email protected]>
> Cc: Mark Rutland <[email protected]>
> Cc: Alexander Shishkin <[email protected]>
> Cc: Namhyung Kim <[email protected]>
> Cc: Andrii Nakryiko <[email protected]>
> Cc: [email protected]
> Cc: Joel Fernandes <[email protected]>
> ---
>  include/linux/tracepoint.h | 25 +++++++++++++++++--------
>  init/Kconfig               |  1 +
>  2 files changed, 18 insertions(+), 8 deletions(-)
> 
> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
> index 666499b9f3be..6faf34e5efc9 100644
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -17,6 +17,7 @@
>  #include <linux/errno.h>
>  #include <linux/types.h>
>  #include <linux/rcupdate.h>
> +#include <linux/rcupdate_trace.h>
>  #include <linux/tracepoint-defs.h>
>  #include <linux/static_call.h>
>  
> @@ -109,6 +110,7 @@ void for_each_tracepoint_in_module(struct module *mod,
>  #ifdef CONFIG_TRACEPOINTS
>  static inline void tracepoint_synchronize_unregister(void)
>  {
> +     synchronize_rcu_tasks_trace();
>       synchronize_srcu(&tracepoint_srcu);
>       synchronize_rcu();
>  }
> @@ -211,7 +213,7 @@ static inline struct tracepoint 
> *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>   * it_func[0] is never NULL because there is at least one element in the 
> array
>   * when the array itself is non NULL.
>   */
> -#define __DO_TRACE(name, args, cond, rcuidle)                                
> \
> +#define __DO_TRACE(name, args, cond, rcuidle, syscall)                       
> \
>       do {                                                            \
>               int __maybe_unused __idx = 0;                           \
>                                                                       \
> @@ -222,8 +224,12 @@ static inline struct tracepoint 
> *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>                             "Bad RCU usage for tracepoint"))          \
>                       return;                                         \
>                                                                       \
> -             /* keep srcu and sched-rcu usage consistent */          \
> -             preempt_disable_notrace();                              \
> +             if (syscall) {                                          \
> +                     rcu_read_lock_trace();                          \
> +             } else {                                                \
> +                     /* keep srcu and sched-rcu usage consistent */  \
> +                     preempt_disable_notrace();                      \
> +             }                                                       \
>                                                                       \

I'm thinking we just use rcu_read_lock_trace() and get rid of the
preempt_disable and srcu locks for all tracepoints. Oh crap! I should get
rid of srcu locking too, as it was only needed for the rcuidle code :-p

-- Steve


>               /*                                                      \
>                * For rcuidle callers, use srcu since sched-rcu        \
> @@ -241,7 +247,10 @@ static inline struct tracepoint 
> *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>                       srcu_read_unlock_notrace(&tracepoint_srcu, __idx);\
>               }                                                       \
>                                                                       \
> -             preempt_enable_notrace();                               \
> +             if (syscall)                                            \
> +                     rcu_read_unlock_trace();                        \
> +             else                                                    \
> +                     preempt_enable_notrace();                       \
>       } while (0)
>  
>  #ifndef MODULE
> @@ -251,7 +260,7 @@ static inline struct tracepoint 
> *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>               if (static_key_false(&__tracepoint_##name.key))         \
>                       __DO_TRACE(name,                                \
>                               TP_ARGS(args),                          \
> -                             TP_CONDITION(cond), 1);                 \
> +                             TP_CONDITION(cond), 1, 0);              \
>       }
>  #else
>  #define __DECLARE_TRACE_RCU(name, proto, args, cond)
> @@ -284,7 +293,7 @@ static inline struct tracepoint 
> *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>               if (static_key_false(&__tracepoint_##name.key))         \
>                       __DO_TRACE(name,                                \
>                               TP_ARGS(args),                          \
> -                             TP_CONDITION(cond), 0);                 \
> +                             TP_CONDITION(cond), 0, 0);              \
>               if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {             \
>                       WARN_ONCE(!rcu_is_watching(),                   \
>                                 "RCU not watching for tracepoint");   \
> @@ -295,7 +304,7 @@ static inline struct tracepoint 
> *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>               if (static_key_false(&__tracepoint_##name.key))         \
>                       __DO_TRACE(name,                                \
>                               TP_ARGS(args),                          \
> -                             TP_CONDITION(cond), 1);                 \
> +                             TP_CONDITION(cond), 1, 0);              \
>       }                                                               \
>       static inline int                                               \
>       register_trace_##name(void (*probe)(data_proto), void *data)    \
> @@ -330,7 +339,7 @@ static inline struct tracepoint 
> *tracepoint_ptr_deref(tracepoint_ptr_t *p)
>               if (static_key_false(&__tracepoint_##name.key))         \
>                       __DO_TRACE(name,                                \
>                               TP_ARGS(args),                          \
> -                             TP_CONDITION(cond), 0);                 \
> +                             TP_CONDITION(cond), 0, 1);              \
>               if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) {             \
>                       WARN_ONCE(!rcu_is_watching(),                   \
>                                 "RCU not watching for tracepoint");   \
> diff --git a/init/Kconfig b/init/Kconfig
> index fbd0cb06a50a..eedd0064fb36 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1984,6 +1984,7 @@ config BINDGEN_VERSION_TEXT
>  #
>  config TRACEPOINTS
>       bool
> +     select TASKS_TRACE_RCU
>  
>  source "kernel/Kconfig.kexec"
>  


Reply via email to