Re: [RFC PATCH v2 20/31] function_graph: Add a new exit handler with parent_ip and ftrace_regs

2023-11-08 Thread Google
On Wed,  8 Nov 2023 23:28:11 +0900
"Masami Hiramatsu (Google)"  wrote:

> From: Masami Hiramatsu (Google) 
> 
> Add a new return handler to fgraph_ops as 'retregfunc'  which takes
> parent_ip and ftrace_regs instead of ftrace_graph_ret. This handler
> is available only if the arch support CONFIG_HAVE_FUNCTION_GRAPH_FREGS.
> Note that the 'retfunc' and 'reregfunc' are mutual exclusive.
> You can set only one of them.
> 
> Signed-off-by: Masami Hiramatsu (Google) 
> ---
>  arch/x86/include/asm/ftrace.h |2 +
>  include/linux/ftrace.h|   10 +-
>  kernel/trace/Kconfig  |5 ++-
>  kernel/trace/fgraph.c |   74 
> +++--
>  4 files changed, 63 insertions(+), 28 deletions(-)
> 
> diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
> index fc60974a1d89..4701d009c215 100644
> --- a/arch/x86/include/asm/ftrace.h
> +++ b/arch/x86/include/asm/ftrace.h
> @@ -72,6 +72,8 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
>   override_function_with_return(&(fregs)->regs)
>  #define ftrace_regs_query_register_offset(name) \
>   regs_query_register_offset(name)
> +#define ftrace_regs_get_frame_pointer(fregs) \
> + frame_pointer(&(fregs)->regs)
>  
>  struct ftrace_ops;
>  #define ftrace_graph_func ftrace_graph_func
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index c91b234949d5..8efccd911d41 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -43,7 +43,9 @@ struct dyn_ftrace;
>  
>  char *arch_ftrace_match_adjust(char *str, const char *search);
>  
> -#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL
> +#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FREGS
> +unsigned long ftrace_return_to_handler(struct ftrace_regs *fregs);
> +#elif defined(CONFIG_HAVE_FUNCTION_GRAPH_RETVAL)
>  struct fgraph_ret_regs;
>  unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs);
>  #else
> @@ -156,6 +158,7 @@ struct ftrace_regs {
>  #define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0)
>  #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
>  
> +
>  static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs 
> *fregs)
>  {
>   if (!fregs)
> @@ -1066,6 +1069,10 @@ typedef int (*trace_func_graph_regs_ent_t)(unsigned 
> long func,
>  unsigned long parent_ip,
>  struct ftrace_regs *fregs,
>  struct fgraph_ops *); /* entry w/ 
> regs */
> +typedef void (*trace_func_graph_regs_ret_t)(unsigned long func,
> + unsigned long parent_ip,
> + struct ftrace_regs *,
> + struct fgraph_ops *); /* return w/ 
> regs */
>  
>  extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct 
> fgraph_ops *gops);
>  
> @@ -1075,6 +1082,7 @@ struct fgraph_ops {
>   trace_func_graph_ent_t  entryfunc;
>   trace_func_graph_ret_t  retfunc;
>   trace_func_graph_regs_ent_t entryregfunc;
> + trace_func_graph_regs_ret_t retregfunc;
>   struct ftrace_ops   ops; /* for the hash lists */
>   void*private;
>   int idx;
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 61c541c36596..308b3bec01b1 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -34,6 +34,9 @@ config HAVE_FUNCTION_GRAPH_TRACER
>  config HAVE_FUNCTION_GRAPH_RETVAL
>   bool
>  
> +config HAVE_FUNCTION_GRAPH_FREGS
> + bool
> +
>  config HAVE_DYNAMIC_FTRACE
>   bool
>   help
> @@ -232,7 +235,7 @@ config FUNCTION_GRAPH_TRACER
>  
>  config FUNCTION_GRAPH_RETVAL
>   bool "Kernel Function Graph Return Value"
> - depends on HAVE_FUNCTION_GRAPH_RETVAL
> + depends on HAVE_FUNCTION_GRAPH_RETVAL || HAVE_FUNCTION_GRAPH_FREGS
>   depends on FUNCTION_GRAPH_TRACER
>   default n
>   help
> diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
> index 6567b18c6c54..cf240914ef9b 100644
> --- a/kernel/trace/fgraph.c
> +++ b/kernel/trace/fgraph.c
> @@ -623,8 +623,7 @@ int function_graph_enter_regs(unsigned long ret, unsigned 
> long func,
>  
>  /* Retrieve a function return address to the trace stack on thread info.*/
>  static struct ftrace_ret_stack *
> -ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
> - unsigned long frame_pointer)
> +ftrace_pop_return_trace(unsigned long *ret, unsigned long frame_pointer)
>  {
>   struct ftrace_ret_stack *ret_stack;
>   int index;
> @@ -669,10 +668,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, 
> unsigned long *ret,
>  #endif
>  
>   *ret = ret_stack->ret;
> - trace->func = ret_stack->func;
> - trace->calltime = ret_stack->calltime;
> - trace->overrun = atomic_read(>trace_overrun);
> - 

Re: [RFC PATCH v2 01/31] tracing: Add a comment about ftrace_regs definition

2023-11-08 Thread Google
On Wed,  8 Nov 2023 23:24:32 +0900
"Masami Hiramatsu (Google)"  wrote:

> From: Masami Hiramatsu (Google) 
> 
> To clarify what will be expected on ftrace_regs, add a comment to the
> architecture independent definition of the ftrace_regs.
> 
> Signed-off-by: Masami Hiramatsu (Google) 
> ---
>  Changes in v2:
>   - newly added.
> ---
>  include/linux/ftrace.h |   25 +
>  1 file changed, 25 insertions(+)
> 
> diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
> index e8921871ef9a..b174af91d8be 100644
> --- a/include/linux/ftrace.h
> +++ b/include/linux/ftrace.h
> @@ -118,6 +118,31 @@ extern int ftrace_enabled;
>  
>  #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
>  
> +/**
> + * ftrace_regs - ftrace partial/optimal register set
> + *
> + * ftrace_regs represents a group of registers which is used at the
> + * function entry and exit. There are three types of registers.
> + *
> + * - Registers for passing the parameters to callee, including the stack
> + *   pointer. (e.g. rcx, rdx, rdi, rsi, r8, r9 and rsp on x86_64)
> + * - Registers for passing the return values to caller.
> + *   (e.g. rax and rdx on x86_64)
> + * - Registers for hooking the function return including the frame pointer
> + *   (the frame pointer is architecture/config dependent)
> + *   (e.g. rbp and rsp for x86_64)

Oops, I found the program counter/instruction pointer must be saved too.
This is used for live patching. One question is that if the IP is modified
at the return handler, what should we do? Return to the specified address?

Thanks,

> + *
> + * Also, architecture dependent fields can be used for internal process.
> + * (e.g. orig_ax on x86_64)
> + *
> + * On the function entry, those registers will be restored except for
> + * the stack pointer, so that user can change the function parameters.
> + * On the function exit, onlu registers which is used for return values
> + * are restored.
> + *
> + * NOTE: user *must not* access regs directly, only do it via APIs, because
> + * the member can be changed according to the architecture.
> + */
>  struct ftrace_regs {
>   struct pt_regs  regs;
>  };
> 


-- 
Masami Hiramatsu (Google) 



Re: [PATCH v7 00/13] selftests/sgx: Fix compilation errors

2023-11-08 Thread Dave Hansen
On 11/8/23 12:31, Jo Van Bulck wrote:
> Just a kind follow-up: from what I can see, this series has not been
> merged into the x86/sgx branch of tip yet (assuming that's where it
> should go next)?
> 
> Apologies if I've overlooked anything, and please let me know if there's
> something on my end that can help!

Yes, you've missed something.  For your reading pleasure:

https://www.kernel.org/doc/html/latest/process/2.Process.html?highlight=merge%20window
https://www.kernel.org/doc/html/latest/process/maintainer-tip.html

I honestly didn't even think about applying this until Jarkko said
something on 23rd.  By that point, it was far too late for it to get
sorted out for 6.7.  So, that puts it in the next merge window.
Specifically:

The release candidate -rc1 is the starting point for new
patches to be applied which are targeted for the next merge
window.

So wait for the next -rc1, and you'll hopefully see your series get merged.


Re: [PATCH v7 00/13] selftests/sgx: Fix compilation errors

2023-11-08 Thread Jo Van Bulck

On 23.10.23 23:32, Jarkko Sakkinen wrote:

On Fri Oct 13, 2023 at 2:45 PM EEST, Jo Van Bulck wrote:

On 10.10.23 11:44, Jarkko Sakkinen wrote:

Folks (sorry for top posting): I've now taken my old NUC7 out of the
dust and tested the series :-)

Tested-by: Jarkko Sakkinen 


Thanks for testing this Jarkko! Not sure on next steps, do you want me
to re-post the series with the Tested-by tag for all commits or will you
add that? Let me know if something from my side is needed.


Dave, can you pick these patches to the x86 tree with my tested-by
added? Sorry for latency. It is flu season in Finland and I've been
functional varying last week because of that.


Just a kind follow-up: from what I can see, this series has not been 
merged into the x86/sgx branch of tip yet (assuming that's where it 
should go next)?


Apologies if I've overlooked anything, and please let me know if there's 
something on my end that can help!


Best,
Jo


[RFC PATCH v2 31/31] Documentation: probes: Update fprobe on function-graph tracer

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Update fprobe documentation for the new fprobe on function-graph
tracer. This includes some bahvior changes and pt_regs to
ftrace_regs interface change.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Update @fregs parameter explanation.
---
 Documentation/trace/fprobe.rst |   42 ++--
 1 file changed, 27 insertions(+), 15 deletions(-)

diff --git a/Documentation/trace/fprobe.rst b/Documentation/trace/fprobe.rst
index 196f52386aaa..f58bdc64504f 100644
--- a/Documentation/trace/fprobe.rst
+++ b/Documentation/trace/fprobe.rst
@@ -9,9 +9,10 @@ Fprobe - Function entry/exit probe
 Introduction
 
 
-Fprobe is a function entry/exit probe mechanism based on ftrace.
-Instead of using ftrace full feature, if you only want to attach callbacks
-on function entry and exit, similar to the kprobes and kretprobes, you can
+Fprobe is a function entry/exit probe mechanism based on the function-graph
+tracer.
+Instead of tracing all functions, if you want to attach callbacks on specific
+function entry and exit, similar to the kprobes and kretprobes, you can
 use fprobe. Compared with kprobes and kretprobes, fprobe gives faster
 instrumentation for multiple functions with single handler. This document
 describes how to use fprobe.
@@ -91,12 +92,14 @@ The prototype of the entry/exit callback function are as 
follows:
 
 .. code-block:: c
 
- int entry_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct pt_regs *regs, void *entry_data);
+ int entry_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct ftrace_regs *fregs, void *entry_data);
 
- void exit_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct pt_regs *regs, void *entry_data);
+ void exit_callback(struct fprobe *fp, unsigned long entry_ip, unsigned long 
ret_ip, struct ftrace_regs *fregs, void *entry_data);
 
-Note that the @entry_ip is saved at function entry and passed to exit handler.
-If the entry callback function returns !0, the corresponding exit callback 
will be cancelled.
+Note that the @entry_ip is saved at function entry and passed to exit
+handler.
+If the entry callback function returns !0, the corresponding exit callback
+will be cancelled.
 
 @fp
 This is the address of `fprobe` data structure related to this handler.
@@ -112,12 +115,10 @@ If the entry callback function returns !0, the 
corresponding exit callback will
 This is the return address that the traced function will return to,
 somewhere in the caller. This can be used at both entry and exit.
 
-@regs
-This is the `pt_regs` data structure at the entry and exit. Note that
-the instruction pointer of @regs may be different from the @entry_ip
-in the entry_handler. If you need traced instruction pointer, you need
-to use @entry_ip. On the other hand, in the exit_handler, the 
instruction
-pointer of @regs is set to the current return address.
+@fregs
+This is the `ftrace_regs` data structure at the entry and exit. This
+includes the function parameters, or the return values. So user can
+access thos values via appropriate `ftrace_regs_*` APIs.
 
 @entry_data
 This is a local storage to share the data between entry and exit 
handlers.
@@ -125,6 +126,17 @@ If the entry callback function returns !0, the 
corresponding exit callback will
 and `entry_data_size` field when registering the fprobe, the storage is
 allocated and passed to both `entry_handler` and `exit_handler`.
 
+Entry data size and exit handlers on the same function
+==
+
+Since the entry data is passed via per-task stack and it is has limited size,
+the entry data size per probe is limited to `15 * sizeof(long)`. You also need
+to take care that the different fprobes are probing on the same function, this
+limit becomes smaller. The entry data size is aligned to `sizeof(long)` and
+each fprobe which has exit handler uses a `sizeof(long)` space on the stack,
+you should keep the number of fprobes on the same function as small as
+possible.
+
 Share the callbacks with kprobes
 
 
@@ -165,8 +177,8 @@ This counter counts up when;
  - fprobe fails to take ftrace_recursion lock. This usually means that a 
function
which is traced by other ftrace users is called from the entry_handler.
 
- - fprobe fails to setup the function exit because of the shortage of rethook
-   (the shadow stack for hooking the function return.)
+ - fprobe fails to setup the function exit because of failing to allocate the
+   data buffer from the per-task shadow stack.
 
 The `fprobe::nmissed` field counts up in both cases. Therefore, the former
 skips both of entry and exit callback and the latter skips the exit




[RFC PATCH v2 30/31] selftests: ftrace: Remove obsolate maxactive syntax check

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Since the fprobe event does not support maxactive anymore, stop
testing the maxactive syntax error checking.

Signed-off-by: Masami Hiramatsu (Google) 
---
 .../ftrace/test.d/dynevent/fprobe_syntax_errors.tc |4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git 
a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc 
b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
index 20e42c030095..66516073ff27 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -16,9 +16,7 @@ aarch64)
   REG=%r0 ;;
 esac
 
-check_error 'f^100 vfs_read'   # MAXACT_NO_KPROBE
-check_error 'f^1a111 vfs_read' # BAD_MAXACT
-check_error 'f^10 vfs_read'# MAXACT_TOO_BIG
+check_error 'f^100 vfs_read'   # BAD_MAXACT
 
 check_error 'f ^non_exist_func'# BAD_PROBE_ADDR (enoent)
 check_error 'f ^vfs_read+10'   # BAD_PROBE_ADDR




[RFC PATCH v2 29/31] bpf: Enable kprobe_multi feature if CONFIG_FPROBE is enabled

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Enable kprobe_multi feature if CONFIG_FPROBE is enabled. The pt_regs is
converted from ftrace_regs by ftrace_partial_regs(), thus some registers
may always returns 0. But it should be enough for function entry (access
arguments) and exit (access return value).

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes from previous series: NOTHING, Update against the new series.
---
 kernel/trace/bpf_trace.c |   22 +-
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a4b5e34b0419..96d6e9993f75 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2501,7 +2501,7 @@ static int __init bpf_event_init(void)
 fs_initcall(bpf_event_init);
 #endif /* CONFIG_MODULES */
 
-#if defined(CONFIG_FPROBE) && defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
+#ifdef CONFIG_FPROBE
 struct bpf_kprobe_multi_link {
struct bpf_link link;
struct fprobe fp;
@@ -2524,6 +2524,8 @@ struct user_syms {
char *buf;
 };
 
+static DEFINE_PER_CPU(struct pt_regs, bpf_kprobe_multi_pt_regs);
+
 static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, 
u32 cnt)
 {
unsigned long __user usymbol;
@@ -2700,13 +2702,14 @@ static u64 bpf_kprobe_multi_entry_ip(struct bpf_run_ctx 
*ctx)
 
 static int
 kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
-  unsigned long entry_ip, struct pt_regs *regs)
+  unsigned long entry_ip, struct ftrace_regs *fregs)
 {
struct bpf_kprobe_multi_run_ctx run_ctx = {
.link = link,
.entry_ip = entry_ip,
};
struct bpf_run_ctx *old_run_ctx;
+   struct pt_regs *regs;
int err;
 
if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
@@ -2716,6 +2719,7 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link 
*link,
 
migrate_disable();
rcu_read_lock();
+   regs = ftrace_partial_regs(fregs, 
this_cpu_ptr(_kprobe_multi_pt_regs));
old_run_ctx = bpf_set_run_ctx(_ctx.run_ctx);
err = bpf_prog_run(link->link.prog, regs);
bpf_reset_run_ctx(old_run_ctx);
@@ -2733,13 +2737,9 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned 
long fentry_ip,
  void *data)
 {
struct bpf_kprobe_multi_link *link;
-   struct pt_regs *regs = ftrace_get_regs(fregs);
-
-   if (!regs)
-   return 0;
 
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
-   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs);
+   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), fregs);
return 0;
 }
 
@@ -2749,13 +2749,9 @@ kprobe_multi_link_exit_handler(struct fprobe *fp, 
unsigned long fentry_ip,
   void *data)
 {
struct bpf_kprobe_multi_link *link;
-   struct pt_regs *regs = ftrace_get_regs(fregs);
-
-   if (!regs)
-   return;
 
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
-   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs);
+   kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), fregs);
 }
 
 static int symbols_cmp_r(const void *a, const void *b, const void *priv)
@@ -3012,7 +3008,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr 
*attr, struct bpf_prog *pr
kvfree(cookies);
return err;
 }
-#else /* !CONFIG_FPROBE || !CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+#else /* !CONFIG_FPROBE */
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog 
*prog)
 {
return -EOPNOTSUPP;




[RFC PATCH v2 28/31] tracing/fprobe: Enable fprobe events with CONFIG_DYNAMIC_FTRACE_WITH_ARGS

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Allow fprobe events to be enabled with CONFIG_DYNAMIC_FTRACE_WITH_ARGS.
With this change, fprobe events mostly use ftrace_regs instead of pt_regs.
Note that if the arch doesn't enable HAVE_PT_REGS_COMPAT_FTRACE_REGS,
fprobe events will not be able to be used from perf.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Define ftrace_regs_get_kernel_stack_nth() for
!CONFIG_HAVE_REGS_AND_STACK_ACCESS_API.
 Changes from previous series: Update against the new series.
---
 include/linux/ftrace.h  |   17 +
 kernel/trace/Kconfig|1 -
 kernel/trace/trace_fprobe.c |   74 ---
 kernel/trace/trace_probe_tmpl.h |2 +
 4 files changed, 55 insertions(+), 39 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index c221c754885c..610e26b7c2fc 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -249,6 +249,23 @@ static __always_inline bool ftrace_regs_has_args(struct 
ftrace_regs *fregs)
regs_query_register_offset(name)
 #endif
 
+#ifdef CONFIG_HAVE_REGS_AND_STACK_ACCESS_API
+static __always_inline unsigned long
+ftrace_regs_get_kernel_stack_nth(struct ftrace_regs *fregs, unsigned int nth)
+{
+   unsigned long *stackp;
+
+   stackp = (unsigned long *)ftrace_regs_get_stack_pointer(fregs);
+   if (((unsigned long)(stackp + nth) & ~(THREAD_SIZE - 1)) ==
+   ((unsigned long)stackp & ~(THREAD_SIZE - 1)))
+   return *(stackp + nth);
+
+   return 0;
+}
+#else /* !CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+#define ftrace_regs_get_kernel_stack_nth(fregs, nth)   (0L)
+#endif /* CONFIG_HAVE_REGS_AND_STACK_ACCESS_API */
+
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip,
  struct ftrace_ops *op, struct ftrace_regs *fregs);
 
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 11a96275b68c..169588021d90 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -681,7 +681,6 @@ config FPROBE_EVENTS
select TRACING
select PROBE_EVENTS
select DYNAMIC_EVENTS
-   depends on DYNAMIC_FTRACE_WITH_REGS
default y
help
  This allows user to add tracing events on the function entry and
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 59d2ef8d9552..a6a3a85f96a9 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -132,7 +132,7 @@ static int
 process_fetch_insn(struct fetch_insn *code, void *rec, void *dest,
   void *base)
 {
-   struct pt_regs *regs = rec;
+   struct ftrace_regs *fregs = rec;
unsigned long val;
int ret;
 
@@ -140,17 +140,17 @@ process_fetch_insn(struct fetch_insn *code, void *rec, 
void *dest,
/* 1st stage: get value from context */
switch (code->op) {
case FETCH_OP_STACK:
-   val = regs_get_kernel_stack_nth(regs, code->param);
+   val = ftrace_regs_get_kernel_stack_nth(fregs, code->param);
break;
case FETCH_OP_STACKP:
-   val = kernel_stack_pointer(regs);
+   val = ftrace_regs_get_stack_pointer(fregs);
break;
case FETCH_OP_RETVAL:
-   val = regs_return_value(regs);
+   val = ftrace_regs_return_value(fregs);
break;
 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
case FETCH_OP_ARG:
-   val = regs_get_kernel_argument(regs, code->param);
+   val = ftrace_regs_get_argument(fregs, code->param);
break;
 #endif
case FETCH_NOP_SYMBOL:  /* Ignore a place holder */
@@ -170,7 +170,7 @@ NOKPROBE_SYMBOL(process_fetch_insn)
 /* function entry handler */
 static nokprobe_inline void
 __fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
-   struct pt_regs *regs,
+   struct ftrace_regs *fregs,
struct trace_event_file *trace_file)
 {
struct fentry_trace_entry_head *entry;
@@ -184,36 +184,36 @@ __fentry_trace_func(struct trace_fprobe *tf, unsigned 
long entry_ip,
if (trace_trigger_soft_disabled(trace_file))
return;
 
-   dsize = __get_data_size(>tp, regs);
+   dsize = __get_data_size(>tp, fregs);
 
entry = trace_event_buffer_reserve(, trace_file,
   sizeof(*entry) + tf->tp.size + 
dsize);
if (!entry)
return;
 
-   fbuffer.regs = regs;
+   fbuffer.regs = ftrace_get_regs(fregs);
entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
entry->ip = entry_ip;
-   store_trace_args([1], >tp, regs, sizeof(*entry), dsize);
+   store_trace_args([1], >tp, fregs, sizeof(*entry), dsize);
 
trace_event_buffer_commit();
 }
 
 static void
 fentry_trace_func(struct trace_fprobe *tf, unsigned long entry_ip,
-   

[RFC PATCH v2 27/31] tracing/fprobe: Remove nr_maxactive from fprobe

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Remove depercated fprobe::nr_maxactive. This involves fprobe events to
rejects the maxactive number.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Newly added.
---
 include/linux/fprobe.h  |2 --
 kernel/trace/trace_fprobe.c |   44 ++-
 2 files changed, 6 insertions(+), 40 deletions(-)

diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index 08b37b0d1d05..c28d06ddfb8e 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -47,7 +47,6 @@ struct fprobe_hlist {
  * @nmissed: The counter for missing events.
  * @flags: The status flag.
  * @entry_data_size: The private data storage size.
- * @nr_maxactive: The max number of active functions. (*deprecated)
  * @entry_handler: The callback function for function entry.
  * @exit_handler: The callback function for function exit.
  * @hlist_array: The fprobe_hlist for fprobe search from IP hash table.
@@ -56,7 +55,6 @@ struct fprobe {
unsigned long   nmissed;
unsigned intflags;
size_t  entry_data_size;
-   int nr_maxactive;
 
int (*entry_handler)(struct fprobe *fp, unsigned long entry_ip,
 unsigned long ret_ip, struct ftrace_regs *regs,
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index c60d0d9f1a95..59d2ef8d9552 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -375,7 +375,6 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
   const char *event,
   const char *symbol,
   struct tracepoint *tpoint,
-  int maxactive,
   int nargs, bool is_return)
 {
struct trace_fprobe *tf;
@@ -395,7 +394,6 @@ static struct trace_fprobe *alloc_trace_fprobe(const char 
*group,
tf->fp.entry_handler = fentry_dispatcher;
 
tf->tpoint = tpoint;
-   tf->fp.nr_maxactive = maxactive;
 
ret = trace_probe_init(>tp, event, group, false);
if (ret < 0)
@@ -973,12 +971,11 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
 *  FETCHARG:TYPE : use TYPE instead of unsigned long.
 */
struct trace_fprobe *tf = NULL;
-   int i, len, new_argc = 0, ret = 0;
+   int i, new_argc = 0, ret = 0;
bool is_return = false;
char *symbol = NULL;
const char *event = NULL, *group = FPROBE_EVENT_SYSTEM;
const char **new_argv = NULL;
-   int maxactive = 0;
char buf[MAX_EVENT_NAME_LEN];
char gbuf[MAX_EVENT_NAME_LEN];
char sbuf[KSYM_NAME_LEN];
@@ -999,33 +996,13 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
 
trace_probe_log_init("trace_fprobe", argc, argv);
 
-   event = strchr([0][1], ':');
-   if (event)
-   event++;
-
-   if (isdigit(argv[0][1])) {
-   if (event)
-   len = event - [0][1] - 1;
-   else
-   len = strlen([0][1]);
-   if (len > MAX_EVENT_NAME_LEN - 1) {
-   trace_probe_log_err(1, BAD_MAXACT);
-   goto parse_error;
-   }
-   memcpy(buf, [0][1], len);
-   buf[len] = '\0';
-   ret = kstrtouint(buf, 0, );
-   if (ret || !maxactive) {
+   if (argv[0][1] != '\0') {
+   if (argv[0][1] != ':') {
+   trace_probe_log_set_index(0);
trace_probe_log_err(1, BAD_MAXACT);
goto parse_error;
}
-   /* fprobe rethook instances are iterated over via a list. The
-* maximum should stay reasonable.
-*/
-   if (maxactive > RETHOOK_MAXACTIVE_MAX) {
-   trace_probe_log_err(1, MAXACT_TOO_BIG);
-   goto parse_error;
-   }
+   event = [0][2];
}
 
trace_probe_log_set_index(1);
@@ -1035,12 +1012,6 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
if (ret < 0)
goto parse_error;
 
-   if (!is_return && maxactive) {
-   trace_probe_log_set_index(0);
-   trace_probe_log_err(1, BAD_MAXACT_TYPE);
-   goto parse_error;
-   }
-
trace_probe_log_set_index(0);
if (event) {
ret = traceprobe_parse_event_name(, , gbuf,
@@ -1094,8 +1065,7 @@ static int __trace_fprobe_create(int argc, const char 
*argv[])
}
 
/* setup a probe */
-   tf = alloc_trace_fprobe(group, event, symbol, tpoint, maxactive,
-   argc, is_return);
+   tf = 

[RFC PATCH v2 26/31] fprobe: Rewrite fprobe on function-graph tracer

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Rewrite fprobe implementation on function-graph tracer.
Major API changes are:
 -  'nr_maxactive' field is deprecated.
 -  This depends on CONFIG_DYNAMIC_FTRACE_WITH_ARGS or
!CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS, and
CONFIG_HAVE_FUNCTION_GRAPH_FREGS. So currently works only
on x86_64.
 -  Currently the entry size is limited in 15 * sizeof(long).
 -  If there is too many fprobe exit handler set on the same
function, it will fail to probe.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Add more lockdep_assert_held(fprobe_mutex)
  - Use READ_ONCE() and WRITE_ONCE() for fprobe_hlist_node::fp.
  - Add NOKPROBE_SYMBOL() for the functions which is called from
entry/exit callback.
---
 include/linux/fprobe.h |   54 +++-
 kernel/trace/Kconfig   |8 -
 kernel/trace/fprobe.c  |  638 ++--
 lib/test_fprobe.c  |   45 ---
 4 files changed, 493 insertions(+), 252 deletions(-)

diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index 879a30956009..08b37b0d1d05 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -5,32 +5,56 @@
 
 #include 
 #include 
-#include 
+#include 
+#include 
+#include 
+
+struct fprobe;
+
+/**
+ * strcut fprobe_hlist_node - address based hash list node for fprobe.
+ *
+ * @hlist: The hlist node for address search hash table.
+ * @addr: The address represented by this.
+ * @fp: The fprobe which owns this.
+ */
+struct fprobe_hlist_node {
+   struct hlist_node   hlist;
+   unsigned long   addr;
+   struct fprobe   *fp;
+};
+
+/**
+ * struct fprobe_hlist - hash list nodes for fprobe.
+ *
+ * @hlist: The hlist node for existence checking hash table.
+ * @rcu: rcu_head for RCU deferred release.
+ * @fp: The fprobe which owns this fprobe_hlist.
+ * @size: The size of @array.
+ * @array: The fprobe_hlist_node for each address to probe.
+ */
+struct fprobe_hlist {
+   struct hlist_node   hlist;
+   struct rcu_head rcu;
+   struct fprobe   *fp;
+   int size;
+   struct fprobe_hlist_nodearray[];
+};
 
 /**
  * struct fprobe - ftrace based probe.
- * @ops: The ftrace_ops.
+ *
  * @nmissed: The counter for missing events.
  * @flags: The status flag.
- * @rethook: The rethook data structure. (internal data)
  * @entry_data_size: The private data storage size.
- * @nr_maxactive: The max number of active functions.
+ * @nr_maxactive: The max number of active functions. (*deprecated)
  * @entry_handler: The callback function for function entry.
  * @exit_handler: The callback function for function exit.
+ * @hlist_array: The fprobe_hlist for fprobe search from IP hash table.
  */
 struct fprobe {
-#ifdef CONFIG_FUNCTION_TRACER
-   /*
-* If CONFIG_FUNCTION_TRACER is not set, CONFIG_FPROBE is disabled too.
-* But user of fprobe may keep embedding the struct fprobe on their own
-* code. To avoid build error, this will keep the fprobe data structure
-* defined here, but remove ftrace_ops data structure.
-*/
-   struct ftrace_ops   ops;
-#endif
unsigned long   nmissed;
unsigned intflags;
-   struct rethook  *rethook;
size_t  entry_data_size;
int nr_maxactive;
 
@@ -40,6 +64,8 @@ struct fprobe {
void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip,
 unsigned long ret_ip, struct ftrace_regs *fregs,
 void *entry_data);
+
+   struct fprobe_hlist *hlist_array;
 };
 
 /* This fprobe is soft-disabled. */
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1a2544712690..11a96275b68c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -296,11 +296,9 @@ config DYNAMIC_FTRACE_WITH_ARGS
 
 config FPROBE
bool "Kernel Function Probe (fprobe)"
-   depends on FUNCTION_TRACER
-   depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS
-   depends on HAVE_PT_REGS_TO_FTRACE_REGS_CAST || 
!HAVE_DYNAMIC_FTRACE_WITH_ARGS
-   depends on HAVE_RETHOOK
-   select RETHOOK
+   depends on FUNCTION_GRAPH_TRACER
+   depends on HAVE_FUNCTION_GRAPH_FREGS
+   depends on DYNAMIC_FTRACE_WITH_ARGS || !HAVE_DYNAMIC_FTRACE_WITH_ARGS
default n
help
  This option enables kernel function probe (fprobe) based on ftrace.
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 38fe6a19450b..409427a8af7c 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -8,98 +8,200 @@
 #include 
 #include 
 #include 
-#include 
+#include 
+#include 
 #include 
 #include 
 
 #include "trace.h"
 
-struct fprobe_rethook_node {
-   struct rethook_node node;
-   unsigned long entry_ip;
-   unsigned long entry_parent_ip;
-   char data[];
-};

[RFC PATCH v2 25/31] tracing: Add ftrace_fill_perf_regs() for perf event

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add ftrace_fill_perf_regs() which should be compatible with the
perf_fetch_caller_regs(). In other words, the pt_regs returned from the
ftrace_fill_perf_regs() must satisfy 'user_mode(regs) == false' and can be
used for stack tracing.

Signed-off-by: Masami Hiramatsu (Google) 
---
  Changes from previous series: NOTHING, just forward ported.
---
 arch/arm64/include/asm/ftrace.h   |7 +++
 arch/powerpc/include/asm/ftrace.h |7 +++
 arch/s390/include/asm/ftrace.h|5 +
 arch/x86/include/asm/ftrace.h |7 +++
 include/linux/ftrace.h|   31 +++
 5 files changed, 57 insertions(+)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index 5ad24f315d52..5fbe8fe3e8a5 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -148,6 +148,13 @@ ftrace_partial_regs(const struct ftrace_regs *fregs, 
struct pt_regs *regs)
return regs;
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {  \
+   (_regs)->pc = (fregs)->pc;  \
+   (_regs)->regs[29] = (fregs)->fp;\
+   (_regs)->sp = (fregs)->sp;  \
+   (_regs)->pstate = PSR_MODE_EL1h;\
+   } while (0)
+
 int ftrace_regs_query_register_offset(const char *name);
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
diff --git a/arch/powerpc/include/asm/ftrace.h 
b/arch/powerpc/include/asm/ftrace.h
index 9e5a39b6a311..53fa39ef476b 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -52,6 +52,13 @@ static __always_inline struct pt_regs 
*arch_ftrace_get_regs(struct ftrace_regs *
return fregs->regs.msr ? >regs : NULL;
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {  \
+   (_regs)->result = 0;\
+   (_regs)->nip = (fregs)->regs.nip;   \
+   (_regs)->gpr[1] = (fregs)->regs.gpr[1]; \
+   asm volatile("mfmsr %0" : "=r" ((_regs)->msr)); \
+   } while (0)
+
 static __always_inline void
 ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
unsigned long ip)
diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h
index 5a82b08f03cd..f36ee3510af8 100644
--- a/arch/s390/include/asm/ftrace.h
+++ b/arch/s390/include/asm/ftrace.h
@@ -97,6 +97,11 @@ ftrace_regs_set_instruction_pointer(struct ftrace_regs 
*fregs,
 #define ftrace_regs_query_register_offset(name) \
regs_query_register_offset(name)
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs)do {   \
+   (_regs)->psw.addr = (fregs)->regs.psw.addr; \
+   (_regs)->gprs[15] = (fregs)->regs.gprs[15]; \
+   } while (0)
+
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
 /*
  * When an ftrace registered caller is tracing a function that is
diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 4701d009c215..0ac397931bf2 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -54,6 +54,13 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
return >regs;
 }
 
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {  \
+   (_regs)->ip = (fregs)->regs.ip; \
+   (_regs)->sp = (fregs)->regs.sp; \
+   (_regs)->cs = __KERNEL_CS;  \
+   (_regs)->flags = 0; \
+   } while (0)
+
 #define ftrace_regs_set_instruction_pointer(fregs, _ip)\
do { (fregs)->regs.ip = (_ip); } while (0)
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index c871b195730c..c221c754885c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -189,6 +189,37 @@ ftrace_partial_regs(struct ftrace_regs *fregs, struct 
pt_regs *regs)
 
 #endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || 
CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
 
+#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+
+/*
+ * Please define arch dependent pt_regs which compatible to the
+ * perf_arch_fetch_caller_regs() but based on ftrace_regs.
+ * This requires
+ *   - user_mode(_regs) returns false (always kernel mode).
+ *   - able to use the _regs for stack trace.
+ */
+#ifndef arch_ftrace_fill_perf_regs
+/* As same as perf_arch_fetch_caller_regs(), do nothing by default */
+#define arch_ftrace_fill_perf_regs(fregs, _regs) do {} while (0)
+#endif
+
+static __always_inline struct pt_regs *
+ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   arch_ftrace_fill_perf_regs(fregs, regs);
+   return regs;
+}
+
+#else /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
+
+static __always_inline struct pt_regs *
+ftrace_fill_perf_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   

[RFC PATCH v2 24/31] tracing: Add ftrace_partial_regs() for converting ftrace_regs to pt_regs

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add ftrace_partial_regs() which converts the ftrace_regs to pt_regs.
If the architecture defines its own ftrace_regs, this copies partial
registers to pt_regs and returns it. If not, ftrace_regs is the same as
pt_regs and ftrace_partial_regs() will return ftrace_regs::regs.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes from previous series: NOTHING, just forward ported.
---
 arch/arm64/include/asm/ftrace.h |   11 +++
 include/linux/ftrace.h  |   17 +
 2 files changed, 28 insertions(+)

diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h
index ab158196480c..5ad24f315d52 100644
--- a/arch/arm64/include/asm/ftrace.h
+++ b/arch/arm64/include/asm/ftrace.h
@@ -137,6 +137,17 @@ ftrace_override_function_with_return(struct ftrace_regs 
*fregs)
fregs->pc = fregs->lr;
 }
 
+static __always_inline struct pt_regs *
+ftrace_partial_regs(const struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   memcpy(regs->regs, fregs->regs, sizeof(u64) * 9);
+   regs->sp = fregs->sp;
+   regs->pc = fregs->pc;
+   regs->regs[29] = fregs->fp;
+   regs->regs[30] = fregs->lr;
+   return regs;
+}
+
 int ftrace_regs_query_register_offset(const char *name);
 
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index c87e4debf698..c871b195730c 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -172,6 +172,23 @@ static __always_inline struct pt_regs 
*ftrace_get_regs(struct ftrace_regs *fregs
return arch_ftrace_get_regs(fregs);
 }
 
+#if !defined(CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS) || \
+   defined(CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST)
+
+static __always_inline struct pt_regs *
+ftrace_partial_regs(struct ftrace_regs *fregs, struct pt_regs *regs)
+{
+   /*
+* If CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST=y, ftrace_regs memory
+* layout is the same as pt_regs. So always returns that address.
+* Since arch_ftrace_get_regs() will check some members and may return
+* NULL, we can not use it.
+*/
+   return >regs;
+}
+
+#endif /* !CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS || 
CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
+
 /*
  * When true, the ftrace_regs_{get,set}_*() functions may be used on fregs.
  * Note: this can be true even when ftrace_get_regs() cannot provide a pt_regs.




[RFC PATCH v2 23/31] fprobe: Use ftrace_regs in fprobe exit handler

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Change the fprobe exit handler to use ftrace_regs structure instead of
pt_regs. This also introduce HAVE_PT_REGS_TO_FTRACE_REGS_CAST which means
the ftrace_regs's memory layout is equal to the pt_regs so that those are
able to cast. Fprobe introduces a new dependency with that.

Signed-off-by: Masami Hiramatsu (Google) 
---
  Changes from previous series: NOTHING, just forward ported.
---
 arch/loongarch/Kconfig  |1 +
 arch/s390/Kconfig   |1 +
 arch/x86/Kconfig|1 +
 include/linux/fprobe.h  |2 +-
 include/linux/ftrace.h  |5 +
 kernel/trace/Kconfig|8 
 kernel/trace/bpf_trace.c|6 +-
 kernel/trace/fprobe.c   |3 ++-
 kernel/trace/trace_fprobe.c |6 +-
 lib/test_fprobe.c   |6 +++---
 samples/fprobe/fprobe_example.c |2 +-
 11 files changed, 33 insertions(+), 8 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index e14396a2ddcb..258e9bee1503 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -108,6 +108,7 @@ config LOONGARCH
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+   select HAVE_PT_REGS_TO_FTRACE_REGS_CAST
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index ae29e4392664..5aedb4320e7c 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -167,6 +167,7 @@ config S390
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
+   select HAVE_PT_REGS_TO_FTRACE_REGS_CAST
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_EBPF_JIT if HAVE_MARCH_Z196_FEATURES
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 4b4c2f9d67da..e11ba9a33afe 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -209,6 +209,7 @@ config X86
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_DYNAMIC_FTRACE_WITH_ARGSif X86_64
+   select HAVE_PT_REGS_TO_FTRACE_REGS_CAST if X86_64
select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
select HAVE_SAMPLE_FTRACE_DIRECTif X86_64
select HAVE_SAMPLE_FTRACE_DIRECT_MULTI  if X86_64
diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index 36c0595f7b93..879a30956009 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -38,7 +38,7 @@ struct fprobe {
 unsigned long ret_ip, struct ftrace_regs *regs,
 void *entry_data);
void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip,
-unsigned long ret_ip, struct pt_regs *regs,
+unsigned long ret_ip, struct ftrace_regs *fregs,
 void *entry_data);
 };
 
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8efccd911d41..c87e4debf698 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -158,6 +158,11 @@ struct ftrace_regs {
 #define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0)
 #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
 
+#ifdef CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST
+
+static_assert(sizeof(struct pt_regs) == sizeof(struct ftrace_regs));
+
+#endif /* CONFIG_HAVE_PT_REGS_TO_FTRACE_REGS_CAST */
 
 static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs 
*fregs)
 {
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 805d72ab77c6..1a2544712690 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -60,6 +60,13 @@ config HAVE_DYNAMIC_FTRACE_WITH_ARGS
 This allows for use of ftrace_regs_get_argument() and
 ftrace_regs_get_stack_pointer().
 
+config HAVE_PT_REGS_TO_FTRACE_REGS_CAST
+   bool
+   help
+If this is set, the memory layout of the ftrace_regs data structure
+is the same as the pt_regs. So the pt_regs is possible to be casted
+to ftrace_regs.
+
 config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
bool
help
@@ -291,6 +298,7 @@ config FPROBE
bool "Kernel Function Probe (fprobe)"
depends on FUNCTION_TRACER
depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS
+   depends on HAVE_PT_REGS_TO_FTRACE_REGS_CAST || 
!HAVE_DYNAMIC_FTRACE_WITH_ARGS
depends on HAVE_RETHOOK
select RETHOOK
default n
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b0b98f0a0e52..a4b5e34b0419 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2745,10 +2745,14 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned 
long fentry_ip,
 
 static void
 kprobe_multi_link_exit_handler(struct fprobe *fp, unsigned long fentry_ip,
- 

[RFC PATCH v2 22/31] fprobe: Use ftrace_regs in fprobe entry handler

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

This allows fprobes to be available with CONFIG_DYNAMIC_FTRACE_WITH_ARGS
instead of CONFIG_DYNAMIC_FTRACE_WITH_REGS, then we can enable fprobe
on arm64.

Signed-off-by: Masami Hiramatsu (Google) 
Acked-by: Florent Revest 
---
 Changes from previous series: NOTHING, just forward ported.
---
 include/linux/fprobe.h  |2 +-
 kernel/trace/Kconfig|3 ++-
 kernel/trace/bpf_trace.c|   10 +++---
 kernel/trace/fprobe.c   |4 ++--
 kernel/trace/trace_fprobe.c |6 +-
 lib/test_fprobe.c   |4 ++--
 samples/fprobe/fprobe_example.c |2 +-
 7 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/include/linux/fprobe.h b/include/linux/fprobe.h
index 3e03758151f4..36c0595f7b93 100644
--- a/include/linux/fprobe.h
+++ b/include/linux/fprobe.h
@@ -35,7 +35,7 @@ struct fprobe {
int nr_maxactive;
 
int (*entry_handler)(struct fprobe *fp, unsigned long entry_ip,
-unsigned long ret_ip, struct pt_regs *regs,
+unsigned long ret_ip, struct ftrace_regs *regs,
 void *entry_data);
void (*exit_handler)(struct fprobe *fp, unsigned long entry_ip,
 unsigned long ret_ip, struct pt_regs *regs,
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 308b3bec01b1..805d72ab77c6 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -290,7 +290,7 @@ config DYNAMIC_FTRACE_WITH_ARGS
 config FPROBE
bool "Kernel Function Probe (fprobe)"
depends on FUNCTION_TRACER
-   depends on DYNAMIC_FTRACE_WITH_REGS
+   depends on DYNAMIC_FTRACE_WITH_REGS || DYNAMIC_FTRACE_WITH_ARGS
depends on HAVE_RETHOOK
select RETHOOK
default n
@@ -675,6 +675,7 @@ config FPROBE_EVENTS
select TRACING
select PROBE_EVENTS
select DYNAMIC_EVENTS
+   depends on DYNAMIC_FTRACE_WITH_REGS
default y
help
  This allows user to add tracing events on the function entry and
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 868008f56fec..b0b98f0a0e52 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2501,7 +2501,7 @@ static int __init bpf_event_init(void)
 fs_initcall(bpf_event_init);
 #endif /* CONFIG_MODULES */
 
-#ifdef CONFIG_FPROBE
+#if defined(CONFIG_FPROBE) && defined(CONFIG_DYNAMIC_FTRACE_WITH_REGS)
 struct bpf_kprobe_multi_link {
struct bpf_link link;
struct fprobe fp;
@@ -2729,10 +2729,14 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link 
*link,
 
 static int
 kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
- unsigned long ret_ip, struct pt_regs *regs,
+ unsigned long ret_ip, struct ftrace_regs *fregs,
  void *data)
 {
struct bpf_kprobe_multi_link *link;
+   struct pt_regs *regs = ftrace_get_regs(fregs);
+
+   if (!regs)
+   return 0;
 
link = container_of(fp, struct bpf_kprobe_multi_link, fp);
kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs);
@@ -3004,7 +3008,7 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr 
*attr, struct bpf_prog *pr
kvfree(cookies);
return err;
 }
-#else /* !CONFIG_FPROBE */
+#else /* !CONFIG_FPROBE || !CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog 
*prog)
 {
return -EOPNOTSUPP;
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index 881f90f0cbcf..df323cb7bed1 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -46,7 +46,7 @@ static inline void __fprobe_handler(unsigned long ip, 
unsigned long parent_ip,
}
 
if (fp->entry_handler)
-   ret = fp->entry_handler(fp, ip, parent_ip, 
ftrace_get_regs(fregs), entry_data);
+   ret = fp->entry_handler(fp, ip, parent_ip, fregs, entry_data);
 
/* If entry_handler returns !0, nmissed is not counted. */
if (rh) {
@@ -182,7 +182,7 @@ static void fprobe_init(struct fprobe *fp)
fp->ops.func = fprobe_kprobe_handler;
else
fp->ops.func = fprobe_handler;
-   fp->ops.flags |= FTRACE_OPS_FL_SAVE_REGS;
+   fp->ops.flags |= FTRACE_OPS_FL_SAVE_ARGS;
 }
 
 static int fprobe_init_rethook(struct fprobe *fp, int num)
diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 8bfe23af9c73..71bf38d698f1 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -320,12 +320,16 @@ NOKPROBE_SYMBOL(fexit_perf_func);
 #endif /* CONFIG_PERF_EVENTS */
 
 static int fentry_dispatcher(struct fprobe *fp, unsigned long entry_ip,
-unsigned long ret_ip, struct pt_regs *regs,
+unsigned long ret_ip, struct ftrace_regs *fregs,
 

[RFC PATCH v2 21/31] x86/ftrace: Enable HAVE_FUNCTION_GRAPH_FREGS

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Support HAVE_FUNCTION_GRAPH_FREGS on x86-64, which saves ftrace_regs
on the stack in ftrace_graph return trampoline so that the callbacks
can access registers via ftrace_regs APIs.

Note that this only recovers 'rax' and 'rdx' registers because other
registers are not used anymore and recovered by caller. 'rax' and
'rdx' will be used for passing the return value.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Save rsp register and drop clearing orig_ax.
---
 arch/x86/Kconfig|3 ++-
 arch/x86/kernel/ftrace_64.S |   36 
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 66bfabae8814..4b4c2f9d67da 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -219,7 +219,8 @@ config X86
select HAVE_FAST_GUP
select HAVE_FENTRY  if X86_64 || DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
-   select HAVE_FUNCTION_GRAPH_RETVAL   if HAVE_FUNCTION_GRAPH_TRACER
+   select HAVE_FUNCTION_GRAPH_FREGSif HAVE_DYNAMIC_FTRACE_WITH_ARGS
+   select HAVE_FUNCTION_GRAPH_RETVAL   if 
!HAVE_DYNAMIC_FTRACE_WITH_ARGS
select HAVE_FUNCTION_GRAPH_TRACER   if X86_32 || (X86_64 && 
DYNAMIC_FTRACE)
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index 945cfa5f7239..029b0e3a0206 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -348,21 +348,41 @@ STACK_FRAME_NON_STANDARD_FP(__fentry__)
 SYM_CODE_START(return_to_handler)
UNWIND_HINT_UNDEFINED
ANNOTATE_NOENDBR
-   subq  $24, %rsp
+   /*
+* Save the registers requires for ftrace_regs;
+* rax, rcx, rdx, rdi, rsi, r8, r9 and rbp
+*/
+   subq $(FRAME_SIZE), %rsp
+   movq %rax, RAX(%rsp)
+   movq %rcx, RCX(%rsp)
+   movq %rdx, RDX(%rsp)
+   movq %rsi, RSI(%rsp)
+   movq %rdi, RDI(%rsp)
+   movq %r8, R8(%rsp)
+   movq %r9, R9(%rsp)
+   movq %rbp, RBP(%rsp)
+   /*
+* orig_ax is not cleared because it is used for indicating the direct
+* trampoline in the fentry.
+*/
+
+   leaq FRAME_SIZE(%rsp), %rcx
+   movq %rcx, RSP(%rsp)
 
-   /* Save the return values */
-   movq %rax, (%rsp)
-   movq %rdx, 8(%rsp)
-   movq %rbp, 16(%rsp)
movq %rsp, %rdi
 
call ftrace_return_to_handler
 
movq %rax, %rdi
-   movq 8(%rsp), %rdx
-   movq (%rsp), %rax
 
-   addq $24, %rsp
+   /*
+* Restore only rax and rdx because other registers are not used
+* for return value nor callee saved. Caller will reuse/recover it.
+*/
+   movq RDX(%rsp), %rdx
+   movq RAX(%rsp), %rax
+
+   addq $(FRAME_SIZE), %rsp
/*
 * Jump back to the old return address. This cannot be JMP_NOSPEC rdi
 * since IBT would demand that contain ENDBR, which simply isn't so for




[RFC PATCH v2 20/31] function_graph: Add a new exit handler with parent_ip and ftrace_regs

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add a new return handler to fgraph_ops as 'retregfunc'  which takes
parent_ip and ftrace_regs instead of ftrace_graph_ret. This handler
is available only if the arch support CONFIG_HAVE_FUNCTION_GRAPH_FREGS.
Note that the 'retfunc' and 'reregfunc' are mutual exclusive.
You can set only one of them.

Signed-off-by: Masami Hiramatsu (Google) 
---
 arch/x86/include/asm/ftrace.h |2 +
 include/linux/ftrace.h|   10 +-
 kernel/trace/Kconfig  |5 ++-
 kernel/trace/fgraph.c |   74 +++--
 4 files changed, 63 insertions(+), 28 deletions(-)

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index fc60974a1d89..4701d009c215 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -72,6 +72,8 @@ arch_ftrace_get_regs(struct ftrace_regs *fregs)
override_function_with_return(&(fregs)->regs)
 #define ftrace_regs_query_register_offset(name) \
regs_query_register_offset(name)
+#define ftrace_regs_get_frame_pointer(fregs) \
+   frame_pointer(&(fregs)->regs)
 
 struct ftrace_ops;
 #define ftrace_graph_func ftrace_graph_func
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index c91b234949d5..8efccd911d41 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -43,7 +43,9 @@ struct dyn_ftrace;
 
 char *arch_ftrace_match_adjust(char *str, const char *search);
 
-#ifdef CONFIG_HAVE_FUNCTION_GRAPH_RETVAL
+#ifdef CONFIG_HAVE_FUNCTION_GRAPH_FREGS
+unsigned long ftrace_return_to_handler(struct ftrace_regs *fregs);
+#elif defined(CONFIG_HAVE_FUNCTION_GRAPH_RETVAL)
 struct fgraph_ret_regs;
 unsigned long ftrace_return_to_handler(struct fgraph_ret_regs *ret_regs);
 #else
@@ -156,6 +158,7 @@ struct ftrace_regs {
 #define ftrace_regs_set_instruction_pointer(fregs, ip) do { } while (0)
 #endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
 
+
 static __always_inline struct pt_regs *ftrace_get_regs(struct ftrace_regs 
*fregs)
 {
if (!fregs)
@@ -1066,6 +1069,10 @@ typedef int (*trace_func_graph_regs_ent_t)(unsigned long 
func,
   unsigned long parent_ip,
   struct ftrace_regs *fregs,
   struct fgraph_ops *); /* entry w/ 
regs */
+typedef void (*trace_func_graph_regs_ret_t)(unsigned long func,
+   unsigned long parent_ip,
+   struct ftrace_regs *,
+   struct fgraph_ops *); /* return w/ 
regs */
 
 extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct 
fgraph_ops *gops);
 
@@ -1075,6 +1082,7 @@ struct fgraph_ops {
trace_func_graph_ent_t  entryfunc;
trace_func_graph_ret_t  retfunc;
trace_func_graph_regs_ent_t entryregfunc;
+   trace_func_graph_regs_ret_t retregfunc;
struct ftrace_ops   ops; /* for the hash lists */
void*private;
int idx;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 61c541c36596..308b3bec01b1 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -34,6 +34,9 @@ config HAVE_FUNCTION_GRAPH_TRACER
 config HAVE_FUNCTION_GRAPH_RETVAL
bool
 
+config HAVE_FUNCTION_GRAPH_FREGS
+   bool
+
 config HAVE_DYNAMIC_FTRACE
bool
help
@@ -232,7 +235,7 @@ config FUNCTION_GRAPH_TRACER
 
 config FUNCTION_GRAPH_RETVAL
bool "Kernel Function Graph Return Value"
-   depends on HAVE_FUNCTION_GRAPH_RETVAL
+   depends on HAVE_FUNCTION_GRAPH_RETVAL || HAVE_FUNCTION_GRAPH_FREGS
depends on FUNCTION_GRAPH_TRACER
default n
help
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 6567b18c6c54..cf240914ef9b 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -623,8 +623,7 @@ int function_graph_enter_regs(unsigned long ret, unsigned 
long func,
 
 /* Retrieve a function return address to the trace stack on thread info.*/
 static struct ftrace_ret_stack *
-ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
-   unsigned long frame_pointer)
+ftrace_pop_return_trace(unsigned long *ret, unsigned long frame_pointer)
 {
struct ftrace_ret_stack *ret_stack;
int index;
@@ -669,10 +668,6 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, 
unsigned long *ret,
 #endif
 
*ret = ret_stack->ret;
-   trace->func = ret_stack->func;
-   trace->calltime = ret_stack->calltime;
-   trace->overrun = atomic_read(>trace_overrun);
-   trace->depth = current->curr_ret_depth;
/*
 * We still want to trace interrupts coming in if
 * max_depth is set to 1. Make sure the decrement is
@@ -711,22 +706,43 @@ static struct notifier_block ftrace_suspend_notifier = {
 /* 

[RFC PATCH v2 19/31] function_graph: Add a new entry handler with parent_ip and ftrace_regs

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add a new entry handler to fgraph_ops as 'entryregfunc'  which takes
parent_ip and ftrace_regs. Note that the 'entryfunc' and 'entryregfunc'
are mutual exclusive. You can set only one of them.

Signed-off-by: Masami Hiramatsu (Google) 
---
 arch/arm64/kernel/ftrace.c   |8 -
 arch/loongarch/kernel/ftrace_dyn.c   |6 +++-
 arch/powerpc/kernel/trace/ftrace.c   |2 +
 arch/powerpc/kernel/trace/ftrace_64_pg.c |   10 --
 arch/x86/kernel/ftrace.c |   50 --
 include/linux/ftrace.h   |   17 +-
 kernel/trace/fgraph.c|   27 +---
 7 files changed, 90 insertions(+), 30 deletions(-)

diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c
index a650f5e11fc5..e94d9dc13d89 100644
--- a/arch/arm64/kernel/ftrace.c
+++ b/arch/arm64/kernel/ftrace.c
@@ -481,7 +481,13 @@ void prepare_ftrace_return(unsigned long self_addr, 
unsigned long *parent,
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
   struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-   prepare_ftrace_return(ip, >lr, fregs->fp);
+   if (unlikely(atomic_read(>tracing_graph_pause)))
+   return;
+
+   if (!function_graph_enter_regs(fregs->lr, ip, fregs->fp,
+  (void *)fregs->fp, fregs)) {
+   fregs->lr = (unsigned long)_to_handler;
+   }
 }
 #else
 /*
diff --git a/arch/loongarch/kernel/ftrace_dyn.c 
b/arch/loongarch/kernel/ftrace_dyn.c
index 73858c9029cc..39b3f09a5e0c 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -244,7 +244,11 @@ void ftrace_graph_func(unsigned long ip, unsigned long 
parent_ip,
struct pt_regs *regs = >regs;
unsigned long *parent = (unsigned long *)>regs[1];
 
-   prepare_ftrace_return(ip, (unsigned long *)parent);
+   if (unlikely(atomic_read(>tracing_graph_pause)))
+   return;
+
+   if (!function_graph_enter_regs(regs->regs[1], ip, 0, parent, fregs))
+   regs->regs[1] = (unsigned long)_to_handler;
 }
 #else
 static int ftrace_modify_graph_caller(bool enable)
diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 82010629cf88..9bf1b6912116 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -422,7 +422,7 @@ void ftrace_graph_func(unsigned long ip, unsigned long 
parent_ip,
if (bit < 0)
goto out;
 
-   if (!function_graph_enter(parent_ip, ip, 0, (unsigned long *)sp))
+   if (!function_graph_enter_regs(parent_ip, ip, 0, (unsigned long *)sp, 
fregs))
parent_ip = ppc_function_entry(return_to_handler);
 
ftrace_test_recursion_unlock(bit);
diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c 
b/arch/powerpc/kernel/trace/ftrace_64_pg.c
index 7b85c3b460a3..43f6cfaaf7db 100644
--- a/arch/powerpc/kernel/trace/ftrace_64_pg.c
+++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c
@@ -795,7 +795,8 @@ int ftrace_disable_ftrace_graph_caller(void)
  * in current thread info. Return the address we want to divert to.
  */
 static unsigned long
-__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long 
sp)
+__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long 
sp,
+   struct ftrace_regs *fregs)
 {
unsigned long return_hooker;
int bit;
@@ -812,7 +813,7 @@ __prepare_ftrace_return(unsigned long parent, unsigned long 
ip, unsigned long sp
 
return_hooker = ppc_function_entry(return_to_handler);
 
-   if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp))
+   if (!function_graph_enter_regs(parent, ip, 0, (unsigned long *)sp, 
fregs))
parent = return_hooker;
 
ftrace_test_recursion_unlock(bit);
@@ -824,13 +825,14 @@ __prepare_ftrace_return(unsigned long parent, unsigned 
long ip, unsigned long sp
 void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
   struct ftrace_ops *op, struct ftrace_regs *fregs)
 {
-   fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, 
fregs->regs.gpr[1]);
+   fregs->regs.link = __prepare_ftrace_return(parent_ip, ip,
+  fregs->regs.gpr[1], fregs);
 }
 #else
 unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip,
unsigned long sp)
 {
-   return __prepare_ftrace_return(parent, ip, sp);
+   return __prepare_ftrace_return(parent, ip, sp, NULL);
 }
 #endif
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 12df54ff0e81..85247a8f265b 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -614,16 +614,8 @@ int ftrace_disable_ftrace_graph_caller(void)
 }
 #endif /* 

[RFC PATCH v2 18/31] function_graph: Add selftest for passing local variables

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

Add boot up selftest that passes variables from a function entry to a
function exit, and make sure that they do get passed around.

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Add reserved size test.
  - Use pr_*() instead of printk(KERN_*).
---
 kernel/trace/trace_selftest.c |  169 +
 1 file changed, 169 insertions(+)

diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index f0758afa2f7d..601e9d475546 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -756,6 +756,173 @@ trace_selftest_startup_function(struct tracer *trace, 
struct trace_array *tr)
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
+#ifdef CONFIG_DYNAMIC_FTRACE
+
+#define BYTE_NUMBER 123
+#define SHORT_NUMBER 12345
+#define WORD_NUMBER 1234567890
+#define LONG_NUMBER 1234567890123456789LL
+
+static int fgraph_store_size __initdata;
+static const char *fgraph_store_type_name __initdata;
+static char *fgraph_error_str __initdata;
+static char fgraph_error_str_buf[128] __initdata;
+
+static __init int store_entry(struct ftrace_graph_ent *trace,
+ struct fgraph_ops *gops)
+{
+   const char *type = fgraph_store_type_name;
+   int size = fgraph_store_size;
+   void *p;
+
+   p = fgraph_reserve_data(size);
+   if (!p) {
+   snprintf(fgraph_error_str_buf, sizeof(fgraph_error_str_buf),
+"Failed to reserve %s\n", type);
+   fgraph_error_str = fgraph_error_str_buf;
+   return 0;
+   }
+
+   switch (fgraph_store_size) {
+   case 1:
+   *(char *)p = BYTE_NUMBER;
+   break;
+   case 2:
+   *(short *)p = SHORT_NUMBER;
+   break;
+   case 4:
+   *(int *)p = WORD_NUMBER;
+   break;
+   case 8:
+   *(long long *)p = LONG_NUMBER;
+   break;
+   }
+
+   return 1;
+}
+
+static __init void store_return(struct ftrace_graph_ret *trace,
+   struct fgraph_ops *gops)
+{
+   const char *type = fgraph_store_type_name;
+   long long expect = 0;
+   long long found = -1;
+   int size;
+   char *p;
+
+   p = fgraph_retrieve_data();
+   if (!p) {
+   snprintf(fgraph_error_str_buf, sizeof(fgraph_error_str_buf),
+"Failed to retrieve %s\n", type);
+   fgraph_error_str = fgraph_error_str_buf;
+   return;
+   }
+   if (fgraph_store_size > size) {
+   snprintf(fgraph_error_str_buf, sizeof(fgraph_error_str_buf),
+"Retrieved size %d is smaller than expected %d\n",
+size, (int)fgraph_store_size);
+   fgraph_error_str = fgraph_error_str_buf;
+   return;
+   }
+
+   switch (fgraph_store_size) {
+   case 1:
+   expect = BYTE_NUMBER;
+   found = *(char *)p;
+   break;
+   case 2:
+   expect = SHORT_NUMBER;
+   found = *(short *)p;
+   break;
+   case 4:
+   expect = WORD_NUMBER;
+   found = *(int *)p;
+   break;
+   case 8:
+   expect = LONG_NUMBER;
+   found = *(long long *)p;
+   break;
+   }
+
+   if (found != expect) {
+   snprintf(fgraph_error_str_buf, sizeof(fgraph_error_str_buf),
+"%s returned not %lld but %lld\n", type, expect, 
found);
+   fgraph_error_str = fgraph_error_str_buf;
+   return;
+   }
+   fgraph_error_str = NULL;
+}
+
+static struct fgraph_ops store_bytes __initdata = {
+   .entryfunc  = store_entry,
+   .retfunc= store_return,
+};
+
+static int __init test_graph_storage_type(const char *name, int size)
+{
+   char *func_name;
+   int len;
+   int ret;
+
+   fgraph_store_type_name = name;
+   fgraph_store_size = size;
+
+   snprintf(fgraph_error_str_buf, sizeof(fgraph_error_str_buf),
+"Failed to execute storage %s\n", name);
+   fgraph_error_str = fgraph_error_str_buf;
+
+   pr_cont("PASSED\n");
+   pr_info("Testing fgraph storage of %d byte%s: ", size, size > 1 ? "s" : 
"");
+
+   func_name = "*" __stringify(DYN_FTRACE_TEST_NAME);
+   len = strlen(func_name);
+
+   ret = ftrace_set_filter(_bytes.ops, func_name, len, 1);
+   if (ret && ret != -ENODEV) {
+   pr_cont("*Could not set filter* ");
+   return -1;
+   }
+
+   ret = register_ftrace_graph(_bytes);
+   if (ret) {
+   pr_warn("Failed to init store_bytes fgraph tracing\n");
+   return -1;
+   }
+
+   DYN_FTRACE_TEST_NAME();
+
+   unregister_ftrace_graph(_bytes);
+
+   if 

[RFC PATCH v2 17/31] function_graph: Implement fgraph_reserve_data() and fgraph_retrieve_data()

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

Added functions that can be called by a fgraph_ops entryfunc and retfunc to
store state between the entry of the function being traced to the exit of
the same function. The fgraph_ops entryfunc() may call
fgraph_reserve_data() to store up to 32 words onto the task's shadow
ret_stack and this then can be retrieved by fgraph_retrieve_data() called
by the corresponding retfunc().

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Retrieve the reserved size by fgraph_retrieve_data().
  - Expand the maximum data size to 32 words.
  - Update stack index with __get_index(val) if FGRAPH_TYPE_ARRAY entry.
  - fix typos and make description lines shorter than 76 chars.
---
 include/linux/ftrace.h |3 +
 kernel/trace/fgraph.c  |  248 +---
 2 files changed, 217 insertions(+), 34 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 3f9f1f48e8fd..3bc01329548b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1074,6 +1074,9 @@ struct fgraph_ops {
int idx;
 };
 
+void *fgraph_reserve_data(int size_bytes);
+void *fgraph_retrieve_data(int *size_bytes);
+
 /*
  * Stack of return addresses for functions
  * of a thread.
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 79bdd3c775dd..4d8664942335 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -38,25 +38,36 @@
  * bits: 14 - 15   Type of storage
  *   0 - reserved
  *   1 - fgraph_array index
+ *   2 - reservered data
  * For fgraph_array_index:
  *  bits: 16 - 23  The fgraph_ops fgraph_array index
  *
+ * For reserved data:
+ *  bits: 16 - 17  The size in words that is stored
+ *
  * That is, at the end of function_graph_enter, if the first and forth
  * fgraph_ops on the fgraph_array[] (index 0 and 3) needs their retfunc called
- * on the return of the function being traced, this is what will be on the
- * task's shadow ret_stack: (the stack grows upward)
+ * on the return of the function being traced, and the forth fgraph_ops
+ * stored two words of data, this is what will be on the task's shadow
+ * ret_stack: (the stack grows upward)
+ *
+ * | | <- task->curr_ret_stack
+ * +-+
+ * | (3 << FGRAPH_ARRAY_SHIFT)|type:1|(5)| ( 3 for index of fourth fgraph_ops)
+ * +-+
+ * | (3 << FGRAPH_DATA_SHIFT)|type:2|(4) | ( Data with size of 2 words)
+ * +-+ ( It is 4 words from the ret_stack)
+ * | STORED DATA WORD 2  |
+ * | STORED DATA WORD 1  |
+ * +-+
+ * | (0 << FGRAPH_ARRAY_SHIFT)|type:1|(1)| ( 0 for index of first fgraph_ops)
+ * +-+
+ * | struct ftrace_ret_stack |
+ * |   (stores the saved ret pointer)|
+ * +-+
+ * | (X) | (N)   | ( N words away from last ret_stack)
+ * | |
  *
- * |  | <- task->curr_ret_stack
- * +--+
- * | (3 << FGRAPH_ARRAY_SHIFT)|(2)| ( 3 for index of fourth fgraph_ops)
- * +--+
- * | (0 << FGRAPH_ARRAY_SHIFT)|(1)| ( 0 for index of first fgraph_ops)
- * +--+
- * | struct ftrace_ret_stack  |
- * |   (stores the saved ret pointer) |
- * +--+
- * | (X) | (N)| ( N words away from previous ret_stack)
- * |  |
  *
  * If a backtrace is required, and the real return pointer needs to be
  * fetched, then it looks at the task's curr_ret_stack index, if it
@@ -77,12 +88,17 @@
 enum {
FGRAPH_TYPE_RESERVED= 0,
FGRAPH_TYPE_ARRAY   = 1,
+   FGRAPH_TYPE_DATA= 2,
 };
 
 #define FGRAPH_ARRAY_SIZE  16
 #define FGRAPH_ARRAY_MASK  ((1 << FGRAPH_ARRAY_SIZE) - 1)
 #define FGRAPH_ARRAY_SHIFT (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_SIZE)
 
+#define FGRAPH_DATA_SIZE   5
+#define FGRAPH_DATA_MASK   ((1 << FGRAPH_DATA_SIZE) - 1)
+#define FGRAPH_DATA_SHIFT  (FGRAPH_TYPE_SHIFT + FGRAPH_TYPE_SIZE)
+
 /* Currently the max stack index can't be more than register callers */
 #define FGRAPH_MAX_INDEX   FGRAPH_ARRAY_SIZE
 
@@ -97,6 +113,8 @@ enum {
 
 #define RET_STACK(t, index) ((struct ftrace_ret_stack 
*)(&(t)->ret_stack[index]))
 
+#define FGRAPH_MAX_DATA_SIZE (sizeof(long) * (1 << FGRAPH_DATA_SIZE))
+
 /*
  * Each fgraph_ops has a reservered unsigned long at the end (top) of the
  * ret_stack to store task specific state.
@@ -111,21 +129,44 @@ static int fgraph_array_cnt;
 
 static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE];
 

[RFC PATCH v2 16/31] function_graph: Move graph notrace bit to shadow stack global var

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

The use of the task->trace_recursion for the logic used for the function
graph no-trace was a bit of an abuse of that variable. Now that there
exists global vars that are per stack for registered graph traces, use
that instead.

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Make description lines shorter than 76 chars.
---
 include/linux/trace_recursion.h  |7 ---
 kernel/trace/trace.h |9 +
 kernel/trace/trace_functions_graph.c |   10 ++
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index 00e792bf148d..cc11b0e9d220 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -44,13 +44,6 @@ enum {
  */
TRACE_IRQ_BIT,
 
-   /*
-* To implement set_graph_notrace, if this bit is set, we ignore
-* function graph tracing of called functions, until the return
-* function is called to clear it.
-*/
-   TRACE_GRAPH_NOTRACE_BIT,
-
/* Used to prevent recursion recording from recursing. */
TRACE_RECORD_RECURSION_BIT,
 };
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index cbe44998ef77..27b2b52c36cc 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -910,8 +910,17 @@ enum {
 
TRACE_GRAPH_DEPTH_START_BIT,
TRACE_GRAPH_DEPTH_END_BIT,
+
+   /*
+* To implement set_graph_notrace, if this bit is set, we ignore
+* function graph tracing of called functions, until the return
+* function is called to clear it.
+*/
+   TRACE_GRAPH_NOTRACE_BIT,
 };
 
+#define TRACE_GRAPH_NOTRACE(1 << TRACE_GRAPH_NOTRACE_BIT)
+
 static inline unsigned long ftrace_graph_depth(unsigned long *task_var)
 {
return (*task_var >> TRACE_GRAPH_DEPTH_START_BIT) & 3;
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index 66cce73e94f8..13d0387ac6a6 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -130,6 +130,7 @@ static inline int ftrace_graph_ignore_irqs(void)
 int trace_graph_entry(struct ftrace_graph_ent *trace,
  struct fgraph_ops *gops)
 {
+   unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
unsigned long flags;
@@ -138,7 +139,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
int ret;
int cpu;
 
-   if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT))
+   if (*task_var & TRACE_GRAPH_NOTRACE)
return 0;
 
/*
@@ -149,7 +150,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace,
 * returning from the function.
 */
if (ftrace_graph_notrace_addr(trace->func)) {
-   trace_recursion_set(TRACE_GRAPH_NOTRACE_BIT);
+   *task_var |= TRACE_GRAPH_NOTRACE_BIT;
/*
 * Need to return 1 to have the return called
 * that will clear the NOTRACE bit.
@@ -240,6 +241,7 @@ void __trace_graph_return(struct trace_array *tr,
 void trace_graph_return(struct ftrace_graph_ret *trace,
struct fgraph_ops *gops)
 {
+   unsigned long *task_var = fgraph_get_task_var(gops);
struct trace_array *tr = gops->private;
struct trace_array_cpu *data;
unsigned long flags;
@@ -249,8 +251,8 @@ void trace_graph_return(struct ftrace_graph_ret *trace,
 
ftrace_graph_addr_finish(gops, trace);
 
-   if (trace_recursion_test(TRACE_GRAPH_NOTRACE_BIT)) {
-   trace_recursion_clear(TRACE_GRAPH_NOTRACE_BIT);
+   if (*task_var & TRACE_GRAPH_NOTRACE) {
+   *task_var &= ~TRACE_GRAPH_NOTRACE;
return;
}
 




[RFC PATCH v2 15/31] function_graph: Move graph depth stored data to shadow stack global var

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

The use of the task->trace_recursion for the logic used for the function
graph depth was a bit of an abuse of that variable. Now that there
exists global vars that are per stack for registered graph traces, use that
instead.

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 include/linux/trace_recursion.h |   29 -
 kernel/trace/trace.h|   34 --
 2 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index 2efd5ec46d7f..00e792bf148d 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -44,25 +44,6 @@ enum {
  */
TRACE_IRQ_BIT,
 
-   /*
-* In the very unlikely case that an interrupt came in
-* at a start of graph tracing, and we want to trace
-* the function in that interrupt, the depth can be greater
-* than zero, because of the preempted start of a previous
-* trace. In an even more unlikely case, depth could be 2
-* if a softirq interrupted the start of graph tracing,
-* followed by an interrupt preempting a start of graph
-* tracing in the softirq, and depth can even be 3
-* if an NMI came in at the start of an interrupt function
-* that preempted a softirq start of a function that
-* preempted normal context Luckily, it can't be
-* greater than 3, so the next two bits are a mask
-* of what the depth is when we set TRACE_GRAPH_FL
-*/
-
-   TRACE_GRAPH_DEPTH_START_BIT,
-   TRACE_GRAPH_DEPTH_END_BIT,
-
/*
 * To implement set_graph_notrace, if this bit is set, we ignore
 * function graph tracing of called functions, until the return
@@ -78,16 +59,6 @@ enum {
 #define trace_recursion_clear(bit) do { (current)->trace_recursion &= 
~(1<<(bit)); } while (0)
 #define trace_recursion_test(bit)  ((current)->trace_recursion & 
(1<<(bit)))
 
-#define trace_recursion_depth() \
-   (((current)->trace_recursion >> TRACE_GRAPH_DEPTH_START_BIT) & 3)
-#define trace_recursion_set_depth(depth) \
-   do {\
-   current->trace_recursion &= \
-   ~(3 << TRACE_GRAPH_DEPTH_START_BIT);\
-   current->trace_recursion |= \
-   ((depth) & 3) << TRACE_GRAPH_DEPTH_START_BIT;   \
-   } while (0)
-
 #define TRACE_CONTEXT_BITS 4
 
 #define TRACE_FTRACE_START TRACE_FTRACE_BIT
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 60d38709ab91..cbe44998ef77 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -891,8 +891,38 @@ extern void free_fgraph_ops(struct trace_array *tr);
 
 enum {
TRACE_GRAPH_FL  = 1,
+
+   /*
+* In the very unlikely case that an interrupt came in
+* at a start of graph tracing, and we want to trace
+* the function in that interrupt, the depth can be greater
+* than zero, because of the preempted start of a previous
+* trace. In an even more unlikely case, depth could be 2
+* if a softirq interrupted the start of graph tracing,
+* followed by an interrupt preempting a start of graph
+* tracing in the softirq, and depth can even be 3
+* if an NMI came in at the start of an interrupt function
+* that preempted a softirq start of a function that
+* preempted normal context Luckily, it can't be
+* greater than 3, so the next two bits are a mask
+* of what the depth is when we set TRACE_GRAPH_FL
+*/
+
+   TRACE_GRAPH_DEPTH_START_BIT,
+   TRACE_GRAPH_DEPTH_END_BIT,
 };
 
+static inline unsigned long ftrace_graph_depth(unsigned long *task_var)
+{
+   return (*task_var >> TRACE_GRAPH_DEPTH_START_BIT) & 3;
+}
+
+static inline void ftrace_graph_set_depth(unsigned long *task_var, int depth)
+{
+   *task_var &= ~(3 << TRACE_GRAPH_DEPTH_START_BIT);
+   *task_var |= (depth & 3) << TRACE_GRAPH_DEPTH_START_BIT;
+}
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern struct ftrace_hash __rcu *ftrace_graph_hash;
 extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash;
@@ -925,7 +955,7 @@ ftrace_graph_addr(unsigned long *task_var, struct 
ftrace_graph_ent *trace)
 * when the depth is zero.
 */
*task_var |= TRACE_GRAPH_FL;
-   trace_recursion_set_depth(trace->depth);
+   ftrace_graph_set_depth(task_var, trace->depth);
 
/*
 * If no irqs are to be traced, but a set_graph_function
@@ -950,7 +980,7 @@ ftrace_graph_addr_finish(struct fgraph_ops *gops, struct 
ftrace_graph_ret *trace
unsigned long *task_var = fgraph_get_task_var(gops);
 
if 

[RFC PATCH v2 14/31] function_graph: Move set_graph_function tests to shadow stack global var

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

The use of the task->trace_recursion for the logic used for the
set_graph_funnction was a bit of an abuse of that variable. Now that there
exists global vars that are per stack for registered graph traces, use that
instead.

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 include/linux/trace_recursion.h  |5 +
 kernel/trace/trace.h |   32 +---
 kernel/trace/trace_functions_graph.c |6 +++---
 kernel/trace/trace_irqsoff.c |4 ++--
 kernel/trace/trace_sched_wakeup.c|4 ++--
 5 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/include/linux/trace_recursion.h b/include/linux/trace_recursion.h
index d48cd92d2364..2efd5ec46d7f 100644
--- a/include/linux/trace_recursion.h
+++ b/include/linux/trace_recursion.h
@@ -44,9 +44,6 @@ enum {
  */
TRACE_IRQ_BIT,
 
-   /* Set if the function is in the set_graph_function file */
-   TRACE_GRAPH_BIT,
-
/*
 * In the very unlikely case that an interrupt came in
 * at a start of graph tracing, and we want to trace
@@ -60,7 +57,7 @@ enum {
 * that preempted a softirq start of a function that
 * preempted normal context Luckily, it can't be
 * greater than 3, so the next two bits are a mask
-* of what the depth is when we set TRACE_GRAPH_BIT
+* of what the depth is when we set TRACE_GRAPH_FL
 */
 
TRACE_GRAPH_DEPTH_START_BIT,
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index f77322e3b177..60d38709ab91 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -889,11 +889,16 @@ extern void init_array_fgraph_ops(struct trace_array *tr, 
struct ftrace_ops *ops
 extern int allocate_fgraph_ops(struct trace_array *tr, struct ftrace_ops *ops);
 extern void free_fgraph_ops(struct trace_array *tr);
 
+enum {
+   TRACE_GRAPH_FL  = 1,
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern struct ftrace_hash __rcu *ftrace_graph_hash;
 extern struct ftrace_hash __rcu *ftrace_graph_notrace_hash;
 
-static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
+static inline int
+ftrace_graph_addr(unsigned long *task_var, struct ftrace_graph_ent *trace)
 {
unsigned long addr = trace->func;
int ret = 0;
@@ -915,12 +920,11 @@ static inline int ftrace_graph_addr(struct 
ftrace_graph_ent *trace)
}
 
if (ftrace_lookup_ip(hash, addr)) {
-
/*
 * This needs to be cleared on the return functions
 * when the depth is zero.
 */
-   trace_recursion_set(TRACE_GRAPH_BIT);
+   *task_var |= TRACE_GRAPH_FL;
trace_recursion_set_depth(trace->depth);
 
/*
@@ -940,11 +944,14 @@ static inline int ftrace_graph_addr(struct 
ftrace_graph_ent *trace)
return ret;
 }
 
-static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace)
+static inline void
+ftrace_graph_addr_finish(struct fgraph_ops *gops, struct ftrace_graph_ret 
*trace)
 {
-   if (trace_recursion_test(TRACE_GRAPH_BIT) &&
+   unsigned long *task_var = fgraph_get_task_var(gops);
+
+   if ((*task_var & TRACE_GRAPH_FL) &&
trace->depth == trace_recursion_depth())
-   trace_recursion_clear(TRACE_GRAPH_BIT);
+   *task_var &= ~TRACE_GRAPH_FL;
 }
 
 static inline int ftrace_graph_notrace_addr(unsigned long addr)
@@ -971,7 +978,7 @@ static inline int ftrace_graph_notrace_addr(unsigned long 
addr)
 }
 
 #else
-static inline int ftrace_graph_addr(struct ftrace_graph_ent *trace)
+static inline int ftrace_graph_addr(unsigned long *task_var, struct 
ftrace_graph_ent *trace)
 {
return 1;
 }
@@ -980,17 +987,20 @@ static inline int ftrace_graph_notrace_addr(unsigned long 
addr)
 {
return 0;
 }
-static inline void ftrace_graph_addr_finish(struct ftrace_graph_ret *trace)
+static inline void ftrace_graph_addr_finish(struct fgraph_ops *gops, struct 
ftrace_graph_ret *trace)
 { }
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 extern unsigned int fgraph_max_depth;
 
-static inline bool ftrace_graph_ignore_func(struct ftrace_graph_ent *trace)
+static inline bool
+ftrace_graph_ignore_func(struct fgraph_ops *gops, struct ftrace_graph_ent 
*trace)
 {
+   unsigned long *task_var = fgraph_get_task_var(gops);
+
/* trace it when it is-nested-in or is a function enabled. */
-   return !(trace_recursion_test(TRACE_GRAPH_BIT) ||
-ftrace_graph_addr(trace)) ||
+   return !((*task_var & TRACE_GRAPH_FL) ||
+ftrace_graph_addr(task_var, trace)) ||
(trace->depth < 0) ||
(fgraph_max_depth && trace->depth >= fgraph_max_depth);
 }
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index 7f30652f0e97..66cce73e94f8 100644
--- a/kernel/trace/trace_functions_graph.c
+++ 

[RFC PATCH v2 13/31] function_graph: Add "task variables" per task for fgraph_ops

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

Add a "task variables" array on the tasks shadow ret_stack that is the
size of longs for each possible registered fgraph_ops. That's a total
of 16, taking up 8 * 16 = 128 bytes (out of a page size 4k).

This will allow for fgraph_ops to do specific features on a per task basis
having a way to maintain state for each task.

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Make description lines shorter than 76 chars.
---
 include/linux/ftrace.h |2 +
 kernel/trace/fgraph.c  |   73 +++-
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index d30eb8a97a50..3f9f1f48e8fd 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1071,6 +1071,7 @@ struct fgraph_ops {
trace_func_graph_ret_t  retfunc;
struct ftrace_ops   ops; /* for the hash lists */
void*private;
+   int idx;
 };
 
 /*
@@ -1109,6 +1110,7 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int 
idx);
 
 unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx,
unsigned long ret, unsigned long *retp);
+unsigned long *fgraph_get_task_var(struct fgraph_ops *gops);
 
 /*
  * Sometimes we don't want to trace a function with the function
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 97cf320d20a8..79bdd3c775dd 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -92,10 +92,18 @@ enum {
 #define SHADOW_STACK_SIZE (PAGE_SIZE)
 #define SHADOW_STACK_INDEX (SHADOW_STACK_SIZE / sizeof(long))
 /* Leave on a buffer at the end */
-#define SHADOW_STACK_MAX_INDEX (SHADOW_STACK_INDEX - (FGRAPH_RET_INDEX + 1))
+#define SHADOW_STACK_MAX_INDEX \
+   (SHADOW_STACK_INDEX - (FGRAPH_RET_INDEX + 1 + FGRAPH_ARRAY_SIZE))
 
 #define RET_STACK(t, index) ((struct ftrace_ret_stack 
*)(&(t)->ret_stack[index]))
 
+/*
+ * Each fgraph_ops has a reservered unsigned long at the end (top) of the
+ * ret_stack to store task specific state.
+ */
+#define SHADOW_STACK_TASK_VARS(ret_stack) \
+   ((unsigned long *)(&(ret_stack)[SHADOW_STACK_INDEX - 
FGRAPH_ARRAY_SIZE]))
+
 DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph);
 int ftrace_graph_active;
 
@@ -131,6 +139,44 @@ static void return_run(struct ftrace_graph_ret *trace, 
struct fgraph_ops *ops)
 {
 }
 
+static void ret_stack_set_task_var(struct task_struct *t, int idx, long val)
+{
+   unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack);
+
+   gvals[idx] = val;
+}
+
+static unsigned long *
+ret_stack_get_task_var(struct task_struct *t, int idx)
+{
+   unsigned long *gvals = SHADOW_STACK_TASK_VARS(t->ret_stack);
+
+   return [idx];
+}
+
+static void ret_stack_init_task_vars(unsigned long *ret_stack)
+{
+   unsigned long *gvals = SHADOW_STACK_TASK_VARS(ret_stack);
+
+   memset(gvals, 0, sizeof(*gvals) * FGRAPH_ARRAY_SIZE);
+}
+
+/**
+ * fgraph_get_task_var - retrieve a task specific state variable
+ * @gops: The ftrace_ops that owns the task specific variable
+ *
+ * Every registered fgraph_ops has a task state variable
+ * reserved on the task's ret_stack. This function returns the
+ * address to that variable.
+ *
+ * Returns the address to the fgraph_ops @gops tasks specific
+ * unsigned long variable.
+ */
+unsigned long *fgraph_get_task_var(struct fgraph_ops *gops)
+{
+   return ret_stack_get_task_var(current, gops->idx);
+}
+
 /*
  * @offset: The index into @t->ret_stack to find the ret_stack entry
  * @index: Where to place the index into @t->ret_stack of that entry
@@ -708,6 +754,7 @@ static int alloc_retstack_tasklist(unsigned long 
**ret_stack_list)
 
if (t->ret_stack == NULL) {
atomic_set(>trace_overrun, 0);
+   ret_stack_init_task_vars(ret_stack_list[start]);
t->curr_ret_stack = 0;
t->curr_ret_depth = -1;
/* Make sure the tasks see the 0 first: */
@@ -768,6 +815,7 @@ static void
 graph_init_task(struct task_struct *t, unsigned long *ret_stack)
 {
atomic_set(>trace_overrun, 0);
+   ret_stack_init_task_vars(ret_stack);
t->ftrace_timestamp = 0;
t->curr_ret_stack = 0;
t->curr_ret_depth = -1;
@@ -866,6 +914,24 @@ static int start_graph_tracing(void)
return ret;
 }
 
+static void init_task_vars(int idx)
+{
+   struct task_struct *g, *t;
+   int cpu;
+
+   for_each_online_cpu(cpu) {
+   if (idle_task(cpu)->ret_stack)
+   ret_stack_set_task_var(idle_task(cpu), idx, 0);
+   }
+
+   read_lock(_lock);
+   for_each_process_thread(g, t) {
+   if (t->ret_stack)
+   ret_stack_set_task_var(t, idx, 0);
+   }
+   

[RFC PATCH v2 12/31] function_graph: Have the instances use their own ftrace_ops for filtering

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

Allow for instances to have their own ftrace_ops part of the fgraph_ops
that makes the funtion_graph tracer filter on the set_ftrace_filter file
of the instance and not the top instance.

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Use ftrace_graph_func and FTRACE_OPS_GRAPH_STUB instead of
ftrace_stub and FTRACE_OPS_FL_STUB for new ftrace based fgraph.
---
 include/linux/ftrace.h   |1 +
 kernel/trace/fgraph.c|   60 +-
 kernel/trace/ftrace.c|6 ++-
 kernel/trace/trace.h |   16 +
 kernel/trace/trace_functions.c   |2 +
 kernel/trace/trace_functions_graph.c |8 +++--
 6 files changed, 58 insertions(+), 35 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 84e06ad1b121..d30eb8a97a50 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1069,6 +1069,7 @@ extern int ftrace_graph_entry_stub(struct 
ftrace_graph_ent *trace, struct fgraph
 struct fgraph_ops {
trace_func_graph_ent_t  entryfunc;
trace_func_graph_ret_t  retfunc;
+   struct ftrace_ops   ops; /* for the hash lists */
void*private;
 };
 
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 16bbb9fa3e03..97cf320d20a8 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -17,14 +17,6 @@
 #include "ftrace_internal.h"
 #include "trace.h"
 
-#ifdef CONFIG_DYNAMIC_FTRACE
-#define ASSIGN_OPS_HASH(opsname, val) \
-   .func_hash  = val, \
-   .local_hash.regex_lock  = 
__MUTEX_INITIALIZER(opsname.local_hash.regex_lock),
-#else
-#define ASSIGN_OPS_HASH(opsname, val)
-#endif
-
 #define FGRAPH_RET_SIZE sizeof(struct ftrace_ret_stack)
 #define FGRAPH_RET_INDEX (FGRAPH_RET_SIZE / sizeof(long))
 
@@ -337,9 +329,6 @@ int function_graph_enter(unsigned long ret, unsigned long 
func,
return -EBUSY;
 #endif
 
-   if (!ftrace_ops_test(_ops, func, NULL))
-   return -EBUSY;
-
trace.func = func;
trace.depth = ++current->curr_ret_depth;
 
@@ -360,7 +349,8 @@ int function_graph_enter(unsigned long ret, unsigned long 
func,
atomic_inc(>trace_overrun);
break;
}
-   if (fgraph_array[i]->entryfunc(, fgraph_array[i])) {
+   if (ftrace_ops_test(>ops, func, NULL) &&
+   gops->entryfunc(, gops)) {
offset = current->curr_ret_stack;
/* Check the top level stored word */
type = get_fgraph_type(current, offset - 1);
@@ -655,17 +645,25 @@ unsigned long ftrace_graph_ret_addr(struct task_struct 
*task, int *idx,
 }
 #endif /* HAVE_FUNCTION_GRAPH_RET_ADDR_PTR */
 
-static struct ftrace_ops graph_ops = {
-   .func   = ftrace_graph_func,
-   .flags  = FTRACE_OPS_FL_INITIALIZED |
-  FTRACE_OPS_FL_PID |
-  FTRACE_OPS_GRAPH_STUB,
+void fgraph_init_ops(struct ftrace_ops *dst_ops,
+struct ftrace_ops *src_ops)
+{
+   dst_ops->func = ftrace_graph_func;
+   dst_ops->flags = FTRACE_OPS_FL_PID | FTRACE_OPS_GRAPH_STUB;
+
 #ifdef FTRACE_GRAPH_TRAMP_ADDR
-   .trampoline = FTRACE_GRAPH_TRAMP_ADDR,
+   dst_ops->trampoline = FTRACE_GRAPH_TRAMP_ADDR;
/* trampoline_size is only needed for dynamically allocated tramps */
 #endif
-   ASSIGN_OPS_HASH(graph_ops, _ops.local_hash)
-};
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+   if (src_ops) {
+   dst_ops->func_hash = _ops->local_hash;
+   mutex_init(_ops->local_hash.regex_lock);
+   dst_ops->flags |= FTRACE_OPS_FL_INITIALIZED;
+   }
+#endif
+}
 
 void ftrace_graph_sleep_time_control(bool enable)
 {
@@ -870,11 +868,20 @@ static int start_graph_tracing(void)
 
 int register_ftrace_graph(struct fgraph_ops *gops)
 {
+   int command = 0;
int ret = 0;
int i;
 
mutex_lock(_lock);
 
+   if (!gops->ops.func) {
+   gops->ops.flags |= FTRACE_OPS_GRAPH_STUB;
+   gops->ops.func = ftrace_graph_func;
+#ifdef FTRACE_GRAPH_TRAMP_ADDR
+   gops->ops.trampoline = FTRACE_GRAPH_TRAMP_ADDR;
+#endif
+   }
+
if (!fgraph_array[0]) {
/* The array must always have real data on it */
for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
@@ -910,9 +917,10 @@ int register_ftrace_graph(struct fgraph_ops *gops)
 */
ftrace_graph_return = return_run;
ftrace_graph_entry = entry_run;
-
-   ret = ftrace_startup(_ops, FTRACE_START_FUNC_RET);
+   command = FTRACE_START_FUNC_RET;
}
+
+   ret = ftrace_startup(>ops, command);
 out:
  

[RFC PATCH v2 11/31] ftrace: Allow ftrace startup flags exist without dynamic ftrace

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

Some of the flags for ftrace_startup() may be exposed even when
CONFIG_DYNAMIC_FTRACE is not configured in. This is fine as the difference
between dynamic ftrace and static ftrace is done within the internals of
ftrace itself. No need to have use cases fail to compile because dynamic
ftrace is disabled.

This change is needed to move some of the logic of what is passed to
ftrace_startup() out of the parameters of ftrace_startup().

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 include/linux/ftrace.h |   18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 80ec01e765bd..84e06ad1b121 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -537,6 +537,15 @@ static inline void stack_tracer_disable(void) { }
 static inline void stack_tracer_enable(void) { }
 #endif
 
+enum {
+   FTRACE_UPDATE_CALLS = (1 << 0),
+   FTRACE_DISABLE_CALLS= (1 << 1),
+   FTRACE_UPDATE_TRACE_FUNC= (1 << 2),
+   FTRACE_START_FUNC_RET   = (1 << 3),
+   FTRACE_STOP_FUNC_RET= (1 << 4),
+   FTRACE_MAY_SLEEP= (1 << 5),
+};
+
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 void ftrace_arch_code_modify_prepare(void);
@@ -631,15 +640,6 @@ void ftrace_set_global_notrace(unsigned char *buf, int 
len, int reset);
 void ftrace_free_filter(struct ftrace_ops *ops);
 void ftrace_ops_set_global_filter(struct ftrace_ops *ops);
 
-enum {
-   FTRACE_UPDATE_CALLS = (1 << 0),
-   FTRACE_DISABLE_CALLS= (1 << 1),
-   FTRACE_UPDATE_TRACE_FUNC= (1 << 2),
-   FTRACE_START_FUNC_RET   = (1 << 3),
-   FTRACE_STOP_FUNC_RET= (1 << 4),
-   FTRACE_MAY_SLEEP= (1 << 5),
-};
-
 /*
  * The FTRACE_UPDATE_* enum is used to pass information back
  * from the ftrace_update_record() and ftrace_test_record()




[RFC PATCH v2 10/31] ftrace: Allow function_graph tracer to be enabled in instances

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

Now that function graph tracing can handle more than one user, allow it to
be enabled in the ftrace instances. Note, the filtering of the functions is
still joined by the top level set_ftrace_filter and friends, as well as the
graph and nograph files.

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Fix to remove set_graph_array() completely.
---
 include/linux/ftrace.h   |1 +
 kernel/trace/ftrace.c|1 +
 kernel/trace/trace.h |   13 ++-
 kernel/trace/trace_functions.c   |8 
 kernel/trace/trace_functions_graph.c |   65 +-
 kernel/trace/trace_selftest.c|4 +-
 6 files changed, 64 insertions(+), 28 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 347627f012ce..80ec01e765bd 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1069,6 +1069,7 @@ extern int ftrace_graph_entry_stub(struct 
ftrace_graph_ent *trace, struct fgraph
 struct fgraph_ops {
trace_func_graph_ent_t  entryfunc;
trace_func_graph_ret_t  retfunc;
+   void*private;
 };
 
 /*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 7ff5c454622a..83fbfb7b48f8 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -7319,6 +7319,7 @@ __init void ftrace_init_global_array_ops(struct 
trace_array *tr)
tr->ops = _ops;
tr->ops->private = tr;
ftrace_init_trace_array(tr);
+   init_array_fgraph_ops(tr);
 }
 
 void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func)
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index ada49bf1fbc8..febb9c6d01c7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -395,6 +395,9 @@ struct trace_array {
struct ftrace_ops   *ops;
struct trace_pid_list   __rcu *function_pids;
struct trace_pid_list   __rcu *function_no_pids;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+   struct fgraph_ops   *gops;
+#endif
 #ifdef CONFIG_DYNAMIC_FTRACE
/* All of these are protected by the ftrace_lock */
struct list_headfunc_probes;
@@ -672,7 +675,6 @@ void print_trace_header(struct seq_file *m, struct 
trace_iterator *iter);
 
 void trace_graph_return(struct ftrace_graph_ret *trace, struct fgraph_ops 
*gops);
 int trace_graph_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops);
-void set_graph_array(struct trace_array *tr);
 
 void tracing_start_cmdline_record(void);
 void tracing_stop_cmdline_record(void);
@@ -883,6 +885,9 @@ extern int __trace_graph_entry(struct trace_array *tr,
 extern void __trace_graph_return(struct trace_array *tr,
 struct ftrace_graph_ret *trace,
 unsigned int trace_ctx);
+extern void init_array_fgraph_ops(struct trace_array *tr);
+extern int allocate_fgraph_ops(struct trace_array *tr);
+extern void free_fgraph_ops(struct trace_array *tr);
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern struct ftrace_hash __rcu *ftrace_graph_hash;
@@ -995,6 +1000,12 @@ print_graph_function_flags(struct trace_iterator *iter, 
u32 flags)
 {
return TRACE_TYPE_UNHANDLED;
 }
+static inline void init_array_fgraph_ops(struct trace_array *tr) { }
+static inline int allocate_fgraph_ops(struct trace_array *tr)
+{
+   return 0;
+}
+static inline void free_fgraph_ops(struct trace_array *tr) { }
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
 
 extern struct list_head ftrace_pids;
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 9f1bfbe105e8..8e8da0d0ee52 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -80,6 +80,7 @@ void ftrace_free_ftrace_ops(struct trace_array *tr)
 int ftrace_create_function_files(struct trace_array *tr,
 struct dentry *parent)
 {
+   int ret;
/*
 * The top level array uses the "global_ops", and the files are
 * created on boot up.
@@ -90,6 +91,12 @@ int ftrace_create_function_files(struct trace_array *tr,
if (!tr->ops)
return -EINVAL;
 
+   ret = allocate_fgraph_ops(tr);
+   if (ret) {
+   kfree(tr->ops);
+   return ret;
+   }
+
ftrace_create_filter_files(tr->ops, parent);
 
return 0;
@@ -99,6 +106,7 @@ void ftrace_destroy_function_files(struct trace_array *tr)
 {
ftrace_destroy_filter_files(tr->ops);
ftrace_free_ftrace_ops(tr);
+   free_fgraph_ops(tr);
 }
 
 static ftrace_func_t select_trace_function(u32 flags_val)
diff --git a/kernel/trace/trace_functions_graph.c 
b/kernel/trace/trace_functions_graph.c
index b7b142b65299..9ccc904a7703 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -83,8 +83,6 @@ static struct tracer_flags tracer_flags = {

[RFC PATCH v2 09/31] ftrace/function_graph: Pass fgraph_ops to function graph callbacks

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

Pass the fgraph_ops structure to the function graph callbacks. This will
allow callbacks to add a descriptor to a fgraph_ops private field that wil
be added in the future and use it for the callbacks. This will be useful
when more than one callback can be registered to the function graph tracer.

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - cleanup to set argument name on function prototype.
---
 include/linux/ftrace.h   |   10 +++---
 kernel/trace/fgraph.c|   17 ++---
 kernel/trace/ftrace.c|6 --
 kernel/trace/trace.h |4 ++--
 kernel/trace/trace_functions_graph.c |   11 +++
 kernel/trace/trace_irqsoff.c |6 --
 kernel/trace/trace_sched_wakeup.c|6 --
 kernel/trace/trace_selftest.c|5 +++--
 8 files changed, 41 insertions(+), 24 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index b174af91d8be..347627f012ce 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1054,11 +1054,15 @@ struct ftrace_graph_ret {
unsigned long long rettime;
 } __packed;
 
+struct fgraph_ops;
+
 /* Type of the callback handlers for tracing function graph*/
-typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
-typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
+typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *,
+  struct fgraph_ops *); /* return */
+typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *,
+ struct fgraph_ops *); /* entry */
 
-extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace);
+extern int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace, struct 
fgraph_ops *gops);
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 97a9ffb8bb4c..16bbb9fa3e03 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -129,13 +129,13 @@ static inline int get_fgraph_array(struct task_struct *t, 
int offset)
 }
 
 /* ftrace_graph_entry set to this to tell some archs to run function graph */
-static int entry_run(struct ftrace_graph_ent *trace)
+static int entry_run(struct ftrace_graph_ent *trace, struct fgraph_ops *ops)
 {
return 0;
 }
 
 /* ftrace_graph_return set to this to tell some archs to run function graph */
-static void return_run(struct ftrace_graph_ret *trace)
+static void return_run(struct ftrace_graph_ret *trace, struct fgraph_ops *ops)
 {
 }
 
@@ -199,12 +199,14 @@ int __weak ftrace_disable_ftrace_graph_caller(void)
 }
 #endif
 
-int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
+int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace,
+   struct fgraph_ops *gops)
 {
return 0;
 }
 
-static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace)
+static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace,
+ struct fgraph_ops *gops)
 {
 }
 
@@ -358,7 +360,7 @@ int function_graph_enter(unsigned long ret, unsigned long 
func,
atomic_inc(>trace_overrun);
break;
}
-   if (fgraph_array[i]->entryfunc()) {
+   if (fgraph_array[i]->entryfunc(, fgraph_array[i])) {
offset = current->curr_ret_stack;
/* Check the top level stored word */
type = get_fgraph_type(current, offset - 1);
@@ -532,7 +534,7 @@ static unsigned long __ftrace_return_to_handler(struct 
fgraph_ret_regs *ret_regs
i = 0;
do {
idx = get_fgraph_array(current, offset - i);
-   fgraph_array[idx]->retfunc();
+   fgraph_array[idx]->retfunc(, fgraph_array[idx]);
i++;
} while (i < index);
 
@@ -674,7 +676,8 @@ void ftrace_graph_sleep_time_control(bool enable)
  * Simply points to ftrace_stub, but with the proper protocol.
  * Defined by the linker script in linux/vmlinux.lds.h
  */
-extern void ftrace_stub_graph(struct ftrace_graph_ret *);
+extern void ftrace_stub_graph(struct ftrace_graph_ret *trace,
+ struct fgraph_ops *gops);
 
 /* The callbacks that hook a function */
 trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph;
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fd64021ec52f..7ff5c454622a 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -815,7 +815,8 @@ void ftrace_graph_graph_time_control(bool enable)
fgraph_graph_time = enable;
 }
 
-static int profile_graph_entry(struct ftrace_graph_ent *trace)
+static int profile_graph_entry(struct ftrace_graph_ent *trace,
+  struct fgraph_ops *gops)
 {
struct ftrace_ret_stack 

[RFC PATCH v2 08/31] function_graph: Remove logic around ftrace_graph_entry and return

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

The function pointers ftrace_graph_entry and ftrace_graph_return are no
longer called via the function_graph tracer. Instead, an array structure is
now used that will allow for multiple users of the function_graph
infrastructure. The variables are still used by the architecture code for
non dynamic ftrace configs, where a test is made against them to see if
they point to the default stub function or not. This is how the static
function tracing knows to call into the function graph tracer
infrastructure or not.

Two new stub functions are made. entry_run() and return_run(). The
ftrace_graph_entry and ftrace_graph_return are set to them respectively
when the function graph tracer is enabled, and this will trigger the
architecture specific function graph code to be executed.

This also requires checking the global_ops hash for all calls into the
function_graph tracer.

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Fix typo and make lines shorter than 76 chars in the description.
  - Remove unneeded return from return_run() function.
---
 kernel/trace/fgraph.c  |   71 +++-
 kernel/trace/ftrace.c  |2 -
 kernel/trace/ftrace_internal.h |2 -
 3 files changed, 19 insertions(+), 56 deletions(-)

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 8aba93be11b2..97a9ffb8bb4c 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -128,6 +128,17 @@ static inline int get_fgraph_array(struct task_struct *t, 
int offset)
FGRAPH_ARRAY_MASK;
 }
 
+/* ftrace_graph_entry set to this to tell some archs to run function graph */
+static int entry_run(struct ftrace_graph_ent *trace)
+{
+   return 0;
+}
+
+/* ftrace_graph_return set to this to tell some archs to run function graph */
+static void return_run(struct ftrace_graph_ret *trace)
+{
+}
+
 /*
  * @offset: The index into @t->ret_stack to find the ret_stack entry
  * @index: Where to place the index into @t->ret_stack of that entry
@@ -323,6 +334,10 @@ int function_graph_enter(unsigned long ret, unsigned long 
func,
ftrace_find_rec_direct(ret - MCOUNT_INSN_SIZE))
return -EBUSY;
 #endif
+
+   if (!ftrace_ops_test(_ops, func, NULL))
+   return -EBUSY;
+
trace.func = func;
trace.depth = ++current->curr_ret_depth;
 
@@ -664,7 +679,6 @@ extern void ftrace_stub_graph(struct ftrace_graph_ret *);
 /* The callbacks that hook a function */
 trace_func_graph_ret_t ftrace_graph_return = ftrace_stub_graph;
 trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
-static trace_func_graph_ent_t __ftrace_graph_entry = ftrace_graph_entry_stub;
 
 /* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
 static int alloc_retstack_tasklist(unsigned long **ret_stack_list)
@@ -747,46 +761,6 @@ ftrace_graph_probe_sched_switch(void *ignore, bool preempt,
}
 }
 
-static int ftrace_graph_entry_test(struct ftrace_graph_ent *trace)
-{
-   if (!ftrace_ops_test(_ops, trace->func, NULL))
-   return 0;
-   return __ftrace_graph_entry(trace);
-}
-
-/*
- * The function graph tracer should only trace the functions defined
- * by set_ftrace_filter and set_ftrace_notrace. If another function
- * tracer ops is registered, the graph tracer requires testing the
- * function against the global ops, and not just trace any function
- * that any ftrace_ops registered.
- */
-void update_function_graph_func(void)
-{
-   struct ftrace_ops *op;
-   bool do_test = false;
-
-   /*
-* The graph and global ops share the same set of functions
-* to test. If any other ops is on the list, then
-* the graph tracing needs to test if its the function
-* it should call.
-*/
-   do_for_each_ftrace_op(op, ftrace_ops_list) {
-   if (op != _ops && op != _ops &&
-   op != _list_end) {
-   do_test = true;
-   /* in double loop, break out with goto */
-   goto out;
-   }
-   } while_for_each_ftrace_op(op);
- out:
-   if (do_test)
-   ftrace_graph_entry = ftrace_graph_entry_test;
-   else
-   ftrace_graph_entry = __ftrace_graph_entry;
-}
-
 static DEFINE_PER_CPU(unsigned long *, idle_ret_stack);
 
 static void
@@ -927,18 +901,12 @@ int register_ftrace_graph(struct fgraph_ops *gops)
ftrace_graph_active--;
goto out;
}
-
-   ftrace_graph_return = gops->retfunc;
-
/*
-* Update the indirect function to the entryfunc, and the
-* function that gets called to the entry_test first. Then
-* call the update fgraph entry function to determine if
-* the entryfunc should be called directly or not.
+  

[RFC PATCH v2 07/31] function_graph: Allow multiple users to attach to function graph

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

Allow for multiple users to attach to function graph tracer at the same
time. Only 16 simultaneous users can attach to the tracer. This is because
there's an array that stores the pointers to the attached fgraph_ops. When
a function being traced is entered, each of the ftrace_ops entryfunc is
called and if it returns non zero, its index into the array will be added
to the shadow stack.

On exit of the function being traced, the shadow stack will contain the
indexes of the ftrace_ops on the array that want their retfunc to be
called.

Because a function may sleep for a long time (if a task sleeps itself),
the return of the function may be literally days later. If the ftrace_ops
is removed, its place on the array is replaced with a ftrace_ops that
contains the stub functions and that will be called when the function
finally returns.

If another ftrace_ops is added that happens to get the same index into the
array, its return function may be called. But that's actually the way
things current work with the old function graph tracer. If one tracer is
removed and another is added, the new one will get the return calls of the
function traced by the previous one, thus this is not a regression. This
can be fixed by adding a counter to each time the array item is updated and
save that on the shadow stack as well, such that it won't be called if the
index saved does not match the index on the array.

Note, being able to filter functions when both are called is not completely
handled yet, but that shouldn't be too hard to manage.

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Check return value of the ftrace_pop_return_trace() instead of 'ret'
since 'ret' is set to the address of panic().
  - Fix typo and make lines shorter than 76 chars in description.
---
 kernel/trace/fgraph.c |  332 +
 1 file changed, 280 insertions(+), 52 deletions(-)

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 86df3ca6964f..8aba93be11b2 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -27,23 +27,144 @@
 
 #define FGRAPH_RET_SIZE sizeof(struct ftrace_ret_stack)
 #define FGRAPH_RET_INDEX (FGRAPH_RET_SIZE / sizeof(long))
+
+/*
+ * On entry to a function (via function_graph_enter()), a new ftrace_ret_stack
+ * is allocated on the task's ret_stack, then each fgraph_ops on the
+ * fgraph_array[]'s entryfunc is called and if that returns non-zero, the
+ * index into the fgraph_array[] for that fgraph_ops is added to the ret_stack.
+ * As the associated ftrace_ret_stack saved for those fgraph_ops needs to
+ * be found, the index to it is also added to the ret_stack along with the
+ * index of the fgraph_array[] to each fgraph_ops that needs their retfunc
+ * called.
+ *
+ * The top of the ret_stack (when not empty) will always have a reference
+ * to the last ftrace_ret_stack saved. All references to the
+ * ftrace_ret_stack has the format of:
+ *
+ * bits:  0 - 13   Index in words from the previous ftrace_ret_stack
+ * bits: 14 - 15   Type of storage
+ *   0 - reserved
+ *   1 - fgraph_array index
+ * For fgraph_array_index:
+ *  bits: 16 - 23  The fgraph_ops fgraph_array index
+ *
+ * That is, at the end of function_graph_enter, if the first and forth
+ * fgraph_ops on the fgraph_array[] (index 0 and 3) needs their retfunc called
+ * on the return of the function being traced, this is what will be on the
+ * task's shadow ret_stack: (the stack grows upward)
+ *
+ * |  | <- task->curr_ret_stack
+ * +--+
+ * | (3 << FGRAPH_ARRAY_SHIFT)|(2)| ( 3 for index of fourth fgraph_ops)
+ * +--+
+ * | (0 << FGRAPH_ARRAY_SHIFT)|(1)| ( 0 for index of first fgraph_ops)
+ * +--+
+ * | struct ftrace_ret_stack  |
+ * |   (stores the saved ret pointer) |
+ * +--+
+ * | (X) | (N)| ( N words away from previous ret_stack)
+ * |  |
+ *
+ * If a backtrace is required, and the real return pointer needs to be
+ * fetched, then it looks at the task's curr_ret_stack index, if it
+ * is greater than zero, it would subtact one, and then mask the value
+ * on the ret_stack by FGRAPH_RET_INDEX_MASK and subtract FGRAPH_RET_INDEX
+ * from that, to get the index of the ftrace_ret_stack structure stored
+ * on the shadow stack.
+ */
+
+#define FGRAPH_RET_INDEX_SIZE  14
+#define FGRAPH_RET_INDEX_MASK  ((1 << FGRAPH_RET_INDEX_SIZE) - 1)
+
+
+#define FGRAPH_TYPE_SIZE   2
+#define FGRAPH_TYPE_MASK   ((1 << FGRAPH_TYPE_SIZE) - 1)
+#define FGRAPH_TYPE_SHIFT  FGRAPH_RET_INDEX_SIZE
+
+enum {
+   FGRAPH_TYPE_RESERVED= 0,
+   FGRAPH_TYPE_ARRAY   = 1,
+};
+
+#define FGRAPH_ARRAY_SIZE  16
+#define 

[RFC PATCH v2 06/31] function_graph: Add an array structure that will allow multiple callbacks

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

Add an array structure that will eventually allow the function graph tracer
to have up to 16 simultaneous callbacks attached. It's an array of 16
fgraph_ops pointers, that is assigned when one is registered. On entry of a
function the entry of the first item in the array is called, and if it
returns zero, then the callback returns non zero if it wants the return
callback to be called on exit of the function.

The array will simplify the process of having more than one callback
attached to the same function, as its index into the array can be stored on
the shadow stack. We need to only save the index, because this will allow
the fgraph_ops to be freed before the function returns (which may happen if
the function call schedule for a long time).

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Remove unneeded brace.
---
 kernel/trace/fgraph.c |  114 +++--
 1 file changed, 81 insertions(+), 33 deletions(-)

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 837daf929d2a..86df3ca6964f 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -39,6 +39,11 @@
 DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph);
 int ftrace_graph_active;
 
+static int fgraph_array_cnt;
+#define FGRAPH_ARRAY_SIZE  16
+
+static struct fgraph_ops *fgraph_array[FGRAPH_ARRAY_SIZE];
+
 /* Both enabled by default (can be cleared by function_graph tracer flags */
 static bool fgraph_sleep_time = true;
 
@@ -62,6 +67,20 @@ int __weak ftrace_disable_ftrace_graph_caller(void)
 }
 #endif
 
+int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
+{
+   return 0;
+}
+
+static void ftrace_graph_ret_stub(struct ftrace_graph_ret *trace)
+{
+}
+
+static struct fgraph_ops fgraph_stub = {
+   .entryfunc = ftrace_graph_entry_stub,
+   .retfunc = ftrace_graph_ret_stub,
+};
+
 /**
  * ftrace_graph_stop - set to permanently disable function graph tracing
  *
@@ -159,7 +178,7 @@ int function_graph_enter(unsigned long ret, unsigned long 
func,
goto out;
 
/* Only trace if the calling function expects to */
-   if (!ftrace_graph_entry())
+   if (!fgraph_array[0]->entryfunc())
goto out_ret;
 
return 0;
@@ -274,7 +293,7 @@ static unsigned long __ftrace_return_to_handler(struct 
fgraph_ret_regs *ret_regs
trace.retval = fgraph_ret_regs_return_value(ret_regs);
 #endif
trace.rettime = trace_clock_local();
-   ftrace_graph_return();
+   fgraph_array[0]->retfunc();
/*
 * The ftrace_graph_return() may still access the current
 * ret_stack structure, we need to make sure the update of
@@ -410,11 +429,6 @@ void ftrace_graph_sleep_time_control(bool enable)
fgraph_sleep_time = enable;
 }
 
-int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
-{
-   return 0;
-}
-
 /*
  * Simply points to ftrace_stub, but with the proper protocol.
  * Defined by the linker script in linux/vmlinux.lds.h
@@ -652,37 +666,54 @@ static int start_graph_tracing(void)
 int register_ftrace_graph(struct fgraph_ops *gops)
 {
int ret = 0;
+   int i;
 
mutex_lock(_lock);
 
-   /* we currently allow only one tracer registered at a time */
-   if (ftrace_graph_active) {
+   if (!fgraph_array[0]) {
+   /* The array must always have real data on it */
+   for (i = 0; i < FGRAPH_ARRAY_SIZE; i++)
+   fgraph_array[i] = _stub;
+   }
+
+   /* Look for an available spot */
+   for (i = 0; i < FGRAPH_ARRAY_SIZE; i++) {
+   if (fgraph_array[i] == _stub)
+   break;
+   }
+   if (i >= FGRAPH_ARRAY_SIZE) {
ret = -EBUSY;
goto out;
}
 
-   register_pm_notifier(_suspend_notifier);
+   fgraph_array[i] = gops;
+   if (i + 1 > fgraph_array_cnt)
+   fgraph_array_cnt = i + 1;
 
ftrace_graph_active++;
-   ret = start_graph_tracing();
-   if (ret) {
-   ftrace_graph_active--;
-   goto out;
-   }
 
-   ftrace_graph_return = gops->retfunc;
+   if (ftrace_graph_active == 1) {
+   register_pm_notifier(_suspend_notifier);
+   ret = start_graph_tracing();
+   if (ret) {
+   ftrace_graph_active--;
+   goto out;
+   }
+
+   ftrace_graph_return = gops->retfunc;
 
-   /*
-* Update the indirect function to the entryfunc, and the
-* function that gets called to the entry_test first. Then
-* call the update fgraph entry function to determine if
-* the entryfunc should be called directly or not.
-*/
-   __ftrace_graph_entry = gops->entryfunc;
-   ftrace_graph_entry = ftrace_graph_entry_test;
-   update_function_graph_func();
+   /*
+  

[RFC PATCH v2 05/31] fgraph: Use BUILD_BUG_ON() to make sure we have structures divisible by long

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

Instead of using "ALIGN()", use BUILD_BUG_ON() as the structures should
always be divisible by sizeof(long).

Link: 
http://lkml.kernel.org/r/2019052444.gi2...@hirez.programming.kicks-ass.net

Suggested-by: Peter Zijlstra 
Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 kernel/trace/fgraph.c |9 ++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index 30edeb6d4aa9..837daf929d2a 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -26,10 +26,9 @@
 #endif
 
 #define FGRAPH_RET_SIZE sizeof(struct ftrace_ret_stack)
-#define FGRAPH_RET_INDEX (ALIGN(FGRAPH_RET_SIZE, sizeof(long)) / sizeof(long))
+#define FGRAPH_RET_INDEX (FGRAPH_RET_SIZE / sizeof(long))
 #define SHADOW_STACK_SIZE (PAGE_SIZE)
-#define SHADOW_STACK_INDEX \
-   (ALIGN(SHADOW_STACK_SIZE, sizeof(long)) / sizeof(long))
+#define SHADOW_STACK_INDEX (SHADOW_STACK_SIZE / sizeof(long))
 /* Leave on a buffer at the end */
 #define SHADOW_STACK_MAX_INDEX (SHADOW_STACK_INDEX - FGRAPH_RET_INDEX)
 
@@ -91,6 +90,8 @@ ftrace_push_return_trace(unsigned long ret, unsigned long 
func,
if (!current->ret_stack)
return -EBUSY;
 
+   BUILD_BUG_ON(SHADOW_STACK_SIZE % sizeof(long));
+
/*
 * We must make sure the ret_stack is tested before we read
 * anything else.
@@ -325,6 +326,8 @@ ftrace_graph_get_ret_stack(struct task_struct *task, int 
idx)
 {
int index = task->curr_ret_stack;
 
+   BUILD_BUG_ON(FGRAPH_RET_SIZE % sizeof(long));
+
index -= FGRAPH_RET_INDEX * (idx + 1);
if (index < 0)
return NULL;




[RFC PATCH v2 04/31] function_graph: Convert ret_stack to a series of longs

2023-11-08 Thread Masami Hiramatsu (Google)
From: Steven Rostedt (VMware) 

In order to make it possible to have multiple callbacks registered with the
function_graph tracer, the retstack needs to be converted from an array of
ftrace_ret_stack structures to an array of longs. This will allow to store
the list of callbacks on the stack for the return side of the functions.

Signed-off-by: Steven Rostedt (VMware) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 include/linux/sched.h |2 -
 kernel/trace/fgraph.c |  124 -
 2 files changed, 71 insertions(+), 55 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 77f01ac385f7..3af00d726847 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1386,7 +1386,7 @@ struct task_struct {
int curr_ret_depth;
 
/* Stack of return addresses for return function tracing: */
-   struct ftrace_ret_stack *ret_stack;
+   unsigned long   *ret_stack;
 
/* Timestamp for last schedule: */
unsigned long long  ftrace_timestamp;
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index c83c005e654e..30edeb6d4aa9 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -25,6 +25,18 @@
 #define ASSIGN_OPS_HASH(opsname, val)
 #endif
 
+#define FGRAPH_RET_SIZE sizeof(struct ftrace_ret_stack)
+#define FGRAPH_RET_INDEX (ALIGN(FGRAPH_RET_SIZE, sizeof(long)) / sizeof(long))
+#define SHADOW_STACK_SIZE (PAGE_SIZE)
+#define SHADOW_STACK_INDEX \
+   (ALIGN(SHADOW_STACK_SIZE, sizeof(long)) / sizeof(long))
+/* Leave on a buffer at the end */
+#define SHADOW_STACK_MAX_INDEX (SHADOW_STACK_INDEX - FGRAPH_RET_INDEX)
+
+#define RET_STACK(t, index) ((struct ftrace_ret_stack 
*)(&(t)->ret_stack[index]))
+#define RET_STACK_INC(c) ({ c += FGRAPH_RET_INDEX; })
+#define RET_STACK_DEC(c) ({ c -= FGRAPH_RET_INDEX; })
+
 DEFINE_STATIC_KEY_FALSE(kill_ftrace_graph);
 int ftrace_graph_active;
 
@@ -69,6 +81,7 @@ static int
 ftrace_push_return_trace(unsigned long ret, unsigned long func,
 unsigned long frame_pointer, unsigned long *retp)
 {
+   struct ftrace_ret_stack *ret_stack;
unsigned long long calltime;
int index;
 
@@ -85,23 +98,25 @@ ftrace_push_return_trace(unsigned long ret, unsigned long 
func,
smp_rmb();
 
/* The return trace stack is full */
-   if (current->curr_ret_stack == FTRACE_RETFUNC_DEPTH - 1) {
+   if (current->curr_ret_stack >= SHADOW_STACK_MAX_INDEX) {
atomic_inc(>trace_overrun);
return -EBUSY;
}
 
calltime = trace_clock_local();
 
-   index = ++current->curr_ret_stack;
+   index = current->curr_ret_stack;
+   RET_STACK_INC(current->curr_ret_stack);
+   ret_stack = RET_STACK(current, index);
barrier();
-   current->ret_stack[index].ret = ret;
-   current->ret_stack[index].func = func;
-   current->ret_stack[index].calltime = calltime;
+   ret_stack->ret = ret;
+   ret_stack->func = func;
+   ret_stack->calltime = calltime;
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-   current->ret_stack[index].fp = frame_pointer;
+   ret_stack->fp = frame_pointer;
 #endif
 #ifdef HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
-   current->ret_stack[index].retp = retp;
+   ret_stack->retp = retp;
 #endif
return 0;
 }
@@ -148,7 +163,7 @@ int function_graph_enter(unsigned long ret, unsigned long 
func,
 
return 0;
  out_ret:
-   current->curr_ret_stack--;
+   RET_STACK_DEC(current->curr_ret_stack);
  out:
current->curr_ret_depth--;
return -EBUSY;
@@ -159,11 +174,13 @@ static void
 ftrace_pop_return_trace(struct ftrace_graph_ret *trace, unsigned long *ret,
unsigned long frame_pointer)
 {
+   struct ftrace_ret_stack *ret_stack;
int index;
 
index = current->curr_ret_stack;
+   RET_STACK_DEC(index);
 
-   if (unlikely(index < 0 || index >= FTRACE_RETFUNC_DEPTH)) {
+   if (unlikely(index < 0 || index > SHADOW_STACK_MAX_INDEX)) {
ftrace_graph_stop();
WARN_ON(1);
/* Might as well panic, otherwise we have no where to go */
@@ -171,6 +188,7 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, 
unsigned long *ret,
return;
}
 
+   ret_stack = RET_STACK(current, index);
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
/*
 * The arch may choose to record the frame pointer used
@@ -186,22 +204,22 @@ ftrace_pop_return_trace(struct ftrace_graph_ret *trace, 
unsigned long *ret,
 * Note, -mfentry does not use frame pointers, and this test
 *  is not needed if CC_USING_FENTRY is set.
 */
-   if (unlikely(current->ret_stack[index].fp != frame_pointer)) {
+   if (unlikely(ret_stack->fp != frame_pointer)) {
ftrace_graph_stop();
WARN(1, "Bad frame 

[RFC PATCH v2 03/31] seq_buf: Export seq_buf_puts()

2023-11-08 Thread Masami Hiramatsu (Google)
From: Christophe JAILLET 

Mark seq_buf_puts() which is part of the seq_buf API to be exported to
kernel loadable GPL modules.

Link: 
https://lkml.kernel.org/r/b9e3737f66ec2450221b492048ce0d9c65c84953.1698861216.git.christophe.jail...@wanadoo.fr

Signed-off-by: Christophe JAILLET 
Signed-off-by: Steven Rostedt (Google) 
Signed-off-by: Masami Hiramatsu (Google) 
---
 lib/seq_buf.c |1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/seq_buf.c b/lib/seq_buf.c
index 45c450f423fa..46a1b00c3815 100644
--- a/lib/seq_buf.c
+++ b/lib/seq_buf.c
@@ -189,6 +189,7 @@ int seq_buf_puts(struct seq_buf *s, const char *str)
seq_buf_set_overflow(s);
return -1;
 }
+EXPORT_SYMBOL_GPL(seq_buf_puts);
 
 /**
  * seq_buf_putc - sequence printing of simple character




[RFC PATCH v2 02/31] x86: tracing: Add ftrace_regs definition in the header

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Add ftrace_regs definition for x86_64 in the ftrace header to
clarify what register will be accessible from ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - Newly added.
---
 arch/x86/include/asm/ftrace.h |6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h
index 897cf02c20b1..fc60974a1d89 100644
--- a/arch/x86/include/asm/ftrace.h
+++ b/arch/x86/include/asm/ftrace.h
@@ -36,6 +36,12 @@ static inline unsigned long ftrace_call_adjust(unsigned long 
addr)
 
 #ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
 struct ftrace_regs {
+   /*
+* On the x86_64, the ftrace_regs saves;
+* rax, rcx, rdx, rdi, rsi, r8, r9, rbp and rsp.
+* Also orig_ax is used for passing direct trampoline address.
+* x86_32 doesn't support ftrace_regs.
+*/
struct pt_regs  regs;
 };
 




[RFC PATCH v2 01/31] tracing: Add a comment about ftrace_regs definition

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

To clarify what will be expected on ftrace_regs, add a comment to the
architecture independent definition of the ftrace_regs.

Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
  - newly added.
---
 include/linux/ftrace.h |   25 +
 1 file changed, 25 insertions(+)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index e8921871ef9a..b174af91d8be 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -118,6 +118,31 @@ extern int ftrace_enabled;
 
 #ifndef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
 
+/**
+ * ftrace_regs - ftrace partial/optimal register set
+ *
+ * ftrace_regs represents a group of registers which is used at the
+ * function entry and exit. There are three types of registers.
+ *
+ * - Registers for passing the parameters to callee, including the stack
+ *   pointer. (e.g. rcx, rdx, rdi, rsi, r8, r9 and rsp on x86_64)
+ * - Registers for passing the return values to caller.
+ *   (e.g. rax and rdx on x86_64)
+ * - Registers for hooking the function return including the frame pointer
+ *   (the frame pointer is architecture/config dependent)
+ *   (e.g. rbp and rsp for x86_64)
+ *
+ * Also, architecture dependent fields can be used for internal process.
+ * (e.g. orig_ax on x86_64)
+ *
+ * On the function entry, those registers will be restored except for
+ * the stack pointer, so that user can change the function parameters.
+ * On the function exit, onlu registers which is used for return values
+ * are restored.
+ *
+ * NOTE: user *must not* access regs directly, only do it via APIs, because
+ * the member can be changed according to the architecture.
+ */
 struct ftrace_regs {
struct pt_regs  regs;
 };




[RFC PATCH v2 00/31] tracing: fprobe: function_graph: Multi-function graph and fprobe on fgraph

2023-11-08 Thread Masami Hiramatsu (Google)
Hi,

Here is the 2nd version of the series to re-implement the fprobe on
function-graph tracer. The previous version is;

https://lore.kernel.org/all/169920038849.482486.1579638721992967.stgit@devnote2/

In this version I merged the fixes to appropriate patches and fix
some typos/bugs, and fix some issues.
I also add some commentary patches for ftrace_regs.

This series does major 2 changes, enable multiple function-graphs on
the ftrace (e.g. allow function-graph on sub instances) and rewrite the
fprobe on this function-graph.

The former changes had been sent from Steven Rostedt 4 years ago (*),
which allows users to set different setting function-graph tracer (and
other tracers based on function-graph) in each trace-instances at the
same time.

(*) https://lore.kernel.org/all/20190525031633.811342...@goodmis.org/

The purpose of latter change are;

 1) Remove dependency of the rethook from fprobe so that we can reduce
   the return hook code and shadow stack.

 2) Make 'ftrace_regs' the common trace interface for the function
   boundary.

1) Currently we have 2(or 3) different function return hook codes,
 the function-graph tracer and rethook (and legacy kretprobe).
 But since this  is redundant and needs double maintenance cost,
 I would like to unify those. From the user's viewpoint, function-
 graph tracer is very useful to grasp the execution path. For this
 purpose, it is hard to use the rethook in the function-graph
 tracer, but the opposite is possible. (Strictly speaking, kretprobe
 can not use it because it requires 'pt_regs' for historical reasons.)

2) Now the fprobe provides the 'pt_regs' for its handler, but that is
 wrong for the function entry and exit. Moreover, depending on the
 architecture, there is no way to accurately reproduce 'pt_regs'
 outside of interrupt or exception handlers. This means fprobe should
 not use 'pt_regs' because it does not use such exceptions.
 (Conversely, kprobe should use 'pt_regs' because it is an abstract
  interface of the software breakpoint exception.)

This series changes fprobe to use function-graph tracer for tracing
function entry and exit, instead of mixture of ftrace and rethook.
Unlike the rethook which is a per-task list of system-wide allocated
nodes, the function graph's ret_stack is a per-task shadow stack.
Thus it does not need to set 'nr_maxactive' (which is the number of
pre-allocated nodes).
Also the handlers will get the 'ftrace_regs' instead of 'pt_regs'.
Since eBPF mulit_kprobe/multi_kretprobe events still use 'pt_regs' as
their register interface, this changes it to convert 'ftrace_regs' to
'pt_regs'. Of course this conversion makes an incomplete 'pt_regs',
so users must access only registers for function parameters or
return value. 

Design:
Instead of using ftrace's function entry hook directly, the new fprobe
is built on top of the function-graph's entry and return callbacks
with 'ftrace_regs'.

Since the fprobe requires access to 'ftrace_regs', the architecture
must support CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS, which enables to
call function-graph entry callback with 'ftrace_regs', and also
CONFIG_HAVE_FUNCTION_GRAPH_FREGS, which passes the ftrace_regs to
return_to_handler.

All fprobes share a single function-graph ops (means shares a common
ftrace filter) similar to the kprobe-on-ftrace. This needs another
layer to find corresponding fprobe in the common function-graph
callbacks, but has much better scalability, since the number of
registered function-graph ops is limited.

In the entry callback, the fprobe runs its entry_handler and saves the
address of 'fprobe' on the function-graph's shadow stack as data. The
return callback decodes the data to get the 'fprobe' address, and runs
the exit_handler.

The fprobe introduces two hash-tables, one is for entry callback which
searches fprobes related to the given function address passed by entry
callback. The other is for a return callback which checks if the given
'fprobe' data structure pointer is still valid. Note that it is
possible to unregister fprobe before the return callback runs. Thus
the address validation must be done before using it in the return
callback.

Series:
- Patch [1/31] and [2/31] are adding a comment for ftrace_regs.
- Patch [3/31] to [18/31] are the multiple function-graph support.
- Patch [19/31] and [21/31] adds new function-graph callbacks with
  ftrace_regs and x86-64 implementation.
- Patch [22/31] to [25/31] are preparation (adding util functions) of
  the new fprobe and its user.
- Patch [26/31] to [30/31] rewrites fprobes and updates its users.
- Patch [31/31] is a documentation update.

This series can be applied against the probes-fixes-v6.6-rc7 on linux-trace 
tree.

This series can also be found below branch.

https://git.kernel.org/pub/scm/linux/kernel/git/mhiramat/linux.git/log/?h=topic/fprobe-on-fgraph

Thank you,

---

Christophe JAILLET (1):
  seq_buf: Export seq_buf_puts()

Masami Hiramatsu (Google) (15):
  tracing: Add a comment 

[PATCH v2] tracing: fprobe-event: Fix to check tracepoint event and return

2023-11-08 Thread Masami Hiramatsu (Google)
From: Masami Hiramatsu (Google) 

Fix to check the tracepoint event is not valid with $retval.
The commit 08c9306fc2e3 ("tracing/fprobe-event: Assume fprobe is
a return event by $retval") introduced automatic return probe
conversion with $retval. But since tracepoint event does not
support return probe, $retval is not acceptable.

Without this fix, ftracetest, tprobe_syntax_errors.tc fails;

[22] Tracepoint probe event parser error log check  [FAIL]
 
 # tail 22-tprobe_syntax_errors.tc-log.mRKroL
 + ftrace_errlog_check trace_fprobe t kfree ^$retval dynamic_events
 + printf %s t kfree
 + wc -c
 + pos=8
 + printf %s t kfree ^$retval
 + tr -d ^
 + command=t kfree $retval
 + echo Test command: t kfree $retval
 Test command: t kfree $retval
 + echo
 

So 't kfree $retval' should fail (tracepoint doesn't support
return probe) but passed it.

Fixes: 08c9306fc2e3 ("tracing/fprobe-event: Assume fprobe is a return event by 
$retval")
Cc: sta...@vger.kernel.org
Signed-off-by: Masami Hiramatsu (Google) 
---
 Changes in v2:
   - update the patch description to show what test fails.
---
 kernel/trace/trace_fprobe.c |9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
index 8bfe23af9c73..7d2ddbcfa377 100644
--- a/kernel/trace/trace_fprobe.c
+++ b/kernel/trace/trace_fprobe.c
@@ -927,11 +927,12 @@ static int parse_symbol_and_return(int argc, const char 
*argv[],
for (i = 2; i < argc; i++) {
tmp = strstr(argv[i], "$retval");
if (tmp && !isalnum(tmp[7]) && tmp[7] != '_') {
+   if (is_tracepoint) {
+   trace_probe_log_set_index(i);
+   trace_probe_log_err(tmp - argv[i], 
RETVAL_ON_PROBE);
+   return -EINVAL;
+   }
*is_return = true;
-   /*
-* NOTE: Don't check is_tracepoint here, because it will
-* be checked when the argument is parsed.
-*/
break;
}
}




Re: [PATCH] tracing: fprobe-event: Fix to check tracepoint event and return

2023-11-08 Thread Google
On Wed, 8 Nov 2023 20:01:50 +0900
Masami Hiramatsu (Google)  wrote:

> On Tue, 7 Nov 2023 09:40:45 +0900
> Masami Hiramatsu (Google)  wrote:
> 
> > On Mon, 6 Nov 2023 17:28:11 -0500
> > Steven Rostedt  wrote:
> > 
> > > On Sat,  4 Nov 2023 01:05:34 +0900
> > > "Masami Hiramatsu (Google)"  wrote:
> > > 
> > > > From: Masami Hiramatsu (Google) 
> > > > 
> > > > Fix to check the tracepoint event is not valid with $retval.
> > > > The commit 08c9306fc2e3 ("tracing/fprobe-event: Assume fprobe is
> > > > a return event by $retval") introduced automatic return probe
> > > > conversion with $retval. But since tracepoint event does not
> > > > support return probe, $retval is not acceptable.
> > > 
> > > Can you add the command that causes this to fail.
> > 
> > Actually this doesn't command fail but it expected to fail.
> > 
> > $ echo "t kfree $retval" >> dynamic_events
> 
> Wait, with some configuration, this doesn't cause a problem.
> Let me try to reproduce the issue...

OK, I confirmed that probes/fixes branch fails without this fix.

[22] Tracepoint probe event parser error log check  [FAIL]

Let me update the description.

Thank you,

> 
> Thank you,
> 
> > 
> > This should be rejected because 't' (tracepoint) should not be
> > a return probe. Without this fix, this is accepted.
> > (But it traces a random return value because the tracepoint function
> > will return 'void'.)
> > 
> > Thank you,
> > 
> > > 
> > > -- Steve
> > > 
> > > > 
> > > > Fixes: 08c9306fc2e3 ("tracing/fprobe-event: Assume fprobe is a return 
> > > > event by $retval")
> > > > Cc: sta...@vger.kernel.org
> > > > Signed-off-by: Masami Hiramatsu (Google) 
> > > > ---
> > > >  kernel/trace/trace_fprobe.c |9 +
> > > >  1 file changed, 5 insertions(+), 4 deletions(-)
> > > > 
> > > > diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
> > > > index 8bfe23af9c73..7d2ddbcfa377 100644
> > > > --- a/kernel/trace/trace_fprobe.c
> > > > +++ b/kernel/trace/trace_fprobe.c
> > > > @@ -927,11 +927,12 @@ static int parse_symbol_and_return(int argc, 
> > > > const char *argv[],
> > > > for (i = 2; i < argc; i++) {
> > > > tmp = strstr(argv[i], "$retval");
> > > > if (tmp && !isalnum(tmp[7]) && tmp[7] != '_') {
> > > > +   if (is_tracepoint) {
> > > > +   trace_probe_log_set_index(i);
> > > > +   trace_probe_log_err(tmp - argv[i], 
> > > > RETVAL_ON_PROBE);
> > > > +   return -EINVAL;
> > > > +   }
> > > > *is_return = true;
> > > > -   /*
> > > > -* NOTE: Don't check is_tracepoint here, 
> > > > because it will
> > > > -* be checked when the argument is parsed.
> > > > -*/
> > > > break;
> > > > }
> > > > }
> > > 
> > 
> > 
> > -- 
> > Masami Hiramatsu (Google) 
> 
> 
> -- 
> Masami Hiramatsu (Google) 


-- 
Masami Hiramatsu (Google) 



Re: [PATCH] tracing: fprobe-event: Fix to check tracepoint event and return

2023-11-08 Thread Google
On Tue, 7 Nov 2023 09:40:45 +0900
Masami Hiramatsu (Google)  wrote:

> On Mon, 6 Nov 2023 17:28:11 -0500
> Steven Rostedt  wrote:
> 
> > On Sat,  4 Nov 2023 01:05:34 +0900
> > "Masami Hiramatsu (Google)"  wrote:
> > 
> > > From: Masami Hiramatsu (Google) 
> > > 
> > > Fix to check the tracepoint event is not valid with $retval.
> > > The commit 08c9306fc2e3 ("tracing/fprobe-event: Assume fprobe is
> > > a return event by $retval") introduced automatic return probe
> > > conversion with $retval. But since tracepoint event does not
> > > support return probe, $retval is not acceptable.
> > 
> > Can you add the command that causes this to fail.
> 
> Actually this doesn't command fail but it expected to fail.
> 
> $ echo "t kfree $retval" >> dynamic_events

Wait, with some configuration, this doesn't cause a problem.
Let me try to reproduce the issue...

Thank you,

> 
> This should be rejected because 't' (tracepoint) should not be
> a return probe. Without this fix, this is accepted.
> (But it traces a random return value because the tracepoint function
> will return 'void'.)
> 
> Thank you,
> 
> > 
> > -- Steve
> > 
> > > 
> > > Fixes: 08c9306fc2e3 ("tracing/fprobe-event: Assume fprobe is a return 
> > > event by $retval")
> > > Cc: sta...@vger.kernel.org
> > > Signed-off-by: Masami Hiramatsu (Google) 
> > > ---
> > >  kernel/trace/trace_fprobe.c |9 +
> > >  1 file changed, 5 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/kernel/trace/trace_fprobe.c b/kernel/trace/trace_fprobe.c
> > > index 8bfe23af9c73..7d2ddbcfa377 100644
> > > --- a/kernel/trace/trace_fprobe.c
> > > +++ b/kernel/trace/trace_fprobe.c
> > > @@ -927,11 +927,12 @@ static int parse_symbol_and_return(int argc, const 
> > > char *argv[],
> > >   for (i = 2; i < argc; i++) {
> > >   tmp = strstr(argv[i], "$retval");
> > >   if (tmp && !isalnum(tmp[7]) && tmp[7] != '_') {
> > > + if (is_tracepoint) {
> > > + trace_probe_log_set_index(i);
> > > + trace_probe_log_err(tmp - argv[i], 
> > > RETVAL_ON_PROBE);
> > > + return -EINVAL;
> > > + }
> > >   *is_return = true;
> > > - /*
> > > -  * NOTE: Don't check is_tracepoint here, because it will
> > > -  * be checked when the argument is parsed.
> > > -  */
> > >   break;
> > >   }
> > >   }
> > 
> 
> 
> -- 
> Masami Hiramatsu (Google) 


-- 
Masami Hiramatsu (Google) 



Re: [PATCH] fs : Fix warning using plain integer as NULL

2023-11-08 Thread Abhinav Singh

On 11/8/23 16:17, Abhinav Singh wrote:

Sparse static analysis tools generate a warning with this message
"Using plain integer as NULL pointer". In this case this warning is
being shown because we are trying to initialize  pointer to NULL using
integer value 0.

Signed-off-by: Abhinav Singh 
Reviewed-by: Jan Kara 
---
  fs/dax.c   | 2 +-
  fs/direct-io.c | 2 +-
  2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 3380b43cb6bb..423fc1607dfa 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1128,7 +1128,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t 
length, size_t align_size,
/* zero the edges if srcmap is a HOLE or IOMAP_UNWRITTEN */
bool zero_edge = srcmap->flags & IOMAP_F_SHARED ||
 srcmap->type == IOMAP_UNWRITTEN;
-   void *saddr = 0;
+   void *saddr = NULL;
int ret = 0;
  
  	if (!zero_edge) {

diff --git a/fs/direct-io.c b/fs/direct-io.c
index 20533266ade6..60456263a338 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1114,7 +1114,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct 
inode *inode,
loff_t offset = iocb->ki_pos;
const loff_t end = offset + count;
struct dio *dio;
-   struct dio_submit sdio = { 0, };
+   struct dio_submit sdio = { NULL, };
struct buffer_head map_bh = { 0, };
struct blk_plug plug;
unsigned long align = offset | iov_iter_alignment(iter);
Thanks a lot  maintainers for looking into this patch and accepting this 
patch.


BR
Abhinav



[PATCH] fs : Fix warning using plain integer as NULL

2023-11-08 Thread Abhinav Singh
Sparse static analysis tools generate a warning with this message
"Using plain integer as NULL pointer". In this case this warning is
being shown because we are trying to initialize  pointer to NULL using
integer value 0.

Signed-off-by: Abhinav Singh 
Reviewed-by: Jan Kara 
---
 fs/dax.c   | 2 +-
 fs/direct-io.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 3380b43cb6bb..423fc1607dfa 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1128,7 +1128,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t 
length, size_t align_size,
/* zero the edges if srcmap is a HOLE or IOMAP_UNWRITTEN */
bool zero_edge = srcmap->flags & IOMAP_F_SHARED ||
 srcmap->type == IOMAP_UNWRITTEN;
-   void *saddr = 0;
+   void *saddr = NULL;
int ret = 0;
 
if (!zero_edge) {
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 20533266ade6..60456263a338 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1114,7 +1114,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct 
inode *inode,
loff_t offset = iocb->ki_pos;
const loff_t end = offset + count;
struct dio *dio;
-   struct dio_submit sdio = { 0, };
+   struct dio_submit sdio = { NULL, };
struct buffer_head map_bh = { 0, };
struct blk_plug plug;
unsigned long align = offset | iov_iter_alignment(iter);
-- 
2.39.2




Re: [PATCH v5 09/11] remoteproc: qcom: Add Hexagon based multipd rproc driver

2023-11-08 Thread Manikanta Mylavarapu




On 8/2/2023 7:36 PM, Manikanta Mylavarapu wrote:

It adds support to bring up remoteproc's on multipd model.
Pd means protection domain. It's similar to process in Linux.
Here QDSP6 processor runs each wifi radio functionality on a
separate process. One process can't access other process
resources, so this is termed as PD i.e protection domain.

Here we have two pd's called root and user pd. We can correlate
Root pd as root and user pd as user in linux. Root pd has more
privileges than user pd. Root will provide services to user pd.

 From remoteproc driver perspective, root pd corresponds to QDSP6
processor bring up and user pd corresponds to Wifi radio (WCSS)
bring up.

Here WCSS(user) PD is dependent on Q6(root) PD, so first
q6 pd should be up before wcss pd. After wcss pd goes down,
q6 pd should be turned off.



Thanks Krzysztof, Kalle, Rob, Bjorn for all your reviews.
I have addressed them in this series.
Can you please suggest and help me to take this forward ?

Thanks & Regards,
Manikanta.


Re: [PATCH] fs : Fix warning using plain integer as NULL

2023-11-08 Thread Jan Kara
On Wed 08-11-23 10:15:50, Abhinav Singh wrote:
> Sparse static analysis tools generate a warning with this message
> "Using plain integer as NULL pointer". In this case this warning is
> being shown because we are trying to initialize  pointer to NULL using
> integer value 0.
> 
> Signed-off-by: Abhinav Singh 

Nice cleanup. Feel free to add:

Reviewed-by: Jan Kara 
Honza

> ---
>  fs/dax.c   | 2 +-
>  fs/direct-io.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/dax.c b/fs/dax.c
> index 3380b43cb6bb..423fc1607dfa 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1128,7 +1128,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t 
> length, size_t align_size,
>   /* zero the edges if srcmap is a HOLE or IOMAP_UNWRITTEN */
>   bool zero_edge = srcmap->flags & IOMAP_F_SHARED ||
>srcmap->type == IOMAP_UNWRITTEN;
> - void *saddr = 0;
> + void *saddr = NULL;
>   int ret = 0;
>  
>   if (!zero_edge) {
> diff --git a/fs/direct-io.c b/fs/direct-io.c
> index 20533266ade6..60456263a338 100644
> --- a/fs/direct-io.c
> +++ b/fs/direct-io.c
> @@ -1114,7 +1114,7 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct 
> inode *inode,
>   loff_t offset = iocb->ki_pos;
>   const loff_t end = offset + count;
>   struct dio *dio;
> - struct dio_submit sdio = { 0, };
> + struct dio_submit sdio = { NULL, };
>   struct buffer_head map_bh = { 0, };
>   struct blk_plug plug;
>   unsigned long align = offset | iov_iter_alignment(iter);
> -- 
> 2.39.2
> 
-- 
Jan Kara 
SUSE Labs, CR



Re: [PATCH] fs : Fix warning using plain integer as NULL

2023-11-08 Thread Christian Brauner
On Wed, 08 Nov 2023 10:15:50 +0530, Abhinav Singh wrote:
> Sparse static analysis tools generate a warning with this message
> "Using plain integer as NULL pointer". In this case this warning is
> being shown because we are trying to initialize  pointer to NULL using
> integer value 0.
> 
> 

Applied to the vfs.misc branch of the vfs/vfs.git tree.
Patches in the vfs.misc branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs.misc

[1/1] fs : Fix warning using plain integer as NULL
  https://git.kernel.org/vfs/vfs/c/372bfbd2ea43