Hi Naveen,

On Thu, 17 Feb 2022 17:06:23 +0530
"Naveen N. Rao" <naveen.n....@linux.vnet.ibm.com> wrote:

> On some architectures, enabling function tracing results in multiple
> instructions being emitted at function entry. As an example, on
> powerpc64 with -mprofile-kernel, two instructions are emitted at
> function entry:
>       mflr    r0
>       bl      _mcount
> 
> It is desirable to nop out both these instructions when ftrace is not
> active. For that purpose, it is essential to mark both these
> instructions as belonging to ftrace so that other kernel subsystems
> (such as kprobes) do not modify these instructions.

Indeed, kprobes must handle this. However, to keep consistency of kprobes
usage with/without CONFIG_FUNCTION_TRACER, I think KPROBES_ON_FTRACE should
handle these instructions are virutal single instruction.
More specifically, it should allow user to put a kprobe on 'mflr r0' address
and the kprobes on 'bl _mcount' should return -EILSEQ. (because it is not an 
instruction boundary.) And the kprobe's ftrace handler temporarily modifies
the instruction pointer to the address of 'mflr'.

Thank you,

> 
> Add support for this by allowing architectures to override
> ftrace_cmp_recs() and to match against address ranges over and above a
> single MCOUNT_INSN_SIZE.
> 
> For powerpc32, we mark the two instructions preceding the call to
> _mcount() as belonging to ftrace.
> 
> For powerpc64, an additional aspect to consider is that functions can
> have a global entry point for setting up the TOC when invoked from other
> modules. If present, global entry point always involves two instructions
> (addis/lis and addi). To handle this, we provide a custom
> ftrace_init_nop() for powerpc64 where we identify functions having a
> global entry point and record this information in the LSB of
> dyn_ftrace->arch.mod. This information is used in ftrace_cmp_recs() to
> reserve instructions from the global entry point.
> 
> Suggested-by: Steven Rostedt <rost...@goodmis.org>
> Signed-off-by: Naveen N. Rao <naveen.n....@linux.vnet.ibm.com>
> ---
>  arch/powerpc/include/asm/ftrace.h  |  15 ++++
>  arch/powerpc/kernel/trace/ftrace.c | 110 ++++++++++++++++++++++++++---
>  kernel/trace/ftrace.c              |   2 +
>  3 files changed, 117 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/ftrace.h 
> b/arch/powerpc/include/asm/ftrace.h
> index debe8c4f706260..8eb3235831633d 100644
> --- a/arch/powerpc/include/asm/ftrace.h
> +++ b/arch/powerpc/include/asm/ftrace.h
> @@ -59,6 +59,21 @@ static inline unsigned long ftrace_call_adjust(unsigned 
> long addr)
>  struct dyn_arch_ftrace {
>       struct module *mod;
>  };
> +
> +struct dyn_ftrace;
> +struct module *ftrace_mod_addr_get(struct dyn_ftrace *rec);
> +void ftrace_mod_addr_set(struct dyn_ftrace *rec, struct module *mod);
> +
> +#ifdef CONFIG_MPROFILE_KERNEL
> +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
> +#define ftrace_init_nop ftrace_init_nop
> +#endif
> +
> +#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_PPC32)
> +int ftrace_cmp_recs(const void *a, const void *b);
> +#define ftrace_cmp_recs ftrace_cmp_recs
> +#endif
> +
>  #endif /* __ASSEMBLY__ */
>  
>  #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
> diff --git a/arch/powerpc/kernel/trace/ftrace.c 
> b/arch/powerpc/kernel/trace/ftrace.c
> index 80b6285769f27c..11ce9296ce3cf2 100644
> --- a/arch/powerpc/kernel/trace/ftrace.c
> +++ b/arch/powerpc/kernel/trace/ftrace.c
> @@ -428,21 +428,21 @@ int ftrace_make_nop(struct module *mod,
>        * We should either already have a pointer to the module
>        * or it has been passed in.
>        */
> -     if (!rec->arch.mod) {
> +     if (!ftrace_mod_addr_get(rec)) {
>               if (!mod) {
>                       pr_err("No module loaded addr=%lx\n", addr);
>                       return -EFAULT;
>               }
> -             rec->arch.mod = mod;
> +             ftrace_mod_addr_set(rec, mod);
>       } else if (mod) {
> -             if (mod != rec->arch.mod) {
> +             if (mod != ftrace_mod_addr_get(rec)) {
>                       pr_err("Record mod %p not equal to passed in mod %p\n",
> -                            rec->arch.mod, mod);
> +                            ftrace_mod_addr_get(rec), mod);
>                       return -EINVAL;
>               }
>               /* nothing to do if mod == rec->arch.mod */
>       } else
> -             mod = rec->arch.mod;
> +             mod = ftrace_mod_addr_get(rec);
>  
>       return __ftrace_make_nop(mod, rec, addr);
>  #else
> @@ -451,6 +451,96 @@ int ftrace_make_nop(struct module *mod,
>  #endif /* CONFIG_MODULES */
>  }
>  
> +#define FUNC_MCOUNT_OFFSET_PPC32     8
> +#define FUNC_MCOUNT_OFFSET_PPC64_LEP 4
> +#define FUNC_MCOUNT_OFFSET_PPC64_GEP 12
> +
> +#ifdef CONFIG_MPROFILE_KERNEL
> +struct module *ftrace_mod_addr_get(struct dyn_ftrace *rec)
> +{
> +     return (struct module *)((unsigned long)rec->arch.mod & ~0x1);
> +}
> +
> +void ftrace_mod_addr_set(struct dyn_ftrace *rec, struct module *mod)
> +{
> +     rec->arch.mod = (struct module *)(((unsigned long)rec->arch.mod & 0x1) 
> | (unsigned long)mod);
> +}
> +
> +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
> +{
> +     unsigned long offset, ip = rec->ip;
> +     ppc_inst_t op1, op2;
> +     int ret;
> +
> +     if (!kallsyms_lookup_size_offset(rec->ip, NULL, &offset) ||
> +         (offset != FUNC_MCOUNT_OFFSET_PPC64_GEP && offset != 
> FUNC_MCOUNT_OFFSET_PPC64_LEP)) {
> +             ip -= FUNC_MCOUNT_OFFSET_PPC64_GEP;
> +             ret = copy_inst_from_kernel_nofault(&op1, (void *)ip);
> +             ret |= copy_inst_from_kernel_nofault(&op2, (void *)(ip + 
> MCOUNT_INSN_SIZE));
> +             if (!ret &&
> +                 ((ppc_inst_val(op1) & 0xffff0000) == PPC_RAW_LIS(_R2, 0) ||
> +                  (ppc_inst_val(op1) & 0xffff0000) == PPC_RAW_ADDIS(_R2, 
> _R12, 0)) &&
> +                 (ppc_inst_val(op2) & 0xffff0000) == PPC_RAW_ADDI(_R2, _R2, 
> 0))
> +                     ftrace_mod_addr_set(rec, (struct module *)1);
> +     } else if (offset == FUNC_MCOUNT_OFFSET_PPC64_GEP) {
> +             ftrace_mod_addr_set(rec, (struct module *)1);
> +     }
> +
> +     return ftrace_make_nop(mod, rec, MCOUNT_ADDR);
> +}
> +#else
> +struct module *ftrace_mod_addr_get(struct dyn_ftrace *rec)
> +{
> +     return rec->arch.mod;
> +}
> +
> +void ftrace_mod_addr_set(struct dyn_ftrace *rec, struct module *mod)
> +{
> +     rec->arch.mod = mod;
> +}
> +#endif /* CONFIG_MPROFILE_KERNEL */
> +
> +#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_PPC32)
> +int ftrace_location_get_offset(const struct dyn_ftrace *rec)
> +{
> +     if (IS_ENABLED(CONFIG_MPROFILE_KERNEL))
> +             /*
> +              * On ppc64le with -mprofile-kernel, function entry can have:
> +              *   addis r2, r12, M
> +              *   addi  r2, r2, N
> +              *   mflr  r0
> +              *   bl    _mcount
> +              *
> +              * The first two instructions are for TOC setup and represent 
> the global entry
> +              * point for cross-module calls, and may be missing if the 
> function is never called
> +              * from other modules.
> +              */
> +             return ((unsigned long)rec->arch.mod & 0x1) ? 
> FUNC_MCOUNT_OFFSET_PPC64_GEP :
> +                                                           
> FUNC_MCOUNT_OFFSET_PPC64_LEP;
> +     else
> +             /*
> +              * On ppc32, function entry always has:
> +              *   mflr r0
> +              *   stw  r0, 4(r1)
> +              *   bl   _mcount
> +              */
> +             return FUNC_MCOUNT_OFFSET_PPC32;
> +}
> +
> +int ftrace_cmp_recs(const void *a, const void *b)
> +{
> +     const struct dyn_ftrace *key = a;
> +     const struct dyn_ftrace *rec = b;
> +     int offset = ftrace_location_get_offset(rec);
> +
> +     if (key->flags < rec->ip - offset)
> +             return -1;
> +     if (key->ip >= rec->ip + MCOUNT_INSN_SIZE)
> +             return 1;
> +     return 0;
> +}
> +#endif
> +
>  #ifdef CONFIG_MODULES
>  #ifdef CONFIG_PPC64
>  /*
> @@ -494,7 +584,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long 
> addr)
>       ppc_inst_t instr;
>       void *ip = (void *)rec->ip;
>       unsigned long entry, ptr, tramp;
> -     struct module *mod = rec->arch.mod;
> +     struct module *mod = ftrace_mod_addr_get(rec);
>  
>       /* read where this goes */
>       if (copy_inst_from_kernel_nofault(op, ip))
> @@ -561,7 +651,7 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long 
> addr)
>       int err;
>       ppc_inst_t op;
>       u32 *ip = (u32 *)rec->ip;
> -     struct module *mod = rec->arch.mod;
> +     struct module *mod = ftrace_mod_addr_get(rec);
>       unsigned long tramp;
>  
>       /* read where this goes */
> @@ -678,7 +768,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned 
> long addr)
>        * Being that we are converting from nop, it had better
>        * already have a module defined.
>        */
> -     if (!rec->arch.mod) {
> +     if (!ftrace_mod_addr_get(rec)) {
>               pr_err("No module loaded\n");
>               return -EINVAL;
>       }
> @@ -699,7 +789,7 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned 
> long old_addr,
>       ppc_inst_t op;
>       unsigned long ip = rec->ip;
>       unsigned long entry, ptr, tramp;
> -     struct module *mod = rec->arch.mod;
> +     struct module *mod = ftrace_mod_addr_get(rec);
>  
>       /* If we never set up ftrace trampolines, then bail */
>       if (!mod->arch.tramp || !mod->arch.tramp_regs) {
> @@ -814,7 +904,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned 
> long old_addr,
>       /*
>        * Out of range jumps are called from modules.
>        */
> -     if (!rec->arch.mod) {
> +     if (!ftrace_mod_addr_get(rec)) {
>               pr_err("No module loaded\n");
>               return -EINVAL;
>       }
> diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
> index f9feb197b2daaf..68f20cf34b0c47 100644
> --- a/kernel/trace/ftrace.c
> +++ b/kernel/trace/ftrace.c
> @@ -1510,6 +1510,7 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long 
> ip, void *regs)
>       }
>  
>  
> +#ifndef ftrace_cmp_recs
>  static int ftrace_cmp_recs(const void *a, const void *b)
>  {
>       const struct dyn_ftrace *key = a;
> @@ -1521,6 +1522,7 @@ static int ftrace_cmp_recs(const void *a, const void *b)
>               return 1;
>       return 0;
>  }
> +#endif
>  
>  static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
>  {
> -- 
> 2.35.1
> 


-- 
Masami Hiramatsu <mhira...@kernel.org>

Reply via email to