[RFC PATCH v4 01/17] powerpc/trace: Account for -fpatchable-function-entry support by toolchain
So far, we have relied on the fact that gcc supports both -mprofile-kernel, as well as -fpatchable-function-entry, and clang supports neither. Our Makefile only checks for CONFIG_MPROFILE_KERNEL to decide which files to build. Clang has a feature request out [*] to implement -fpatchable-function-entry, and is unlikely to support -mprofile-kernel. Update our Makefile checks so that we pick up the correct files to build once clang picks up support for -fpatchable-function-entry. [*] https://github.com/llvm/llvm-project/issues/57031 Signed-off-by: Naveen N Rao --- arch/powerpc/kernel/trace/Makefile | 11 +++ 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index 125f4ca588b9..d6c3885453bd 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -9,12 +9,15 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER)+= ftrace.o ftrace_entry.o -ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER)+= ftrace.o ftrace_entry.o +ifdef CONFIG_FUNCTION_TRACER +obj32-y+= ftrace.o ftrace_entry.o +ifeq ($(CONFIG_MPROFILE_KERNEL)$(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY),) +obj64-y+= ftrace_64_pg.o ftrace_64_pg_entry.o else -obj64-$(CONFIG_FUNCTION_TRACER)+= ftrace_64_pg.o ftrace_64_pg_entry.o +obj64-y+= ftrace.o ftrace_entry.o endif +endif + obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_PPC64)+= $(obj64-y) -- 2.45.2
[RFC PATCH v4 10/17] powerpc/ftrace: Add a postlink script to validate function tracer
Function tracer on powerpc can only work with vmlinux having a .text size of up to ~64MB due to powerpc branch instruction having a limited relative branch range of 32MB. Today, this is only detected on kernel boot when ftrace is init'ed. Add a post-link script to check the size of .text so that we can detect this at build time, and break the build if necessary. We add a dependency on !COMPILE_TEST for CONFIG_HAVE_FUNCTION_TRACER so that allyesconfig and other test builds can continue to work without enabling ftrace. Signed-off-by: Naveen N Rao --- arch/powerpc/Kconfig | 2 +- arch/powerpc/Makefile.postlink | 8 ++ arch/powerpc/tools/ftrace_check.sh | 45 ++ 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100755 arch/powerpc/tools/ftrace_check.sh diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f8891fbe7c16..68f0e7a5576f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -244,7 +244,7 @@ config PPC select HAVE_FUNCTION_DESCRIPTORSif PPC64_ELF_ABI_V1 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER if PPC64 || (PPC32 && CC_IS_GCC) + select HAVE_FUNCTION_TRACER if !COMPILE_TEST && (PPC64 || (PPC32 && CC_IS_GCC)) select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC select HAVE_GENERIC_VDSO select HAVE_HARDLOCKUP_DETECTOR_ARCHif PPC_BOOK3S_64 && SMP diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink index ae5a4256b03d..bb601be36173 100644 --- a/arch/powerpc/Makefile.postlink +++ b/arch/powerpc/Makefile.postlink @@ -24,6 +24,9 @@ else $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" endif +quiet_cmd_ftrace_check = CHKFTRC $@ + cmd_ftrace_check = $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/ftrace_check.sh "$(NM)" "$@" + # `@true` prevents complaint when there is nothing to be done vmlinux: FORCE @@ -34,6 +37,11 @@ endif ifdef CONFIG_RELOCATABLE $(call if_changed,relocs_check) 
endif +ifdef CONFIG_FUNCTION_TRACER +ifndef CONFIG_PPC64_ELF_ABI_V1 + $(call cmd,ftrace_check) +endif +endif clean: rm -f .tmp_symbols.txt diff --git a/arch/powerpc/tools/ftrace_check.sh b/arch/powerpc/tools/ftrace_check.sh new file mode 100755 index ..33f2fd45e54d --- /dev/null +++ b/arch/powerpc/tools/ftrace_check.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later +# +# This script checks vmlinux to ensure that all functions can call ftrace_caller() either directly, +# or through the stub, ftrace_tramp_text, at the end of kernel text. + +# Error out if any command fails +set -e + +# Allow for verbose output +if [ "$V" = "1" ]; then + set -x +fi + +if [ $# -lt 2 ]; then + echo "$0 [path to nm] [path to vmlinux]" 1>&2 + exit 1 +fi + +# Have Kbuild supply the path to nm so we handle cross compilation. +nm="$1" +vmlinux="$2" + +stext_addr=$($nm "$vmlinux" | grep -e " [TA] _stext$" | cut -d' ' -f1 | tr '[[:lower:]]' '[[:upper:]]') +ftrace_caller_addr=$($nm "$vmlinux" | grep -e " T ftrace_caller$" | cut -d' ' -f1 | tr '[[:lower:]]' '[[:upper:]]') +ftrace_tramp_addr=$($nm "$vmlinux" | grep -e " T ftrace_tramp_text$" | cut -d' ' -f1 | tr '[[:lower:]]' '[[:upper:]]') + +ftrace_caller_offset=$(echo "ibase=16;$ftrace_caller_addr - $stext_addr" | bc) +ftrace_tramp_offset=$(echo "ibase=16;$ftrace_tramp_addr - $ftrace_caller_addr" | bc) +sz_32m=$(printf "%d" 0x2000000) +sz_64m=$(printf "%d" 0x4000000) + +# ftrace_caller - _stext < 32M +if [ $ftrace_caller_offset -ge $sz_32m ]; then + echo "ERROR: ftrace_caller (0x$ftrace_caller_addr) is beyond 32MiB of _stext" 1>&2 + echo "ERROR: consider disabling CONFIG_FUNCTION_TRACER, or reducing the size of kernel text" 1>&2 + exit 1 +fi + +# ftrace_tramp_text - ftrace_caller < 64M
if [ $ftrace_tramp_offset -ge $sz_64m ]; then + echo "ERROR: kernel text extends beyond 64MiB from ftrace_caller" 1>&2 + echo "ERROR: consider disabling CONFIG_FUNCTION_TRACER, or reducing the size of kernel text" 1>&2 + exit 1 +fi -- 2.45.2
[RFC PATCH v4 09/17] powerpc64/bpf: Fold bpf_jit_emit_func_call_hlp() into bpf_jit_emit_func_call_rel()
Commit 61688a82e047 ("powerpc/bpf: enable kfunc call") enhanced bpf_jit_emit_func_call_hlp() to handle calls out to module region, where bpf progs are generated. The only difference now between bpf_jit_emit_func_call_hlp() and bpf_jit_emit_func_call_rel() is in handling of the initial pass where target function address is not known. Fold that logic into bpf_jit_emit_func_call_hlp() and rename it to bpf_jit_emit_func_call_rel() to simplify bpf function call JIT code. We don't actually need to load/restore TOC across a call out to a different kernel helper or to a different bpf program since they all work with the kernel TOC. We only need to do it if we have to call out to a module function. So, guard TOC load/restore with appropriate conditions. Signed-off-by: Naveen N Rao --- arch/powerpc/net/bpf_jit_comp64.c | 61 +-- 1 file changed, 17 insertions(+), 44 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 2cbcdf93cc19..f3be024fc685 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -202,14 +202,22 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_BLR()); } -static int -bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) { unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0; long reladdr; - if (WARN_ON_ONCE(!kernel_text_address(func_addr))) - return -EINVAL; + /* bpf to bpf call, func is not known in the initial pass. 
Emit 5 nops as a placeholder */ + if (!func) { + for (int i = 0; i < 5; i++) + EMIT(PPC_RAW_NOP()); + /* elfv1 needs an additional instruction to load addr from descriptor */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_BCTRL()); + return 0; + } #ifdef CONFIG_PPC_KERNEL_PCREL reladdr = func_addr - local_paca->kernelbase; @@ -266,7 +274,8 @@ bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, * We can clobber r2 since we get called through a * function pointer (so caller will save/restore r2). */ - EMIT(PPC_RAW_LD(_R2, bpf_to_ppc(TMP_REG_2), 8)); + if (is_module_text_address(func_addr)) + EMIT(PPC_RAW_LD(_R2, bpf_to_ppc(TMP_REG_2), 8)); } else { PPC_LI64(_R12, func); EMIT(PPC_RAW_MTCTR(_R12)); @@ -276,46 +285,14 @@ bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, * Load r2 with kernel TOC as kernel TOC is used if function address falls * within core kernel text. */ - EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); + if (is_module_text_address(func_addr)) + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); } #endif return 0; } -int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) -{ - unsigned int i, ctx_idx = ctx->idx; - - if (WARN_ON_ONCE(func && is_module_text_address(func))) - return -EINVAL; - - /* skip past descriptor if elf v1 */ - func += FUNCTION_DESCR_SIZE; - - /* Load function address into r12 */ - PPC_LI64(_R12, func); - - /* For bpf-to-bpf function calls, the callee's address is unknown -* until the last extra pass. As seen above, we use PPC_LI64() to -* load the callee's address, but this may optimize the number of -* instructions required based on the nature of the address. 
-* -* Since we don't want the number of instructions emitted to increase, -* we pad the optimized PPC_LI64() call with NOPs to guarantee that -* we always have a five-instruction sequence, which is the maximum -* that PPC_LI64() can emit. -*/ - if (!image) - for (i = ctx->idx - ctx_idx; i < 5; i++) - EMIT(PPC_RAW_NOP()); - - EMIT(PPC_RAW_MTCTR(_R12)); - EMIT(PPC_RAW_BCTRL()); - - return 0; -} - static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* @@ -1102,11 +1079,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code if (ret < 0) return ret; -
[RFC PATCH v4 08/17] powerpc/ftrace: Move ftrace stub used for init text before _einittext
Move the ftrace stub used to cover inittext before _einittext so that it is within kernel text, as seen through core_kernel_text(). This is required for a subsequent change to ftrace. Signed-off-by: Naveen N Rao --- arch/powerpc/kernel/vmlinux.lds.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index f420df7888a7..0aef9959f2cd 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -267,14 +267,13 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT - + *(.tramp.ftrace.init); /* *.init.text might be RO so we must ensure this section ends on * a page boundary. */ . = ALIGN(PAGE_SIZE); _einittext = .; - *(.tramp.ftrace.init); } :text /* .exit.text is discarded at runtime, not link time, -- 2.45.2
[RFC PATCH v4 07/17] powerpc/ftrace: Skip instruction patching if the instructions are the same
To simplify upcoming changes to ftrace, add a check to skip actual instruction patching if the old and new instructions are the same. We still validate that the instruction is what we expect, but don't actually patch the same instruction again. Signed-off-by: Naveen N Rao --- arch/powerpc/kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index fe0546fbac8e..719517265d39 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -82,7 +82,7 @@ static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_ { int ret = ftrace_validate_inst(ip, old); - if (!ret) + if (!ret && !ppc_inst_equal(old, new)) ret = patch_instruction((u32 *)ip, new); return ret; -- 2.45.2
[RFC PATCH v4 06/17] powerpc/ftrace: Remove pointer to struct module from dyn_arch_ftrace
Pointer to struct module is only relevant for ftrace records belonging to kernel modules. Having this field in dyn_arch_ftrace wastes memory for all ftrace records belonging to the kernel. Remove the same in favour of looking up the module from the ftrace record address, similar to other architectures. Reviewed-by: Nicholas Piggin Signed-off-by: Naveen N Rao --- arch/powerpc/include/asm/ftrace.h| 1 - arch/powerpc/kernel/trace/ftrace.c | 49 + arch/powerpc/kernel/trace/ftrace_64_pg.c | 69 ++-- 3 files changed, 56 insertions(+), 63 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 107fc5a48456..201f9d15430a 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -26,7 +26,6 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, struct module; struct dyn_ftrace; struct dyn_arch_ftrace { - struct module *mod; }; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 8c3e523e4f96..fe0546fbac8e 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -106,28 +106,43 @@ static unsigned long find_ftrace_tramp(unsigned long ip) return 0; } +#ifdef CONFIG_MODULES +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) +{ + struct module *mod = NULL; + + preempt_disable(); + mod = __module_text_address(ip); + preempt_enable(); + + if (!mod) + pr_err("No module loaded at addr=%lx\n", ip); + + return (addr == (unsigned long)ftrace_caller ? 
mod->arch.tramp : mod->arch.tramp_regs); +} +#else +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) +{ + return 0; +} +#endif + static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst) { unsigned long ip = rec->ip; unsigned long stub; - if (is_offset_in_branch_range(addr - ip)) { + if (is_offset_in_branch_range(addr - ip)) /* Within range */ stub = addr; -#ifdef CONFIG_MODULES - } else if (rec->arch.mod) { - /* Module code would be going to one of the module stubs */ - stub = (addr == (unsigned long)ftrace_caller ? rec->arch.mod->arch.tramp : - rec->arch.mod->arch.tramp_regs); -#endif - } else if (core_kernel_text(ip)) { + else if (core_kernel_text(ip)) /* We would be branching to one of our ftrace stubs */ stub = find_ftrace_tramp(ip); - if (!stub) { - pr_err("0x%lx: No ftrace stubs reachable\n", ip); - return -EINVAL; - } - } else { + else + stub = ftrace_lookup_module_stub(ip, addr); + + if (!stub) { + pr_err("0x%lx: No ftrace stubs reachable\n", ip); return -EINVAL; } @@ -262,14 +277,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) if (ret) return ret; - if (!core_kernel_text(ip)) { - if (!mod) { - pr_err("0x%lx: No module provided for non-kernel address\n", ip); - return -EFAULT; - } - rec->arch.mod = mod; - } - /* Nop-out the ftrace location */ new = ppc_inst(PPC_RAW_NOP()); addr = MCOUNT_ADDR; diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index 12fab1803bcf..8a551dfca3d0 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -116,6 +116,20 @@ static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) } #ifdef CONFIG_MODULES +static struct module *ftrace_lookup_module(struct dyn_ftrace *rec) +{ + struct module *mod; + + preempt_disable(); + mod = __module_text_address(rec->ip); + preempt_enable(); + + if (!mod) + pr_err("No module loaded at addr=%lx\n", 
rec->ip); + + return mod; +} + static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) @@ -124,6 +138,12 @@ __ftrace_make_nop(struct module *mod, unsigned long ip = rec->ip; ppc_inst_t op, pop; + if (!mod) { + mod = ftrace_lookup_module(rec); + if (!mod) + return -EINVAL; + } + /* read where this goes */ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); @@ -366,27 +386,6 @@ int ftrace_m
[RFC PATCH v4 05/17] powerpc/module_64: Convert #ifdef to IS_ENABLED()
Minor refactor for converting #ifdef to IS_ENABLED(). Reviewed-by: Nicholas Piggin Signed-off-by: Naveen N Rao --- arch/powerpc/kernel/module_64.c | 10 ++ 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index e9bab599d0c2..1db88409bd95 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -241,14 +241,8 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, } } -#ifdef CONFIG_DYNAMIC_FTRACE - /* make the trampoline to the ftrace_caller */ - relocs++; -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - /* an additional one for ftrace_regs_caller */ - relocs++; -#endif -#endif + /* stubs for ftrace_caller and ftrace_regs_caller */ + relocs += IS_ENABLED(CONFIG_DYNAMIC_FTRACE) + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS); pr_debug("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); -- 2.45.2
[RFC PATCH v4 04/17] powerpc32/ftrace: Unify 32-bit and 64-bit ftrace entry code
On 32-bit powerpc, gcc generates a three instruction sequence for function profiling: mflrr0 stw r0, 4(r1) bl _mcount On kernel boot, the call to _mcount() is nop-ed out, to be patched back in when ftrace is actually enabled. The 'stw' instruction therefore is not necessary unless ftrace is enabled. Nop it out during ftrace init. When ftrace is enabled, we want the 'stw' so that stack unwinding works properly. Perform the same within the ftrace handler, similar to 64-bit powerpc. Reviewed-by: Nicholas Piggin Signed-off-by: Naveen N Rao --- arch/powerpc/kernel/trace/ftrace.c | 6 -- arch/powerpc/kernel/trace/ftrace_entry.S | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 2ef504700e8d..8c3e523e4f96 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -240,8 +240,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) } else if (IS_ENABLED(CONFIG_PPC32)) { /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); - if (!ret) - ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)), +ppc_inst(PPC_RAW_NOP())); } else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */ ret = ftrace_read_inst(ip - 4, &old); diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 76dbe9fd2c0f..244a1c7bb1e8 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -33,6 +33,8 @@ * and then arrange for the ftrace function to be called. 
*/ .macro ftrace_regs_entry allregs + /* Save the original return address in A's stack frame */ + PPC_STL r0, LRSAVE(r1) /* Create a minimal stack frame for representing B */ PPC_STLUr1, -STACK_FRAME_MIN_SIZE(r1) @@ -44,8 +46,6 @@ SAVE_GPRS(3, 10, r1) #ifdef CONFIG_PPC64 - /* Save the original return address in A's stack frame */ - std r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1) /* Ok to continue? */ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 -- 2.45.2
[RFC PATCH v4 03/17] powerpc64/ftrace: Nop out additional 'std' instruction emitted by gcc v5.x
Gcc v5.x emits a 3-instruction sequence for -mprofile-kernel: mflrr0 std r0, 16(r1) bl _mcount Gcc v6.x moved to a simpler 2-instruction sequence by removing the 'std' instruction. The store saved the return address in the LR save area in the caller stack frame for stack unwinding. However, with dynamic ftrace, we no longer have a call to _mcount on kernel boot when ftrace is not enabled. When ftrace is enabled, that store is performed within ftrace_caller(). As such, the additional 'std' instruction is redundant. Nop it out on kernel boot. With this change, we now use the same 2-instruction profiling sequence with both -mprofile-kernel, as well as -fpatchable-function-entry on 64-bit powerpc. Signed-off-by: Naveen N Rao --- arch/powerpc/kernel/trace/ftrace.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d8d6b4fd9a14..2ef504700e8d 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -246,8 +246,12 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */ ret = ftrace_read_inst(ip - 4, &old); if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0 { + /* Gcc v5.x emit the additional 'std' instruction, gcc v6.x don't */ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); - ret |= ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)), +ppc_inst(PPC_RAW_NOP())); } } else { return -EINVAL; -- 2.45.2
[RFC PATCH v4 02/17] powerpc/kprobes: Use ftrace to determine if a probe is at function entry
Rather than hard-coding the offset into a function to be used to determine if a kprobe is at function entry, use ftrace_location() to determine the ftrace location within the function and categorize all instructions till that offset to be function entry. For functions that cannot be traced, we fall back to using a fixed offset of 8 (two instructions) to categorize a probe as being at function entry for 64-bit elfv2, unless we are using pcrel. Acked-by: Masami Hiramatsu (Google) Signed-off-by: Naveen N Rao --- arch/powerpc/kernel/kprobes.c | 18 -- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 14c5ddec3056..ca204f4f21c1 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -105,24 +105,22 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } -static bool arch_kprobe_on_func_entry(unsigned long offset) +static bool arch_kprobe_on_func_entry(unsigned long addr, unsigned long offset) { -#ifdef CONFIG_PPC64_ELF_ABI_V2 -#ifdef CONFIG_KPROBES_ON_FTRACE - return offset <= 16; -#else - return offset <= 8; -#endif -#else + unsigned long ip = ftrace_location(addr); + + if (ip) + return offset <= (ip - addr); + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + return offset <= 8; return !offset; -#endif } /* XXX try and fold the magic of kprobe_lookup_name() in this */ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - *on_func_entry = arch_kprobe_on_func_entry(offset); + *on_func_entry = arch_kprobe_on_func_entry(addr, offset); return (kprobe_opcode_t *)(addr + offset); } -- 2.45.2
[RFC PATCH v4 17/17] powerpc64/bpf: Add support for bpf trampolines
Add support for bpf_arch_text_poke() and arch_prepare_bpf_trampoline() for 64-bit powerpc. While the code is generic, BPF trampolines are only enabled on 64-bit powerpc. 32-bit powerpc will need testing and some updates. BPF Trampolines adhere to the existing ftrace ABI utilizing a two-instruction profiling sequence, as well as the newer ABI utilizing a three-instruction profiling sequence enabling return with a 'blr'. The trampoline code itself closely follows x86 implementation. BPF prog JIT is extended to mimic 64-bit powerpc approach for ftrace having a single nop at function entry, followed by the function profiling sequence out-of-line and a separate long branch stub for calls to trampolines that are out of range. A dummy_tramp is provided to simplify synchronization similar to arm64. When attaching a bpf trampoline to a bpf prog, we can patch up to three things: - the nop at bpf prog entry to go to the out-of-line stub - the instruction in the out-of-line stub to either call the bpf trampoline directly, or to branch to the long_branch stub. - the trampoline address before the long_branch stub. We do not need any synchronization here since we always have a valid branch target regardless of the order in which the above stores are seen. dummy_tramp ensures that the long_branch stub goes to a valid destination on other cpus, even when the branch to the long_branch stub is seen before the updated trampoline address. However, when detaching a bpf trampoline from a bpf prog, or if changing the bpf trampoline address, we need synchronization to ensure that other cpus can no longer branch into the older trampoline so that it can be safely freed. bpf_tramp_image_put() uses rcu_tasks to ensure all cpus make forward progress, but we still need to ensure that other cpus execute isync (or some CSI) so that they don't go back into the trampoline again. 
Signed-off-by: Naveen N Rao --- arch/powerpc/include/asm/ppc-opcode.h | 14 + arch/powerpc/net/bpf_jit.h| 12 + arch/powerpc/net/bpf_jit_comp.c | 842 +- arch/powerpc/net/bpf_jit_comp32.c | 7 +- arch/powerpc/net/bpf_jit_comp64.c | 7 +- 5 files changed, 879 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index b98a9e982c03..4312bcb913a4 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -587,12 +587,26 @@ #define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) #define PPC_RAW_EIEIO()(0x7c0006ac) +/* bcl 20,31,$+4 */ +#define PPC_RAW_BCL4() (0x429f0005) #define PPC_RAW_BRANCH(offset) (0x4800 | PPC_LI(offset)) #define PPC_RAW_BL(offset) (0x4801 | PPC_LI(offset)) #define PPC_RAW_TW(t0, a, b) (0x7c08 | ___PPC_RS(t0) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_TRAP() PPC_RAW_TW(31, 0, 0) #define PPC_RAW_SETB(t, bfa) (0x7c000100 | ___PPC_RT(t) | ___PPC_RA((bfa) << 2)) +#ifdef CONFIG_PPC32 +#define PPC_RAW_STLPPC_RAW_STW +#define PPC_RAW_STLU PPC_RAW_STWU +#define PPC_RAW_LL PPC_RAW_LWZ +#define PPC_RAW_CMPLI PPC_RAW_CMPWI +#else +#define PPC_RAW_STLPPC_RAW_STD +#define PPC_RAW_STLU PPC_RAW_STDU +#define PPC_RAW_LL PPC_RAW_LD +#define PPC_RAW_CMPLI PPC_RAW_CMPDI +#endif + /* Deal with instructions that older assemblers aren't aware of */ #definePPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) #definePPC_CP_ABORTstringify_in_c(.long PPC_RAW_CP_ABORT) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index cdea5dccaefe..2d04ce5a23da 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -12,6 +12,7 @@ #include #include +#include #ifdef CONFIG_PPC64_ELF_ABI_V1 #define FUNCTION_DESCR_SIZE24 @@ -21,6 +22,9 @@ #define CTX_NIA(ctx) ((unsigned long)ctx->idx * 4) +#define SZLsizeof(unsigned long) +#define BPF_INSN_SAFETY64 + #define PLANT_INSTR(d, idx, instr) \ do { if (d) { (d)[idx] = instr; } 
idx++; } while (0) #define EMIT(instr)PLANT_INSTR(image, ctx->idx, instr) @@ -81,6 +85,13 @@ EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ 0x)); \ } } while (0) +#define PPC_LI_ADDRPPC_LI64 +#define PPC64_LOAD_PACA()\ + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))) +#else +#define PPC_LI64(d, i) BUILD_BUG() +#define PPC_LI_ADDRPPC_LI32 +#define PPC64_LOAD_PACA() BUILD_BUG() #endif /* @@ -165,6 +176,7 @@ int bpf_jit
[RFC PATCH v4 16/17] samples/ftrace: Add support for ftrace direct samples on powerpc
Add powerpc 32-bit and 64-bit samples for ftrace direct. This serves to show the sample instruction sequence to be used by ftrace direct calls to adhere to the ftrace ABI. On 64-bit powerpc, TOC setup requires some additional work. Signed-off-by: Naveen N Rao --- arch/powerpc/Kconfig| 2 + samples/ftrace/ftrace-direct-modify.c | 85 +++- samples/ftrace/ftrace-direct-multi-modify.c | 101 +++- samples/ftrace/ftrace-direct-multi.c| 79 ++- samples/ftrace/ftrace-direct-too.c | 83 +++- samples/ftrace/ftrace-direct.c | 69 - 6 files changed, 414 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 29aab3770415..f6ff44acf112 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -275,6 +275,8 @@ config PPC select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ + select HAVE_SAMPLE_FTRACE_DIRECTif HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_SETUP_PER_CPU_AREA if PPC64 select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2) diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index 81220390851a..cfea7a38befb 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -2,7 +2,7 @@ #include #include #include -#ifndef CONFIG_ARM64 +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include #endif @@ -199,6 +199,89 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_PPC +#include + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 
99:.quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr0\n"\ +" bctr\n" +#endif + +asm ( +" .pushsection.text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" + PPC_STL"0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr0\n" + PPC_STL"0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE +" bl my_direct_func1\n" + PPC64_TOC_RESTORE +" addi1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" + PPC_STL"0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr0\n" + PPC_STL"0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n"
[RFC PATCH v4 15/17] powerpc/ftrace: Add support for DYNAMIC_FTRACE_WITH_DIRECT_CALLS
Add support for DYNAMIC_FTRACE_WITH_DIRECT_CALLS similar to the arm64 implementation. ftrace direct calls allow custom trampolines to be called into directly from function ftrace call sites, bypassing the ftrace trampoline completely. This functionality is currently utilized by BPF trampolines to hook into kernel function entries. Since we have limited relative branch range, we support ftrace direct calls through support for DYNAMIC_FTRACE_WITH_CALL_OPS. In this approach, ftrace trampoline is not entirely bypassed. Rather, it is re-purposed into a stub that reads direct_call field from the associated ftrace_ops structure and branches into that, if it is not NULL. For this, it is sufficient if we can ensure that the ftrace trampoline is reachable from all traceable functions. When multiple ftrace_ops are associated with a call site, we utilize a call back to set pt_regs->orig_gpr3 that can then be tested on the return path from the ftrace trampoline to branch into the direct caller. Signed-off-by: Naveen N Rao --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/ftrace.h| 15 +++ arch/powerpc/kernel/asm-offsets.c| 3 + arch/powerpc/kernel/trace/ftrace.c | 9 ++ arch/powerpc/kernel/trace/ftrace_entry.S | 114 +-- 5 files changed, 113 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index cb6031d86dc9..29aab3770415 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -236,6 +236,7 @@ config PPC select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_ARGSif ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if PPC_FTRACE_OUT_OF_LINE || (PPC32 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY) + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS if HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS select HAVE_DYNAMIC_FTRACE_WITH_REGSif ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS diff --git 
a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 267bd52fef21..2f1a6d25838d 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -150,6 +150,21 @@ extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_text_count, #endif void ftrace_free_init_tramp(void); unsigned long ftrace_call_adjust(unsigned long addr); + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * When an ftrace registered caller is tracing a function that is also set by a + * register_ftrace_direct() call, it needs to be differentiated in the + * ftrace_caller trampoline so that the direct call can be invoked after the + * other ftrace ops. To do this, place the direct caller in the orig_gpr3 field + * of pt_regs. This tells ftrace_caller that there's a direct caller. + */ +static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) +{ + struct pt_regs *regs = &fregs->regs; + regs->orig_gpr3 = addr; +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #else static inline void ftrace_free_init_tramp(void) { } static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 60d1e388c2ba..dbd56264a8bc 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -680,6 +680,9 @@ int main(void) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS OFFSET(FTRACE_OPS_FUNC, ftrace_ops, func); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + OFFSET(FTRACE_OPS_DIRECT_CALL, ftrace_ops, direct_call); +#endif #endif return 0; diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 291c6c3d3a78..4316a7cfbdb8 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -150,6 +150,15 @@ static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_ else ip = rec->ip; + if 
(!is_offset_in_branch_range(addr - ip) && addr != FTRACE_ADDR && addr != FTRACE_REGS_ADDR) { + /* This can only happen with ftrace direct */ + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + pr_err("0x%lx (0x%lx): Unexpected target address 0x%lx\n", ip, rec->ip, addr); + return -EINVAL; + } + addr = FTRACE_ADDR; + } + if (is_offset_in_branch_range(addr - ip)) /* Within range */ stub = addr; diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index c019380bdd6a..eea4cb3737a8 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b
[RFC PATCH v4 14/17] powerpc/ftrace: Add support for DYNAMIC_FTRACE_WITH_CALL_OPS
Implement support for DYNAMIC_FTRACE_WITH_CALL_OPS similar to the arm64 implementation. This works by patching-in a pointer to an associated ftrace_ops structure before each traceable function. If multiple ftrace_ops are associated with a call site, then a special ftrace_list_ops is used to enable iterating over all the registered ftrace_ops. If no ftrace_ops are associated with a call site, then a special ftrace_nop_ops structure is used to render the ftrace call as a no-op. ftrace trampoline can then read the associated ftrace_ops for a call site by loading from an offset from the LR, and branch directly to the associated function. The primary advantage with this approach is that we don't have to iterate over all the registered ftrace_ops for call sites that have a single ftrace_ops registered. This is the equivalent of implementing support for dynamic ftrace trampolines, which set up a special ftrace trampoline for each registered ftrace_ops and have individual call sites branch into those directly. A secondary advantage is that this gives us a way to add support for direct ftrace callers without having to resort to using stubs. The address of the direct call trampoline can be loaded from the ftrace_ops structure. To support this, we reserve a nop before each function on 32-bit powerpc. For 64-bit powerpc, two nops are reserved before each out-of-line stub. During ftrace activation, we update this location with the associated ftrace_ops pointer. Then, on ftrace entry, we load from this location and call into ftrace_ops->func(). For 64-bit powerpc, we ensure that the out-of-line stub area is doubleword aligned so that ftrace_ops address can be updated atomically. 
Signed-off-by: Naveen N Rao --- arch/powerpc/Kconfig | 1 + arch/powerpc/Makefile | 4 ++ arch/powerpc/include/asm/ftrace.h | 5 +- arch/powerpc/kernel/asm-offsets.c | 4 ++ arch/powerpc/kernel/trace/ftrace.c | 59 +- arch/powerpc/kernel/trace/ftrace_entry.S | 36 ++--- arch/powerpc/tools/ftrace-gen-ool-stubs.sh | 5 +- 7 files changed, 102 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a4dff8624510..cb6031d86dc9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -235,6 +235,7 @@ config PPC select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_ARGSif ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 + select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if PPC_FTRACE_OUT_OF_LINE || (PPC32 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY) select HAVE_DYNAMIC_FTRACE_WITH_REGSif ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index c973e6cd1ae8..7dede0ec0163 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -158,8 +158,12 @@ KBUILD_CPPFLAGS+= -DCC_USING_PATCHABLE_FUNCTION_ENTRY ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE CC_FLAGS_FTRACE := -fpatchable-function-entry=1 else +ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS # PPC32 only +CC_FLAGS_FTRACE := -fpatchable-function-entry=3,1 +else CC_FLAGS_FTRACE := -fpatchable-function-entry=2 endif +endif else CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index dc870824359c..267bd52fef21 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -138,8 +138,11 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE struct ftrace_ool_stub { +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS 
+ struct ftrace_ops *ftrace_op; +#endif u32 insn[4]; -}; +} __aligned(sizeof(unsigned long)); extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_text[], ftrace_ool_stub_inittext[]; extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_text_count, diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6854547d3164..60d1e388c2ba 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -678,5 +678,9 @@ int main(void) DEFINE(FTRACE_OOL_STUB_SIZE, sizeof(struct ftrace_ool_stub)); #endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + OFFSET(FTRACE_OPS_FUNC, ftrace_ops, func); +#endif + return 0; } diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index b4de8b8cbe3a..291c6c3d3a78 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -38,8 +38,11 @@ unsigned long ftrace_call_adjust(unsigned long addr) return
[RFC PATCH v4 13/17] powerpc64/ftrace: Support .text larger than 32MB with out-of-line stubs
We are restricted to a .text size of ~32MB when using out-of-line function profile sequence. Allow this to be extended up to the previous limit of ~64MB by reserving space in the middle of .text. A new config option CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE is introduced to specify the number of function stubs that are reserved in .text. On boot, ftrace utilizes stubs from this area first before using the stub area at the end of .text. A ppc64le defconfig has ~44k functions that can be traced. A more conservative value of 32k functions is chosen as the default value of PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE so that we do not allot more space than necessary by default. If building a kernel that only has 32k trace-able functions, we won't allot any more space at the end of .text during the pass on vmlinux.o. Otherwise, only the remaining functions get space for stubs at the end of .text. This default value should help cover a .text size of ~48MB in total (including space reserved at the end of .text which can cover up to 32MB), which should be sufficient for most common builds. For a very small kernel build, this can be set to 0. Or, this can be bumped up to a larger value to support vmlinux .text size up to ~64MB. 
Signed-off-by: Naveen N Rao --- arch/powerpc/Kconfig | 12 arch/powerpc/include/asm/ftrace.h | 6 -- arch/powerpc/kernel/trace/ftrace.c | 21 + arch/powerpc/kernel/trace/ftrace_entry.S | 8 arch/powerpc/tools/Makefile| 2 +- arch/powerpc/tools/ftrace-gen-ool-stubs.sh | 11 +++ 6 files changed, 49 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index f50cfd15bb73..a4dff8624510 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -573,6 +573,18 @@ config PPC_FTRACE_OUT_OF_LINE depends on PPC64 select ARCH_WANTS_PRE_LINK_VMLINUX +config PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE + int "Number of ftrace out-of-line stubs to reserve within .text" + default 32768 if PPC_FTRACE_OUT_OF_LINE + default 0 + help + Number of stubs to reserve for use by ftrace. This space is + reserved within .text, and is distinct from any additional space + added at the end of .text before the final vmlinux link. Set to + zero to have stubs only be generated at the end of vmlinux (only + if the size of vmlinux is less than 32MB). Set to a higher value + if building vmlinux larger than 48MB. 
+ config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 0589bb252de7..dc870824359c 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -140,8 +140,10 @@ extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; struct ftrace_ool_stub { u32 insn[4]; }; -extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_inittext[]; -extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_inittext_count; +extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_text[], + ftrace_ool_stub_inittext[]; +extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_text_count, + ftrace_ool_stub_inittext_count; #endif void ftrace_free_init_tramp(void); unsigned long ftrace_call_adjust(unsigned long addr); diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index c03336301bad..b4de8b8cbe3a 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -168,7 +168,7 @@ static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_ static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) { #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE - static int ool_stub_text_end_index, ool_stub_inittext_index; + static int ool_stub_text_index, ool_stub_text_end_index, ool_stub_inittext_index; int ret = 0, ool_stub_count, *ool_stub_index; ppc_inst_t inst; /* @@ -191,9 +191,22 @@ static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) ool_stub_index = &ool_stub_inittext_index; ool_stub_count = ftrace_ool_stub_inittext_count; } else if (is_kernel_text(rec->ip)) { - ool_stub = ftrace_ool_stub_text_end; - ool_stub_index = &ool_stub_text_end_index; - ool_stub_count = ftrace_ool_stub_text_end_count; + /* +* ftrace records are sorted, so we first use up the stub area within 
.text +* (ftrace_ool_stub_text) before using the area at the end of .text +* (ftrace_ool_stub_text_end), un
[RFC PATCH v4 12/17] powerpc64/ftrace: Move ftrace sequence out of line
Function profile sequence on powerpc includes two instructions at the beginning of each function: mflr r0 bl ftrace_caller The call to ftrace_caller() gets nop'ed out during kernel boot and is patched in when ftrace is enabled. Given the sequence, we cannot return from ftrace_caller with 'blr' as we need to keep LR and r0 intact. This results in link stack (return address predictor) imbalance when ftrace is enabled. To address that, we would like to use a three instruction sequence: mflr r0 bl ftrace_caller mtlr r0 Furthermore, to support DYNAMIC_FTRACE_WITH_CALL_OPS, we need to reserve two instruction slots before the function. This results in a total of five instruction slots to be reserved for ftrace use on each function that is traced. Move the function profile sequence out-of-line to minimize its impact. To do this, we reserve a single nop at function entry using -fpatchable-function-entry=1 and add a pass on vmlinux.o to determine the total number of functions that can be traced. This is then used to generate a .S file reserving the appropriate amount of space for use as ftrace stubs, which is built and linked into vmlinux. On bootup, the stub space is split into separate stubs per function and populated with the proper instruction sequence. A pointer to the associated stub is maintained in dyn_arch_ftrace. For modules, space for ftrace stubs is reserved from the generic module stub space. This is restricted to and enabled by default only on 64-bit powerpc, though there are some changes to accommodate 32-bit powerpc. This is done so that 32-bit powerpc could choose to opt into this based on further tests and benchmarks. As an example, after this patch, kernel functions will have a single nop at function entry: <kernel_clone>: addis r2,r12,467 addi r2,r2,-16028 nop mfocrf r11,8 ...
When ftrace is enabled, the nop is converted to an unconditional branch to the stub associated with that function: <kernel_clone>: addis r2,r12,467 addi r2,r2,-16028 b ftrace_ool_stub_text_end+0x11b28 mfocrf r11,8 ... The associated stub: <ftrace_ool_stub_text_end+0x11b28>: mflr r0 bl ftrace_caller mtlr r0 b kernel_clone+0xc ... Signed-off-by: Naveen N Rao --- arch/powerpc/Kconfig | 5 + arch/powerpc/Makefile | 4 + arch/powerpc/include/asm/ftrace.h | 11 ++ arch/powerpc/include/asm/module.h | 5 + arch/powerpc/kernel/asm-offsets.c | 4 + arch/powerpc/kernel/module_64.c| 58 +++- arch/powerpc/kernel/trace/ftrace.c | 157 +++-- arch/powerpc/kernel/trace/ftrace_entry.S | 116 +++ arch/powerpc/tools/Makefile| 10 ++ arch/powerpc/tools/ftrace-gen-ool-stubs.sh | 48 +++ 10 files changed, 381 insertions(+), 37 deletions(-) create mode 100644 arch/powerpc/tools/Makefile create mode 100755 arch/powerpc/tools/ftrace-gen-ool-stubs.sh diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 68f0e7a5576f..f50cfd15bb73 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -568,6 +568,11 @@ config ARCH_USING_PATCHABLE_FUNCTION_ENTRY def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN +config PPC_FTRACE_OUT_OF_LINE + def_bool PPC64 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY + depends on PPC64 + select ARCH_WANTS_PRE_LINK_VMLINUX + config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index bbfe4a1f06ef..c973e6cd1ae8 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -155,7 +155,11 @@ CC_FLAGS_NO_FPU:= $(call cc-option,-msoft-float) ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY KBUILD_CPPFLAGS+= -DCC_USING_PATCHABLE_FUNCTION_ENTRY +ifdef 
CONFIG_PPC_FTRACE_OUT_OF_LINE +CC_FLAGS_FTRACE := -fpatchable-function-entry=1 +else CC_FLAGS_FTRACE := -fpatchable-function-entry=2 +endif else CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 201f9d15430a..0589bb252de7 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -26,6 +26,10 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, struct module; struct dyn_ftrace; struct dyn_arch_ftrace { +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* pointer to the associated out-of-line stub
[RFC PATCH v4 11/17] kbuild: Add generic hook for architectures to use before the final vmlinux link
On powerpc, we would like to be able to make a pass on vmlinux.o and generate a new object file to be linked into vmlinux. Add a generic pass in Makefile.vmlinux that architectures can use for this purpose. Architectures need to select CONFIG_ARCH_WANTS_PRE_LINK_VMLINUX and must provide arch//tools/Makefile with .arch.vmlinux.o target, which will be invoked prior to the final vmlinux link step. Signed-off-by: Naveen N Rao --- arch/Kconfig | 6 ++ scripts/Makefile.vmlinux | 8 scripts/link-vmlinux.sh | 11 --- 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/arch/Kconfig b/arch/Kconfig index 975dd22a2dbd..ef868ff8156a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1643,4 +1643,10 @@ config CC_HAS_SANE_FUNCTION_ALIGNMENT config ARCH_NEED_CMPXCHG_1_EMU bool +config ARCH_WANTS_PRE_LINK_VMLINUX + def_bool n + help + An architecture can select this if it provides arch//tools/Makefile + with .arch.vmlinux.o target to be linked into vmlinux. + endmenu diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 49946cb96844..6410e0be7f52 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -22,6 +22,14 @@ targets += .vmlinux.export.o vmlinux: .vmlinux.export.o endif +ifdef CONFIG_ARCH_WANTS_PRE_LINK_VMLINUX +targets += .arch.vmlinux.o +.arch.vmlinux.o: vmlinux.o FORCE + $(Q)$(MAKE) $(build)=arch/$(SRCARCH)/tools .arch.vmlinux.o + +vmlinux: .arch.vmlinux.o +endif + ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) # Final link of vmlinux with optional arch pass after final link diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index 518c70b8db50..aafaed1412ea 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -122,7 +122,7 @@ gen_btf() return 1 fi - vmlinux_link ${1} + vmlinux_link ${1} ${arch_vmlinux_o} info "BTF" ${2} LLVM_OBJCOPY="${OBJCOPY}" ${PAHOLE} -J ${PAHOLE_FLAGS} ${1} @@ -178,7 +178,7 @@ kallsyms_step() kallsymso=${kallsyms_vmlinux}.o kallsyms_S=${kallsyms_vmlinux}.S 
- vmlinux_link ${kallsyms_vmlinux} "${kallsymso_prev}" ${btf_vmlinux_bin_o} + vmlinux_link ${kallsyms_vmlinux} "${kallsymso_prev}" ${btf_vmlinux_bin_o} ${arch_vmlinux_o} mksysmap ${kallsyms_vmlinux} ${kallsyms_vmlinux}.syms kallsyms ${kallsyms_vmlinux}.syms ${kallsyms_S} @@ -223,6 +223,11 @@ fi ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init init/version-timestamp.o +arch_vmlinux_o="" +if is_enabled CONFIG_ARCH_WANTS_PRE_LINK_VMLINUX; then + arch_vmlinux_o=.arch.vmlinux.o +fi + btf_vmlinux_bin_o="" if is_enabled CONFIG_DEBUG_INFO_BTF; then btf_vmlinux_bin_o=.btf.vmlinux.bin.o @@ -273,7 +278,7 @@ if is_enabled CONFIG_KALLSYMS; then fi fi -vmlinux_link vmlinux "${kallsymso}" ${btf_vmlinux_bin_o} +vmlinux_link vmlinux "${kallsymso}" ${btf_vmlinux_bin_o} ${arch_vmlinux_o} # fill in BTF IDs if is_enabled CONFIG_DEBUG_INFO_BTF && is_enabled CONFIG_BPF; then -- 2.45.2
[RFC PATCH v4 00/17] powerpc: Core ftrace rework, support for ftrace direct and bpf trampolines
This is v4 of the series posted here: http://lkml.kernel.org/r/cover.1718908016.git.nav...@kernel.org This series reworks core ftrace support on powerpc to have the function profiling sequence moved out of line. This enables us to have a single nop at kernel function entry virtually eliminating effect of the function tracer when it is not enabled. The function profile sequence is moved out of line and is allocated at two separate places depending on a new config option. For 64-bit powerpc, the function profiling sequence is also updated to include an additional instruction 'mtlr r0' after the usual two-instruction sequence to fix link stack imbalance (return address predictor) when ftrace is enabled. This showed an improvement of ~22% in null_syscall benchmark on a Power 10 system with ftrace enabled. Finally, support for ftrace direct calls is added based on support for DYNAMIC_FTRACE_WITH_CALL_OPS. BPF Trampoline support is added atop this. Support for ftrace direct calls is added for 32-bit powerpc. There is some code to enable bpf trampolines for 32-bit powerpc, but it is not complete and will need to be pursued separately. This is marked RFC so that this can get more testing. Patches 1 to 10 are independent of this series and can go in separately though. Rest of the patches depend on the series from Benjamin Gray adding support for patch_uint() and patch_ulong(): http://lkml.kernel.org/r/20240515024445.236364-1-bg...@linux.ibm.com Changelog v4: - Patches 1, 10 and 13 are new. - Address review comments from Nick. Numerous changes throughout the patch series. - Extend support for ftrace ool to vmlinux text up to 64MB (patch 13). - Address remaining TODOs in support for BPF Trampolines. - Update synchronization when patching instructions during trampoline attach/detach. 
- Naveen Naveen N Rao (17): powerpc/trace: Account for -fpatchable-function-entry support by toolchain powerpc/kprobes: Use ftrace to determine if a probe is at function entry powerpc64/ftrace: Nop out additional 'std' instruction emitted by gcc v5.x powerpc32/ftrace: Unify 32-bit and 64-bit ftrace entry code powerpc/module_64: Convert #ifdef to IS_ENABLED() powerpc/ftrace: Remove pointer to struct module from dyn_arch_ftrace powerpc/ftrace: Skip instruction patching if the instructions are the same powerpc/ftrace: Move ftrace stub used for init text before _einittext powerpc64/bpf: Fold bpf_jit_emit_func_call_hlp() into bpf_jit_emit_func_call_rel() powerpc/ftrace: Add a postlink script to validate function tracer kbuild: Add generic hook for architectures to use before the final vmlinux link powerpc64/ftrace: Move ftrace sequence out of line powerpc64/ftrace: Support .text larger than 32MB with out-of-line stubs powerpc/ftrace: Add support for DYNAMIC_FTRACE_WITH_CALL_OPS powerpc/ftrace: Add support for DYNAMIC_FTRACE_WITH_DIRECT_CALLS samples/ftrace: Add support for ftrace direct samples on powerpc powerpc64/bpf: Add support for bpf trampolines arch/Kconfig| 6 + arch/powerpc/Kconfig| 23 +- arch/powerpc/Makefile | 8 + arch/powerpc/Makefile.postlink | 8 + arch/powerpc/include/asm/ftrace.h | 32 +- arch/powerpc/include/asm/module.h | 5 + arch/powerpc/include/asm/ppc-opcode.h | 14 + arch/powerpc/kernel/asm-offsets.c | 11 + arch/powerpc/kernel/kprobes.c | 18 +- arch/powerpc/kernel/module_64.c | 66 +- arch/powerpc/kernel/trace/Makefile | 11 +- arch/powerpc/kernel/trace/ftrace.c | 295 ++- arch/powerpc/kernel/trace/ftrace_64_pg.c| 69 +- arch/powerpc/kernel/trace/ftrace_entry.S| 246 -- arch/powerpc/kernel/vmlinux.lds.S | 3 +- arch/powerpc/net/bpf_jit.h | 12 + arch/powerpc/net/bpf_jit_comp.c | 842 +++- arch/powerpc/net/bpf_jit_comp32.c | 7 +- arch/powerpc/net/bpf_jit_comp64.c | 68 +- arch/powerpc/tools/Makefile | 10 + arch/powerpc/tools/ftrace-gen-ool-stubs.sh | 52 ++ 
arch/powerpc/tools/ftrace_check.sh | 45 ++ samples/ftrace/ftrace-direct-modify.c | 85 +- samples/ftrace/ftrace-direct-multi-modify.c | 101 ++- samples/ftrace/ftrace-direct-multi.c| 79 +- samples/ftrace/ftrace-direct-too.c | 83 +- samples/ftrace/ftrace-direct.c | 69 +- scripts/Makefile.vmlinux| 8 + scripts/link-vmlinux.sh | 11 +- 29 files changed, 2083 insertions(+), 204 deletions(-) create mode 100644 arch/powerpc/tools/Makefile create mode 100755 arch/powerpc/tools/ftrace-gen-ool-stubs.sh create mode 100755 arch/powerpc/tools/ftrace_check.sh base-commit: 582b0e554593e530b1386eacafee6c412c5673cc prerequisite-patch-id: a1d50e589288239d5a8b
[PATCH v4] trace/kprobe: Display the actual notrace function when rejecting a probe
Trying to probe update_sd_lb_stats() using perf results in the below message in the kernel log: trace_kprobe: Could not probe notrace function _text This is because 'perf probe' specifies the kprobe location as an offset from '_text': $ sudo perf probe -D update_sd_lb_stats p:probe/update_sd_lb_stats _text+1830728 However, the error message is misleading and doesn't help convey the actual notrace function that is being probed. Fix this by looking up the actual function name that is being probed. With this fix, we now get the below message in the kernel log: trace_kprobe: Could not probe notrace function update_sd_lb_stats.constprop.0 Signed-off-by: Naveen N Rao --- v4: Use printk format specifier %ps with probe address to lookup the symbol, as suggested by Masami. kernel/trace/trace_kprobe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3d7a180a8427..0017404d6e8d 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -487,8 +487,8 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) return -EINVAL; if (within_notrace_func(tk)) { - pr_warn("Could not probe notrace function %s\n", - trace_kprobe_symbol(tk)); + pr_warn("Could not probe notrace function %ps\n", + (void *)trace_kprobe_address(tk)); return -EINVAL; } base-commit: 4758560fa268cecfa1144f015aa9f2525d164b7e -- 2.43.0
Re: [PATCH v3] trace/kprobe: Display the actual notrace function when rejecting a probe
On Thu, Dec 14, 2023 at 08:02:10AM +0900, Masami Hiramatsu wrote: > On Wed, 13 Dec 2023 20:09:14 +0530 > Naveen N Rao wrote: > > > Trying to probe update_sd_lb_stats() using perf results in the below > > message in the kernel log: > > trace_kprobe: Could not probe notrace function _text > > > > This is because 'perf probe' specifies the kprobe location as an offset > > from '_text': > > $ sudo perf probe -D update_sd_lb_stats > > p:probe/update_sd_lb_stats _text+1830728 > > > > However, the error message is misleading and doesn't help convey the > > actual notrace function that is being probed. Fix this by looking up the > > actual function name that is being probed. With this fix, we now get the > > below message in the kernel log: > > trace_kprobe: Could not probe notrace function > > update_sd_lb_stats.constprop.0 > > > > Signed-off-by: Naveen N Rao > > --- > > v3: Remove tk parameter from within_notrace_func() as suggested by > > Masami > > > > kernel/trace/trace_kprobe.c | 11 ++- > > 1 file changed, 6 insertions(+), 5 deletions(-) > > > > diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c > > index 3d7a180a8427..dc36c6ed6131 100644 > > --- a/kernel/trace/trace_kprobe.c > > +++ b/kernel/trace/trace_kprobe.c > > @@ -449,9 +449,8 @@ static bool __within_notrace_func(unsigned long addr) > > return !ftrace_location_range(addr, addr + size - 1); > > } > > > > -static bool within_notrace_func(struct trace_kprobe *tk) > > +static bool within_notrace_func(unsigned long addr) > > { > > - unsigned long addr = trace_kprobe_address(tk); > > char symname[KSYM_NAME_LEN], *p; > > > > if (!__within_notrace_func(addr)) > > @@ -471,12 +470,14 @@ static bool within_notrace_func(struct trace_kprobe > > *tk) > > return true; > > } > > #else > > -#define within_notrace_func(tk)(false) > > +#define within_notrace_func(addr) (false) > > #endif > > > > /* Internal register function - just handle k*probes and flags */ > > static int __register_trace_kprobe(struct 
trace_kprobe *tk) > > { > > + unsigned long addr = trace_kprobe_address(tk); > > + char symname[KSYM_NAME_LEN]; > > int i, ret; > > > > ret = security_locked_down(LOCKDOWN_KPROBES); > > @@ -486,9 +487,9 @@ static int __register_trace_kprobe(struct trace_kprobe > > *tk) > > if (trace_kprobe_is_registered(tk)) > > return -EINVAL; > > > > - if (within_notrace_func(tk)) { > > + if (within_notrace_func(addr)) { > > pr_warn("Could not probe notrace function %s\n", > > - trace_kprobe_symbol(tk)); > > + lookup_symbol_name(addr, symname) ? > > trace_kprobe_symbol(tk) : symname); > > Can we just use %ps and (void *)trace_kprobe_address(tk) here? > > That will be simpler. Indeed - that is much simpler. v4 on its way... Thanks! - Naveen
[PATCH v3] trace/kprobe: Display the actual notrace function when rejecting a probe
Trying to probe update_sd_lb_stats() using perf results in the below message in the kernel log: trace_kprobe: Could not probe notrace function _text This is because 'perf probe' specifies the kprobe location as an offset from '_text': $ sudo perf probe -D update_sd_lb_stats p:probe/update_sd_lb_stats _text+1830728 However, the error message is misleading and doesn't help convey the actual notrace function that is being probed. Fix this by looking up the actual function name that is being probed. With this fix, we now get the below message in the kernel log: trace_kprobe: Could not probe notrace function update_sd_lb_stats.constprop.0 Signed-off-by: Naveen N Rao --- v3: Remove tk parameter from within_notrace_func() as suggested by Masami kernel/trace/trace_kprobe.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 3d7a180a8427..dc36c6ed6131 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -449,9 +449,8 @@ static bool __within_notrace_func(unsigned long addr) return !ftrace_location_range(addr, addr + size - 1); } -static bool within_notrace_func(struct trace_kprobe *tk) +static bool within_notrace_func(unsigned long addr) { - unsigned long addr = trace_kprobe_address(tk); char symname[KSYM_NAME_LEN], *p; if (!__within_notrace_func(addr)) @@ -471,12 +470,14 @@ static bool within_notrace_func(struct trace_kprobe *tk) return true; } #else -#define within_notrace_func(tk)(false) +#define within_notrace_func(addr) (false) #endif /* Internal register function - just handle k*probes and flags */ static int __register_trace_kprobe(struct trace_kprobe *tk) { + unsigned long addr = trace_kprobe_address(tk); + char symname[KSYM_NAME_LEN]; int i, ret; ret = security_locked_down(LOCKDOWN_KPROBES); @@ -486,9 +487,9 @@ static int __register_trace_kprobe(struct trace_kprobe *tk) if (trace_kprobe_is_registered(tk)) return -EINVAL; - if (within_notrace_func(tk)) { + if 
(within_notrace_func(addr)) { pr_warn("Could not probe notrace function %s\n", - trace_kprobe_symbol(tk)); + lookup_symbol_name(addr, symname) ? trace_kprobe_symbol(tk) : symname); return -EINVAL; } base-commit: 4758560fa268cecfa1144f015aa9f2525d164b7e -- 2.43.0
Re: [PATCH v1 2/2] powerpc: Enable OPTPROBES on PPC32
Christophe Leroy wrote: For that, create a 32 bits version of patch_imm64_load_insns() and create a patch_imm_load_insns() which calls patch_imm32_load_insns() on PPC32 and patch_imm64_load_insns() on PPC64. Adapt optprobes_head.S for PPC32. Use PPC_LL/PPC_STL macros instead of raw ld/std, opt out things linked to paca and use stmw/lmw to save/restore registers. Signed-off-by: Christophe Leroy --- arch/powerpc/Kconfig | 2 +- arch/powerpc/kernel/optprobes.c | 24 +-- arch/powerpc/kernel/optprobes_head.S | 46 +++- 3 files changed, 53 insertions(+), 19 deletions(-) Thanks for adding support for ppc32. It is good to see that it works without too many changes. diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c1344c05226c..49b538e54efb 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -227,7 +227,7 @@ config PPC select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_HARDLOCKUP_DETECTOR_ARCHif PPC64 && PPC_BOOK3S && SMP - select HAVE_OPTPROBES if PPC64 + select HAVE_OPTPROBES select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 select HAVE_HARDLOCKUP_DETECTOR_PERFif PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c index 58fdb9f66e0f..cdf87086fa33 100644 --- a/arch/powerpc/kernel/optprobes.c +++ b/arch/powerpc/kernel/optprobes.c @@ -141,11 +141,21 @@ void arch_remove_optimized_kprobe(struct optimized_kprobe *op) } } +static void patch_imm32_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +{ + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_RAW_LIS(reg, IMM_H(val; + addr++; + + patch_instruction((struct ppc_inst *)addr, + ppc_inst(PPC_RAW_ORI(reg, reg, IMM_L(val; +} + /* * Generate instructions to load provided immediate 64-bit value * to register 'reg' and patch these instructions at 'addr'. 
*/ -static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +static void patch_imm64_load_insns(unsigned long long val, int reg, kprobe_opcode_t *addr) Do you really need this? { /* lis reg,(op)@highest */ patch_instruction((struct ppc_inst *)addr, @@ -177,6 +187,14 @@ static void patch_imm64_load_insns(unsigned long val, int reg, kprobe_opcode_t * ___PPC_RS(reg) | (val & 0xffff))); } +static void patch_imm_load_insns(unsigned long val, int reg, kprobe_opcode_t *addr) +{ + if (IS_ENABLED(CONFIG_PPC64)) + patch_imm64_load_insns(val, reg, addr); + else + patch_imm32_load_insns(val, reg, addr); +} + int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) { struct ppc_inst branch_op_callback, branch_emulate_step, temp; @@ -230,7 +248,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * Fixup the template with instructions to: * 1. load the address of the actual probepoint */ - patch_imm64_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); + patch_imm_load_insns((unsigned long)op, 3, buff + TMPL_OP_IDX); /* * 2. branch to optimized_callback() and emulate_step() @@ -264,7 +282,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p) * 3. load instruction to be emulated into relevant register, and */ temp = ppc_inst_read((struct ppc_inst *)p->ainsn.insn); - patch_imm64_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); + patch_imm_load_insns(ppc_inst_as_ulong(temp), 4, buff + TMPL_INSN_IDX); /* * 4. 
branch back from trampoline diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S index ff8ba4d3824d..49f31e554573 100644 --- a/arch/powerpc/kernel/optprobes_head.S +++ b/arch/powerpc/kernel/optprobes_head.S @@ -30,39 +30,47 @@ optinsn_slot: .global optprobe_template_entry optprobe_template_entry: /* Create an in-memory pt_regs */ - stdur1,-INT_FRAME_SIZE(r1) + PPC_STLUr1,-INT_FRAME_SIZE(r1) SAVE_GPR(0,r1) /* Save the previous SP into stack */ addir0,r1,INT_FRAME_SIZE - std r0,GPR1(r1) + PPC_STL r0,GPR1(r1) +#ifdef CONFIG_PPC64 SAVE_10GPRS(2,r1) SAVE_10GPRS(12,r1) SAVE_10GPRS(22,r1) +#else + stmwr2, GPR2(r1) +#endif /* Save SPRS */ mfmsr r5 - std r5,_MSR(r1) + PPC_STL r5,_MSR(r1) li r5,0x700 - std r5,_TRAP(r1) + PPC_STL r5,_TRAP(r1) li r5,0 - std r5,ORIG_GPR3(r1)
Re: [PATCH v4] powerpc/uprobes: Validation for prefixed instruction
On 2021/03/09 08:54PM, Michael Ellerman wrote: > Ravi Bangoria writes: > > As per ISA 3.1, prefixed instruction should not cross 64-byte > > boundary. So don't allow Uprobe on such prefixed instruction. > > > > There are two ways probed instruction is changed in mapped pages. > > First, when Uprobe is activated, it searches for all the relevant > > pages and replace instruction in them. In this case, if that probe > > is on the 64-byte unaligned prefixed instruction, error out > > directly. Second, when Uprobe is already active and user maps a > > relevant page via mmap(), instruction is replaced via mmap() code > > path. But because Uprobe is invalid, entire mmap() operation can > > not be stopped. In this case just print an error and continue. > > > > Signed-off-by: Ravi Bangoria > > Acked-by: Naveen N. Rao > > Do we have a Fixes: tag for this? Since this is an additional check we are adding, I don't think we should add a Fixes: tag. Nothing is broken per-se -- we're just adding more checks to catch simple mistakes. Also, like Oleg pointed out, there are still many other ways for users to shoot themselves in the foot with uprobes and prefixed instructions, if they so desire. However, if you still think we should add a Fixes: tag, we can perhaps use the below commit since I didn't see any specific commit adding support for prefixed instructions for uprobes: Fixes: 650b55b707fdfa ("powerpc: Add prefixed instructions to instruction data type") > > > --- > > v3: > > https://lore.kernel.org/r/20210304050529.59391-1-ravi.bango...@linux.ibm.com > > v3->v4: > > - CONFIG_PPC64 check was not required, remove it. > > - Use SZ_ macros instead of hardcoded numbers. 
> > > > arch/powerpc/kernel/uprobes.c | 7 +++ > > 1 file changed, 7 insertions(+) > > > > diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c > > index e8a63713e655..4cbfff6e94a3 100644 > > --- a/arch/powerpc/kernel/uprobes.c > > +++ b/arch/powerpc/kernel/uprobes.c > > @@ -41,6 +41,13 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, > > if (addr & 0x03) > > return -EINVAL; > > > > + if (cpu_has_feature(CPU_FTR_ARCH_31) && > > + ppc_inst_prefixed(auprobe->insn) && > > + (addr & (SZ_64 - 4)) == SZ_64 - 4) { > > + pr_info_ratelimited("Cannot register a uprobe on 64 byte > > unaligned prefixed instruction\n"); > > + return -EINVAL; > > I realise we already did the 0x03 check above, but I still think this > would be clearer simply as: > > (addr & 0x3f == 60) Indeed, I like the use of `60' there -- hex is overrated ;) - Naveen
Re: [PATCH v3] powerpc/uprobes: Validation for prefixed instruction
On 2021/03/04 10:35AM, Ravi Bangoria wrote: > As per ISA 3.1, prefixed instruction should not cross 64-byte > boundary. So don't allow Uprobe on such prefixed instruction. > > There are two ways probed instruction is changed in mapped pages. > First, when Uprobe is activated, it searches for all the relevant > pages and replace instruction in them. In this case, if that probe > is on the 64-byte unaligned prefixed instruction, error out > directly. Second, when Uprobe is already active and user maps a > relevant page via mmap(), instruction is replaced via mmap() code > path. But because Uprobe is invalid, entire mmap() operation can > not be stopped. In this case just print an error and continue. > > Signed-off-by: Ravi Bangoria > --- > v2: > https://lore.kernel.org/r/20210204104703.273429-1-ravi.bango...@linux.ibm.com > v2->v3: > - Drop restriction for Uprobe on suffix of prefixed instruction. > It needs lot of code change including generic code but what > we get in return is not worth it. > > arch/powerpc/kernel/uprobes.c | 8 > 1 file changed, 8 insertions(+) > > diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c > index e8a63713e655..c400971ebe70 100644 > --- a/arch/powerpc/kernel/uprobes.c > +++ b/arch/powerpc/kernel/uprobes.c > @@ -41,6 +41,14 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, > if (addr & 0x03) > return -EINVAL; > > + if (!IS_ENABLED(CONFIG_PPC64) || !cpu_has_feature(CPU_FTR_ARCH_31)) > + return 0; Sorry, I missed this last time, but I think we can drop the above checks. ppc_inst_prefixed() already factors in the dependency on CONFIG_PPC64, and I don't think we need to confirm if we're running on a ISA V3.1 for the below check. With that: Acked-by: Naveen N. Rao > + > + if (ppc_inst_prefixed(auprobe->insn) && (addr & 0x3F) == 0x3C) { > + pr_info_ratelimited("Cannot register a uprobe on 64 byte > unaligned prefixed instruction\n"); > + return -EINVAL; > + } > + - Naveen
Re: [PATCH v2] powerpc/uprobes: Validation for prefixed instruction
On 2021/02/04 06:38PM, Naveen N. Rao wrote: > On 2021/02/04 04:17PM, Ravi Bangoria wrote: > > Don't allow Uprobe on 2nd word of a prefixed instruction. As per > > ISA 3.1, prefixed instruction should not cross 64-byte boundary. > > So don't allow Uprobe on such prefixed instruction as well. > > > > There are two ways probed instruction is changed in mapped pages. > > First, when Uprobe is activated, it searches for all the relevant > > pages and replace instruction in them. In this case, if we notice > > that probe is on the 2nd word of prefixed instruction, error out > > directly. Second, when Uprobe is already active and user maps a > > relevant page via mmap(), instruction is replaced via mmap() code > > path. But because Uprobe is invalid, entire mmap() operation can > > not be stopped. In this case just print an error and continue. > > > > Signed-off-by: Ravi Bangoria > > --- > > v1: > > http://lore.kernel.org/r/20210119091234.76317-1-ravi.bango...@linux.ibm.com > > v1->v2: > > - Instead of introducing new arch hook from verify_opcode(), use > > existing hook arch_uprobe_analyze_insn(). > > - Add explicit check for prefixed instruction crossing 64-byte > > boundary. If probe is on such instruction, throw an error. 
> > > > arch/powerpc/kernel/uprobes.c | 66 ++- > > 1 file changed, 65 insertions(+), 1 deletion(-) > > > > diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c > > index e8a63713e655..485d19a2a31f 100644 > > --- a/arch/powerpc/kernel/uprobes.c > > +++ b/arch/powerpc/kernel/uprobes.c > > @@ -7,6 +7,7 @@ > > * Adapted from the x86 port by Ananth N Mavinakayanahalli > > > > */ > > #include > > +#include > > #include > > #include > > #include > > @@ -28,6 +29,69 @@ bool is_trap_insn(uprobe_opcode_t *insn) > > return (is_trap(*insn)); > > } > > > > +#ifdef CONFIG_PPC64 > > +static int get_instr(struct mm_struct *mm, unsigned long addr, u32 *instr) > > +{ > > + struct page *page; > > + struct vm_area_struct *vma; > > + void *kaddr; > > + unsigned int gup_flags = FOLL_FORCE | FOLL_SPLIT_PMD; > > + > > + if (get_user_pages_remote(mm, addr, 1, gup_flags, &page, &vma, NULL) <= > > 0) > > + return -EINVAL; > > + > > + kaddr = kmap_atomic(page); > > + *instr = *((u32 *)(kaddr + (addr & ~PAGE_MASK))); > > + kunmap_atomic(kaddr); > > + put_page(page); > > + return 0; > > +} > > + > > +static int validate_prefixed_instr(struct mm_struct *mm, unsigned long > > addr) > > +{ > > + struct ppc_inst inst; > > + u32 prefix, suffix; > > + > > + /* > > +* No need to check if addr is pointing to beginning of the > > +* page. Even if probe is on a suffix of page-unaligned > > +* prefixed instruction, hw will raise exception and kernel > > +* will send SIGBUS. > > +*/ > > + if (!(addr & ~PAGE_MASK)) > > + return 0; > > + > > + if (get_instr(mm, addr, &prefix) < 0) > > + return -EINVAL; > > + if (get_instr(mm, addr + 4, &suffix) < 0) > > + return -EINVAL; > > + > > + inst = ppc_inst_prefix(prefix, suffix); > > + if (ppc_inst_prefixed(inst) && (addr & 0x3F) == 0x3C) { > > + printk_ratelimited("Cannot register a uprobe on 64 byte " > ^^ pr_info_ratelimited() > > It should be sufficient to check the primary opcode to determine if it > is a prefixed instruction. 
You don't have to read the suffix. I see that > we don't have a helper to do this currently, so you could do: > > if (ppc_inst_primary_opcode(ppc_inst(prefix)) == 1) Seeing the kprobes code, I realized that we have to check for another scenario (Thanks, Jordan!). If this is the suffix of a prefix instruction for which a uprobe has already been installed, then the previous word will be a 'trap' instruction. You need to check if there is a uprobe at the previous word, and if the original instruction there was a prefix instruction. - Naveen
Re: [PATCH v2] powerpc/uprobes: Validation for prefixed instruction
On 2021/02/04 04:19PM, Ravi Bangoria wrote: > > > On 2/4/21 4:17 PM, Ravi Bangoria wrote: > > Don't allow Uprobe on 2nd word of a prefixed instruction. As per > > ISA 3.1, prefixed instruction should not cross 64-byte boundary. > > So don't allow Uprobe on such prefixed instruction as well. > > > > There are two ways probed instruction is changed in mapped pages. > > First, when Uprobe is activated, it searches for all the relevant > > pages and replace instruction in them. In this case, if we notice > > that probe is on the 2nd word of prefixed instruction, error out > > directly. Second, when Uprobe is already active and user maps a > > relevant page via mmap(), instruction is replaced via mmap() code > > path. But because Uprobe is invalid, entire mmap() operation can > > not be stopped. In this case just print an error and continue. > > @mpe, > > arch_uprobe_analyze_insn() can return early if > cpu_has_feature(CPU_FTR_ARCH_31) is not set. But that will > miss out a rare scenario of user running binary with prefixed > instruction on p10 predecessors. Please let me know if I > should add cpu_has_feature(CPU_FTR_ARCH_31) or not. The check you are adding is very specific to prefixed instructions, so it makes sense to add a cpu feature check for v3.1. On older processors, those are invalid instructions like any other. The instruction emulation infrastructure will refuse to emulate it and the instruction will be single stepped. - Naveen
Re: [PATCH v2] powerpc/uprobes: Validation for prefixed instruction
On 2021/02/04 04:17PM, Ravi Bangoria wrote: > Don't allow Uprobe on 2nd word of a prefixed instruction. As per > ISA 3.1, prefixed instruction should not cross 64-byte boundary. > So don't allow Uprobe on such prefixed instruction as well. > > There are two ways probed instruction is changed in mapped pages. > First, when Uprobe is activated, it searches for all the relevant > pages and replace instruction in them. In this case, if we notice > that probe is on the 2nd word of prefixed instruction, error out > directly. Second, when Uprobe is already active and user maps a > relevant page via mmap(), instruction is replaced via mmap() code > path. But because Uprobe is invalid, entire mmap() operation can > not be stopped. In this case just print an error and continue. > > Signed-off-by: Ravi Bangoria > --- > v1: > http://lore.kernel.org/r/20210119091234.76317-1-ravi.bango...@linux.ibm.com > v1->v2: > - Instead of introducing new arch hook from verify_opcode(), use > existing hook arch_uprobe_analyze_insn(). > - Add explicit check for prefixed instruction crossing 64-byte > boundary. If probe is on such instruction, throw an error. 
> > arch/powerpc/kernel/uprobes.c | 66 ++- > 1 file changed, 65 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/kernel/uprobes.c b/arch/powerpc/kernel/uprobes.c > index e8a63713e655..485d19a2a31f 100644 > --- a/arch/powerpc/kernel/uprobes.c > +++ b/arch/powerpc/kernel/uprobes.c > @@ -7,6 +7,7 @@ > * Adapted from the x86 port by Ananth N Mavinakayanahalli > > */ > #include > +#include > #include > #include > #include > @@ -28,6 +29,69 @@ bool is_trap_insn(uprobe_opcode_t *insn) > return (is_trap(*insn)); > } > > +#ifdef CONFIG_PPC64 > +static int get_instr(struct mm_struct *mm, unsigned long addr, u32 *instr) > +{ > + struct page *page; > + struct vm_area_struct *vma; > + void *kaddr; > + unsigned int gup_flags = FOLL_FORCE | FOLL_SPLIT_PMD; > + > + if (get_user_pages_remote(mm, addr, 1, gup_flags, &page, &vma, NULL) <= > 0) > + return -EINVAL; > + > + kaddr = kmap_atomic(page); > + *instr = *((u32 *)(kaddr + (addr & ~PAGE_MASK))); > + kunmap_atomic(kaddr); > + put_page(page); > + return 0; > +} > + > +static int validate_prefixed_instr(struct mm_struct *mm, unsigned long addr) > +{ > + struct ppc_inst inst; > + u32 prefix, suffix; > + > + /* > + * No need to check if addr is pointing to beginning of the > + * page. Even if probe is on a suffix of page-unaligned > + * prefixed instruction, hw will raise exception and kernel > + * will send SIGBUS. > + */ > + if (!(addr & ~PAGE_MASK)) > + return 0; > + > + if (get_instr(mm, addr, &prefix) < 0) > + return -EINVAL; > + if (get_instr(mm, addr + 4, &suffix) < 0) > + return -EINVAL; > + > + inst = ppc_inst_prefix(prefix, suffix); > + if (ppc_inst_prefixed(inst) && (addr & 0x3F) == 0x3C) { > + printk_ratelimited("Cannot register a uprobe on 64 byte " ^^ pr_info_ratelimited() It should be sufficient to check the primary opcode to determine if it is a prefixed instruction. You don't have to read the suffix. 
I see that we don't have a helper to do this currently, so you could do: if (ppc_inst_primary_opcode(ppc_inst(prefix)) == 1) In the future, it might be worthwhile to add IS_PREFIX() as a macro similar to IS_MTMSRD() if there are more such uses. Along with this, if you also add the below to the start of this function, you can get rid of the #ifdef: if (!IS_ENABLED(CONFIG_PPC64)) return 0; - Naveen
Re: [PATCH] kprobes: Warn if the kprobe is reregistered
On 2021/02/03 11:59PM, Masami Hiramatsu wrote: > Warn if the kprobe is reregistered, since there must be > a software bug (actively used resource must not be re-registered) > and caller must be fixed. > > Signed-off-by: Masami Hiramatsu > --- > kernel/kprobes.c | 13 - > 1 file changed, 8 insertions(+), 5 deletions(-) Suggested-by: Naveen N. Rao Acked-by: Naveen N. Rao Thanks, Naveen
Re: [PATCH] selftests/ftrace: Disable multiple_kprobes test on powerpc
Masami Hiramatsu wrote: On Tue, 5 Jan 2021 19:01:56 +0900 Masami Hiramatsu wrote: On Tue, 5 Jan 2021 12:27:30 +0530 "Naveen N. Rao" wrote: > Not all symbols are blacklisted on powerpc. Disable multiple_kprobes > test until that is sorted, so that rest of ftrace and kprobe selftests > can be run. This looks good to me, but could you try to find the functions which should be blocked from kprobes? (Usually, the function which are involved in the sw-breakpoint handling, including locks etc.) Ah, OK. I wonder why CONFIG_KPROBE_EVENTS_ON_NOTRACE=n doesn't help, it was ignored if the arch doesn't support CONFIG_KPROBES_ON_FTRACE. Good point, though we do support CONFIG_KPROBES_ON_FTRACE on powerpc so the below patch is unlikely to help. However, since entry code is unlikely to be the source of the issue due to CONFIG_KPROBE_EVENTS_ON_NOTRACE, I will take another look to see where the problem lies. Naveen, could you try to run this test case with following patch on powerpc? diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index b911e9f6d9f5..241a55313476 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -433,7 +433,7 @@ static int disable_trace_kprobe(struct trace_event_call *call, return 0; } -#if defined(CONFIG_KPROBES_ON_FTRACE) && \ +#if defined(CONFIG_FUNCTION_TRACER) && \ !defined(CONFIG_KPROBE_EVENTS_ON_NOTRACE) static bool __within_notrace_func(unsigned long addr) { This looks like a good change regardless, so if you intend to post this separately: Acked-by: Naveen N. Rao Thanks, Naveen
Re: [PATCH] selftests/ftrace: Disable multiple_kprobes test on powerpc
Masami Hiramatsu wrote: On Tue, 5 Jan 2021 12:27:30 +0530 "Naveen N. Rao" wrote: Not all symbols are blacklisted on powerpc. Disable multiple_kprobes test until that is sorted, so that rest of ftrace and kprobe selftests can be run. This looks good to me, but could you try to find the functions which should be blocked from kprobes? (Usually, the function which are involved in the sw-breakpoint handling, including locks etc.) Yes, we did add several blacklists some time back, but there has been quite a bit of churn in our entry code. I've been meaning to audit it for a while now, but this has been blocking tests. It would be nice to skip this test for now until I am able to spend some time on this. Thanks, Naveen
[PATCH] selftests/ftrace: Disable multiple_kprobes test on powerpc
Not all symbols are blacklisted on powerpc. Disable multiple_kprobes test until that is sorted, so that rest of ftrace and kprobe selftests can be run. Signed-off-by: Naveen N. Rao --- .../testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc index 312d237800969e..41503c32f53eed 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc @@ -7,7 +7,7 @@ # Currently HAVE_KPROBES_ON_FTRACE defined on x86 and powerpc64le case `uname -m` in x86_64|i[3456]86) OFFS=5;; - ppc64le) OFFS=8;; + ppc*) exit_unsupported;; *) OFFS=0;; esac base-commit: 36bbbd0e234d817938bdc52121a0f5473b3e58f5 -- 2.25.4
Re: [PATCH 3/4] perf tools: Update powerpc's syscall.tbl
Arnaldo Carvalho de Melo wrote: Em Fri, Dec 18, 2020 at 08:08:56PM +0530, Naveen N. Rao escreveu: Hi Arnaldo, Arnaldo Carvalho de Melo wrote: > Em Fri, Dec 18, 2020 at 08:26:59AM -0300, Arnaldo Carvalho de Melo escreveu: > > Em Fri, Dec 18, 2020 at 03:59:23PM +0800, Tiezhu Yang escreveu: > > > This silences the following tools/perf/ build warning: > > > Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' > > > > Hi Ravi, Naveen, > > > > Can I get your Reviewed-by or Acked-by for this change and the > > other that adds s390's syscall.tbl to check_headers.sh so that we get > > oops s/s390/powerpc/g :-) > > > notified when the copy drifts, so that we can see if it still continues > > working and we can get new syscalls to be supported in things like 'perf > > trace'? Yes, this looks good to me: Reviewed-by: Naveen N. Rao FWIW, I had posted a similar patch back in April, but glad to have this go in ;) http://lkml.kernel.org/r/20200220063740.785913-1-naveen.n@linux.vnet.ibm.com My bad :-\ No worries, thanks for checking on this one. - Naveen
Re: [PATCH 3/4] perf tools: Update powerpc's syscall.tbl
Hi Arnaldo, Arnaldo Carvalho de Melo wrote: Em Fri, Dec 18, 2020 at 08:26:59AM -0300, Arnaldo Carvalho de Melo escreveu: Em Fri, Dec 18, 2020 at 03:59:23PM +0800, Tiezhu Yang escreveu: > This silences the following tools/perf/ build warning: > Warning: Kernel ABI header at 'tools/perf/arch/powerpc/entry/syscalls/syscall.tbl' differs from latest version at 'arch/powerpc/kernel/syscalls/syscall.tbl' Hi Ravi, Naveen, Can I get your Reviewed-by or Acked-by for this change and the other that adds s390's syscall.tbl to check_headers.sh so that we get oops s/s390/powerpc/g :-) notified when the copy drifts, so that we can see if it still continues working and we can get new syscalls to be supported in things like 'perf trace'? Yes, this looks good to me: Reviewed-by: Naveen N. Rao FWIW, I had posted a similar patch back in April, but glad to have this go in ;) http://lkml.kernel.org/r/20200220063740.785913-1-naveen.n@linux.vnet.ibm.com Thanks, Naveen
Re: [RFC PATCH 01/14] ftrace: Fix updating FTRACE_FL_TRAMP
Steven Rostedt wrote: On Thu, 26 Nov 2020 23:38:38 +0530 "Naveen N. Rao" wrote: On powerpc, kprobe-direct.tc triggered FTRACE_WARN_ON() in ftrace_get_addr_new() followed by the below message: Bad trampoline accounting at: 4222522f (wake_up_process+0xc/0x20) (f001) The set of steps leading to this involved: - modprobe ftrace-direct-too - enable_probe - modprobe ftrace-direct - rmmod ftrace-direct <-- trigger The problem turned out to be that we were not updating flags in the ftrace record properly. From the above message about the trampoline accounting being bad, it can be seen that the ftrace record still has FTRACE_FL_TRAMP set though ftrace-direct module is going away. This happens because we are checking if any ftrace_ops has the FTRACE_FL_TRAMP flag set _before_ updating the filter hash. The fix for this is to look for any _other_ ftrace_ops that also needs FTRACE_FL_TRAMP. I'm applying this now and sending this for -rc and stable. The code worked on x86 because x86 has a way to make all users use trampolines, so this was never an issue (everything has a trampoline). I modified the kernel so that x86 would not create its own trampoline (see the weak function arch_ftrace_update_trampoline(), and I was able to reproduce the bug. Good to know that you were able to reproduce this. I'm adding: Cc: sta...@vger.kernel.org Fixes: a124692b698b0 ("ftrace: Enable trampoline when rec count returns back to one") That looks good to me. Thanks for picking the two patches and for your review on the others! - Naveen
[RFC PATCH 09/14] powerpc/ftrace: Use a hash table for tracking ftrace stubs
In preparation for having to deal with large number of ftrace stubs in support of ftrace direct calls, convert existing stubs to use a hash table. The hash table is key'ed off the target address for the stubs since there could be multiple stubs for the same target to cover the full kernel text. Signed-off-by: Naveen N. Rao --- arch/powerpc/kernel/trace/ftrace.c | 75 +- 1 file changed, 33 insertions(+), 42 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 14b39f7797d455..7ddb6e4b527c39 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) "ftrace-powerpc: " fmt +#include #include #include #include @@ -32,14 +33,12 @@ #ifdef CONFIG_DYNAMIC_FTRACE -/* - * We generally only have a single long_branch tramp and at most 2 or 3 plt - * tramps generated. But, we don't use the plt tramps currently. We also allot - * 2 tramps after .text and .init.text. So, we only end up with around 3 usable - * tramps in total. Set aside 8 just to be sure. 
- */ -#defineNUM_FTRACE_TRAMPS 8 -static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; +static DEFINE_HASHTABLE(ppc_ftrace_stubs, 8); +struct ppc_ftrace_stub_data { + unsigned long addr; + unsigned long target; + struct hlist_node hentry; +}; static struct ppc_inst ftrace_call_replace(unsigned long ip, unsigned long addr, int link) @@ -288,36 +287,31 @@ __ftrace_make_nop(struct module *mod, #endif /* PPC64 */ #endif /* CONFIG_MODULES */ -static unsigned long find_ftrace_tramp(unsigned long ip) +static unsigned long find_ftrace_tramp(unsigned long ip, unsigned long target) { - int i; + struct ppc_ftrace_stub_data *stub; struct ppc_inst instr; - /* -* We have the compiler generated long_branch tramps at the end -* and we prefer those -*/ - for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--) - if (!ftrace_tramps[i]) - continue; - else if (create_branch(&instr, (void *)ip, - ftrace_tramps[i], 0) == 0) - return ftrace_tramps[i]; + hash_for_each_possible(ppc_ftrace_stubs, stub, hentry, target) + if (stub->target == target && !create_branch(&instr, (void *)ip, stub->addr, 0)) + return stub->addr; return 0; } -static int add_ftrace_tramp(unsigned long tramp) +static int add_ftrace_tramp(unsigned long tramp, unsigned long target) { - int i; + struct ppc_ftrace_stub_data *stub; - for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (!ftrace_tramps[i]) { - ftrace_tramps[i] = tramp; - return 0; - } + stub = kmalloc(sizeof(*stub), GFP_KERNEL); + if (!stub) + return -1; - return -1; + stub->addr = tramp; + stub->target = target; + hash_add(ppc_ftrace_stubs, &stub->hentry, target); + + return 0; } /* @@ -328,16 +322,14 @@ static int add_ftrace_tramp(unsigned long tramp) */ static int setup_mcount_compiler_tramp(unsigned long tramp) { - int i; struct ppc_inst op; - unsigned long ptr; struct ppc_inst instr; + struct ppc_ftrace_stub_data *stub; + unsigned long ptr, ftrace_target = ppc_global_function_entry((void *)FTRACE_REGS_ADDR); /* Is this a known long jump tramp? 
*/ - for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (!ftrace_tramps[i]) - break; - else if (ftrace_tramps[i] == tramp) + hash_for_each_possible(ppc_ftrace_stubs, stub, hentry, ftrace_target) + if (stub->target == ftrace_target && stub->addr == tramp) return 0; /* New trampoline -- read where this goes */ @@ -361,19 +353,18 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) } /* Let's re-write the tramp to go to ftrace_[regs_]caller */ - ptr = ppc_global_function_entry((void *)FTRACE_REGS_ADDR); - if (create_branch(&instr, (void *)tramp, ptr, 0)) { + if (create_branch(&instr, (void *)tramp, ftrace_target, 0)) { pr_debug("%ps is not reachable from existing mcount tramp\n", - (void *)ptr); + (void *)ftrace_target); return -1; } - if (patch_branch((struct ppc_inst *)tramp, ptr, 0)) { + if (patch_branch((struct ppc_inst *)tramp, ftrace_target, 0)) { pr_debug("REL24 out of range!\n"); return -1; } - if (add_ftrace_tramp(tramp)) { + if (add_ftrace_tramp(tramp, ftrace_target)) {
[RFC PATCH 11/14] powerpc/ftrace: Use GPR save/restore macros in ftrace_graph_caller()
Use SAVE_8GPRS(), REST_8GPRS() and _NIP(), along with using the standard SWITCH_FRAME_SIZE for the stack frame in ftrace_graph_caller() to simplify code. This increases the stack frame size, but it is unlikely to be an issue since ftrace_[regs_]caller() have just used a similar stack frame size, and it isn't evident that the graph caller has too deep a call stack to cause issues. Signed-off-by: Naveen N. Rao --- .../powerpc/kernel/trace/ftrace_64_mprofile.S | 28 +-- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index f9fd5f743eba34..bbe871b47ade58 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -279,24 +279,17 @@ livepatch_handler: #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(ftrace_graph_caller) - stdur1, -112(r1) + stdur1,-SWITCH_FRAME_SIZE(r1) /* with -mprofile-kernel, parameter regs are still alive at _mcount */ - std r10, 104(r1) - std r9, 96(r1) - std r8, 88(r1) - std r7, 80(r1) - std r6, 72(r1) - std r5, 64(r1) - std r4, 56(r1) - std r3, 48(r1) + SAVE_8GPRS(3, r1) /* Save callee's TOC in the ABI compliant location */ std r2, 24(r1) ld r2, PACATOC(r13)/* get kernel TOC in r2 */ - addir5, r1, 112 + addir5, r1, SWITCH_FRAME_SIZE mfctr r4 /* ftrace_caller has moved local addr here */ - std r4, 40(r1) + std r4, _NIP(r1) mflrr3 /* ftrace_caller has restored LR from stack */ subir4, r4, MCOUNT_INSN_SIZE @@ -309,21 +302,14 @@ _GLOBAL(ftrace_graph_caller) */ mtlrr3 - ld r0, 40(r1) + ld r0, _NIP(r1) mtctr r0 - ld r10, 104(r1) - ld r9, 96(r1) - ld r8, 88(r1) - ld r7, 80(r1) - ld r6, 72(r1) - ld r5, 64(r1) - ld r4, 56(r1) - ld r3, 48(r1) + REST_8GPRS(3, r1) /* Restore callee's TOC */ ld r2, 24(r1) - addir1, r1, 112 + addir1, r1, SWITCH_FRAME_SIZE mflrr0 std r0, LRSAVE(r1) bctr -- 2.25.4
[RFC PATCH 14/14] samples/ftrace: Add powerpc support for ftrace direct samples
Add a simple powerpc trampoline to demonstrate use of ftrace direct on powerpc. Signed-off-by: Naveen N. Rao --- samples/Kconfig | 2 +- samples/ftrace/ftrace-direct-modify.c | 58 +++ samples/ftrace/ftrace-direct-too.c| 48 -- samples/ftrace/ftrace-direct.c| 45 +++-- 4 files changed, 144 insertions(+), 9 deletions(-) diff --git a/samples/Kconfig b/samples/Kconfig index 0ed6e4d71d87b1..fdc9e44dba3b95 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -26,7 +26,7 @@ config SAMPLE_TRACE_PRINTK config SAMPLE_FTRACE_DIRECT tristate "Build register_ftrace_direct() example" depends on DYNAMIC_FTRACE_WITH_DIRECT_CALLS && m - depends on X86_64 # has x86_64 inlined asm + depends on X86_64 || PPC64 # has inlined asm help This builds an ftrace direct function example that hooks to wake_up_process and prints the parameters. diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index c13a5bc5095bea..89d66a12d300e1 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -2,6 +2,7 @@ #include #include #include +#include void my_direct_func1(void) { @@ -18,6 +19,7 @@ extern void my_tramp2(void *); static unsigned long my_ip = (unsigned long)schedule; +#ifdef CONFIG_X86 asm ( " .pushsection.text, \"ax\", @progbits\n" " .type my_tramp1, @function\n" @@ -38,6 +40,58 @@ asm ( " .size my_tramp2, .-my_tramp2\n" " .popsection\n" ); +#elif CONFIG_PPC64 +asm ( +" .pushsection.text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .global my_tramp1\n" +" my_tramp1:\n" +" std 0, 16(1)\n" +" stdu1, -480(1)\n" +" std 2, 24(1)\n" +" mflr7\n" +" std 7, 368(1)\n" +" bcl 20, 31, 1f\n" +"1:mflr12\n" +" ld 2, (3f - 1b)(12)\n" +" bl my_direct_func1\n" +" nop\n" +" ld 2, 24(1)\n" +" ld 7, 368(1)\n" +" mtctr 7\n" +" addi1, 1, 480\n" +" ld 0, 16(1)\n" +" mtlr0\n" +" bctr\n" +" .size my_tramp1, .-my_tramp1\n" +" nop\n" +" .type my_tramp2, @function\n" +" .global my_tramp2\n" +" my_tramp2:\n" +" std 0, 16(1)\n" +" 
stdu1, -480(1)\n" +" std 2, 24(1)\n" +" mflr7\n" +" std 7, 368(1)\n" +" bcl 20, 31, 2f\n" +"2:mflr12\n" +" ld 2, (3f - 2b)(12)\n" +" bl my_direct_func2\n" +" nop\n" +" ld 2, 24(1)\n" +" ld 7, 368(1)\n" +" mtctr 7\n" +" addi1, 1, 480\n" +" ld 0, 16(1)\n" +" mtlr0\n" +" bctr\n" +" .size my_tramp2, .-my_tramp2\n" +"3:\n" +" .quad .TOC.@tocbase\n" +" .popsection\n" +); +#endif + static unsigned long my_tramp = (unsigned long)my_tramp1; static unsigned long tramps[2] = { @@ -72,6 +126,10 @@ static int __init ftrace_direct_init(void) { int ret; +#ifdef CONFIG_PPC64 + my_ip = ppc_function_entry((void *)my_ip) + 4; +#endif + ret = register_ftrace_direct(my_ip, my_tramp); if (!ret) simple_tsk = kthread_run(simple_thread, NULL, "event-sample-fn"); diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index d5c5022be66429..9a82abecbe0dcc 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -3,6 +3,7 @@ #include /* for handle_mm_fault() */ #include +#include void my_direct_func(struct vm_area_struct *vma, unsigned long address, unsigned int flags) @@ -13,6 +14,9 @@ void my_direct_func(struct vm_area_struct *vma, extern void my_tramp(void *); +static unsigned long my_ip = (unsigned long)handle_mm_fault; + +#ifdef CONFIG_X86 asm ( " .pushsection.text, \"ax\", @progbits\n" " .type my_tramp, @function\n" @@ -31,18 +35,54 @@ asm ( " .size my_tramp, .-my_tramp\n" " .popsection\n" ); +#elif CONFIG_PPC64 +asm ( +" .pushsection.text, \"ax\", @progbits\n" +" .type my_tramp, @function\n
[RFC PATCH 10/14] powerpc/ftrace: Drop assumptions about ftrace trampoline target
We currently assume that ftrace locations are patched to go to either ftrace_caller or ftrace_regs_caller. Drop this assumption in preparation for supporting ftrace direct calls. Signed-off-by: Naveen N. Rao --- arch/powerpc/kernel/trace/ftrace.c | 107 +++-- 1 file changed, 86 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 7ddb6e4b527c39..fcb21a9756e456 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -322,14 +322,15 @@ static int add_ftrace_tramp(unsigned long tramp, unsigned long target) */ static int setup_mcount_compiler_tramp(unsigned long tramp) { + int i; struct ppc_inst op; struct ppc_inst instr; struct ppc_ftrace_stub_data *stub; unsigned long ptr, ftrace_target = ppc_global_function_entry((void *)FTRACE_REGS_ADDR); - /* Is this a known long jump tramp? */ - hash_for_each_possible(ppc_ftrace_stubs, stub, hentry, ftrace_target) - if (stub->target == ftrace_target && stub->addr == tramp) + /* Is this a known tramp? 
*/ + hash_for_each(ppc_ftrace_stubs, i, stub, hentry) + if (stub->addr == tramp) return 0; /* New trampoline -- read where this goes */ @@ -608,23 +609,16 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) { struct ppc_inst op; void *ip = (void *)rec->ip; - unsigned long tramp, entry, ptr; + unsigned long tramp, ptr; - /* Make sure we're being asked to patch branch to a known ftrace addr */ - entry = ppc_global_function_entry((void *)ftrace_caller); ptr = ppc_global_function_entry((void *)addr); - if (ptr != entry) { #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - entry = ppc_global_function_entry((void *)ftrace_regs_caller); - if (ptr != entry) { + /* Make sure we branch to ftrace_regs_caller since we only setup stubs for that */ + tramp = ppc_global_function_entry((void *)ftrace_caller); + if (ptr == tramp) + ptr = ppc_global_function_entry((void *)FTRACE_REGS_ADDR); #endif - pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr); - return -EINVAL; -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - } -#endif - } /* Make sure we have a nop */ if (probe_kernel_read_inst(&op, ip)) { @@ -637,7 +631,7 @@ static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr) return -EINVAL; } - tramp = find_ftrace_tramp((unsigned long)ip, FTRACE_REGS_ADDR); + tramp = find_ftrace_tramp((unsigned long)ip, ptr); if (!tramp) { pr_err("No ftrace trampolines reachable from %ps\n", ip); return -EINVAL; @@ -783,6 +777,81 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } #endif +static int +__ftrace_modify_call_kernel(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) +{ + struct ppc_inst op; + unsigned long ip = rec->ip; + unsigned long entry, ptr, tramp; + + /* read where this goes */ + if (probe_kernel_read_inst(&op, (void *)ip)) { + pr_err("Fetching opcode failed.\n"); + return -EFAULT; + } + + /* Make sure that this is still a 24bit jump */ + if (!is_bl_op(op)) { + pr_err("Not expected bl: opcode is 
%s\n", ppc_inst_as_str(op)); + return -EINVAL; + } + + /* lets find where the pointer goes */ + tramp = find_bl_target(ip, op); + entry = ppc_global_function_entry((void *)old_addr); + + pr_devel("ip:%lx jumps to %lx", ip, tramp); + + if (tramp != entry) { + /* old_addr is not within range, so we must have used a trampoline */ + struct ppc_ftrace_stub_data *stub; + + hash_for_each_possible(ppc_ftrace_stubs, stub, hentry, entry) + if (stub->target == entry && stub->addr == tramp) + break; + + if (stub->target != entry || stub->addr != tramp) { + pr_err("we don't know about the tramp at %lx!\n", tramp); + return -EFAULT; + } + } + + /* The new target may be within range */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + if (patch_branch((struct ppc_inst *)ip, addr, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } + + return 0; + } + + ptr = ppc_global_function_entry((void *)a
[RFC PATCH 13/14] powerpc/ftrace: Add support for register_ftrace_direct() for MPROFILE_KERNEL
Add support for register_ftrace_direct() for MPROFILE_KERNEL, as it depends on DYNAMIC_FTRACE_WITH_REGS. Since powerpc only provides a branch range of 32MB, we set aside a 64k area within kernel text for creating stubs that can be used to branch to the provided trampoline, which can be located in the module area. This is limited to kernel text, and as such, ftrace direct calls are not supported for functions in kernel modules at this time. We use orig_gpr3 to stash the address of the direct call trampoline in arch_ftrace_set_direct_caller(). ftrace_regs_caller() is updated to check for this to determine if we need to redirect to a direct call trampoline. As the direct call trampoline has to work as an alternative for the ftrace trampoline, we setup LR and r0 appropriately, and update ctr to the trampoline address. Finally, ftrace_graph_caller() is updated to save/restore r0. Signed-off-by: Naveen N. Rao --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/ftrace.h | 14 ++ arch/powerpc/kernel/trace/ftrace.c| 140 +- .../powerpc/kernel/trace/ftrace_64_mprofile.S | 40 - 4 files changed, 182 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index cfc6dd787f532c..a87ac2e403196e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -197,6 +197,7 @@ config PPC select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLSif MPROFILE_KERNEL select HAVE_DYNAMIC_FTRACE_WITH_REGSif MPROFILE_KERNEL select HAVE_EBPF_JITif PPC64 select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index bc76970b6ee532..2f1c46e9f5d416 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -10,6 +10,8 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR +#define FTRACE_STUBS_SIZE 65536 + #ifdef __ASSEMBLY__ /* Based off of objdump optput from glibc 
*/ @@ -59,6 +61,18 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) struct dyn_arch_ftrace { struct module *mod; }; + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * When there is a direct caller registered, we use regs->orig_gpr3 (similar to + * how x86 uses orig_ax) to let ftrace_{regs_}_caller know that we should go + * there instead of returning to the function + */ +static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, unsigned long addr) +{ + regs->orig_gpr3 = addr; +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #endif /* __ASSEMBLY__ */ #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index fcb21a9756e456..815b14ae45a71f 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -37,6 +37,7 @@ static DEFINE_HASHTABLE(ppc_ftrace_stubs, 8); struct ppc_ftrace_stub_data { unsigned long addr; unsigned long target; + refcount_t refs; struct hlist_node hentry; }; @@ -299,7 +300,7 @@ static unsigned long find_ftrace_tramp(unsigned long ip, unsigned long target) return 0; } -static int add_ftrace_tramp(unsigned long tramp, unsigned long target) +static int add_ftrace_tramp(unsigned long tramp, unsigned long target, int lock) { struct ppc_ftrace_stub_data *stub; @@ -309,11 +310,123 @@ static int add_ftrace_tramp(unsigned long tramp, unsigned long target) stub->addr = tramp; stub->target = target; + refcount_set(&stub->refs, 1); + if (lock) + refcount_inc(&stub->refs); hash_add(ppc_ftrace_stubs, &stub->hentry, target); return 0; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +static u32 ftrace_direct_stub_insns[] = { + PPC_RAW_LIS(12, 0), + PPC_RAW_ORI(12, 12, 0), + PPC_RAW_SLDI(12, 12, 32), + PPC_RAW_ORIS(12, 12, 0), + PPC_RAW_ORI(12, 12, 0), + PPC_RAW_MTCTR(12), + PPC_RAW_BCTR(), +}; +#define FTRACE_NUM_STUBS (FTRACE_STUBS_SIZE / sizeof(ftrace_direct_stub_insns)) +static DECLARE_BITMAP(stubs_bitmap, 
FTRACE_NUM_STUBS); +extern unsigned int ftrace_stubs[]; + +static unsigned long get_ftrace_tramp(unsigned long ip, unsigned long target) +{ + struct ppc_ftrace_stub_data *stub_data; + struct ppc_inst instr; + unsigned int *stub; + int index; + + hash_for_each_possible(ppc_ftrace_stubs, stub_data, hentry, target) { + if (stub_data->target == target && + !create_branch(&instr, (void *)ip, stub_data->addr, 0)) { + refcount_inc(&s
[RFC PATCH 12/14] powerpc/ftrace: Drop saving LR to stack save area for -mprofile-kernel
In -mprofile-kernel variant of ftrace_graph_caller(), we save the optionally-updated LR address into the stack save area at the end. This is likely an offshoot of the initial -mprofile-kernel implementation in gcc emitting the same as part of the -mprofile-kernel instruction sequence. However, this is not required. Drop it. Signed-off-by: Naveen N. Rao --- arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index bbe871b47ade58..c5602e9b07faa3 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -310,7 +310,5 @@ _GLOBAL(ftrace_graph_caller) ld r2, 24(r1) addir1, r1, SWITCH_FRAME_SIZE - mflrr0 - std r0, LRSAVE(r1) bctr #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- 2.25.4
[RFC PATCH 08/14] powerpc/ftrace: Use FTRACE_REGS_ADDR to identify the correct ftrace trampoline
Use FTRACE_REGS_ADDR instead of keying off CONFIG_DYNAMIC_FTRACE_WITH_REGS to identify the proper ftrace trampoline address to use. Signed-off-by: Naveen N. Rao --- arch/powerpc/kernel/trace/ftrace.c | 12 ++-- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 4fe5f373172fd2..14b39f7797d455 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -361,11 +361,7 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) } /* Let's re-write the tramp to go to ftrace_[regs_]caller */ -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - ptr = ppc_global_function_entry((void *)ftrace_regs_caller); -#else - ptr = ppc_global_function_entry((void *)ftrace_caller); -#endif + ptr = ppc_global_function_entry((void *)FTRACE_REGS_ADDR); if (create_branch(&instr, (void *)tramp, ptr, 0)) { pr_debug("%ps is not reachable from existing mcount tramp\n", (void *)ptr); @@ -885,11 +881,7 @@ int __init ftrace_dyn_arch_init(void) 0x7d8903a6, /* mtctr r12 */ 0x4e800420, /* bctr */ }; -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - unsigned long addr = ppc_global_function_entry((void *)ftrace_regs_caller); -#else - unsigned long addr = ppc_global_function_entry((void *)ftrace_caller); -#endif + unsigned long addr = ppc_global_function_entry((void *)FTRACE_REGS_ADDR); long reladdr = addr - kernel_toc_addr(); if (reladdr > 0x7FFF || reladdr < -(0x8000L)) { -- 2.25.4
[RFC PATCH 07/14] powerpc/ftrace: Remove dead code
ftrace_plt_tramps[] was intended to speed up skipping plt branches, but the code wasn't completed. It is also not significantly better than reading and decoding the instruction. Remove the same. Signed-off-by: Naveen N. Rao --- arch/powerpc/kernel/trace/ftrace.c | 8 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 42761ebec9f755..4fe5f373172fd2 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -332,7 +332,6 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) struct ppc_inst op; unsigned long ptr; struct ppc_inst instr; - static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS]; /* Is this a known long jump tramp? */ for (i = 0; i < NUM_FTRACE_TRAMPS; i++) @@ -341,13 +340,6 @@ static int setup_mcount_compiler_tramp(unsigned long tramp) else if (ftrace_tramps[i] == tramp) return 0; - /* Is this a known plt tramp? */ - for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (!ftrace_plt_tramps[i]) - break; - else if (ftrace_plt_tramps[i] == tramp) - return -1; - /* New trampoline -- read where this goes */ if (probe_kernel_read_inst(&op, (void *)tramp)) { pr_debug("Fetching opcode failed.\n"); -- 2.25.4
[RFC PATCH 06/14] powerpc: Add support for CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
Add register_get_kernel_argument() for a rudimentary way to access kernel function arguments. Signed-off-by: Naveen N. Rao --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/ptrace.h | 31 +++ 2 files changed, 32 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index e9f13fe084929b..cfc6dd787f532c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -202,6 +202,7 @@ config PPC select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index e2c778c176a3a6..956828c07abd70 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -62,6 +62,8 @@ struct pt_regs }; #endif +#define NR_REG_ARGUMENTS 8 + #ifdef __powerpc64__ /* @@ -85,8 +87,10 @@ struct pt_regs #ifdef PPC64_ELF_ABI_v2 #define STACK_FRAME_MIN_SIZE 32 +#define STACK_FRAME_PARM_SAVE 32 #else #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD +#define STACK_FRAME_PARM_SAVE 48 #endif /* Size of dummy stack frame allocated when calling signal handler. */ @@ -103,6 +107,7 @@ struct pt_regs #define STACK_INT_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD) #define STACK_FRAME_MARKER 2 #define STACK_FRAME_MIN_SIZE STACK_FRAME_OVERHEAD +#define STACK_FRAME_PARM_SAVE 8 /* Size of stack frame allocated when calling signal handler. */ #define __SIGNAL_FRAMESIZE 64 @@ -309,6 +314,32 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, return 0; } +/** + * regs_get_kernel_argument() - get Nth function argument in kernel + * @regs: pt_regs of that context + * @n: function argument number (start from 0) + * + * regs_get_argument() returns @n th argument of the function call. 
+ * Note that this chooses most probable assignment, and is incorrect + * in scenarios where double or fp/vector parameters are involved. + * This also doesn't take into account stack alignment requirements. + * + * This is expected to be called from kprobes or ftrace with regs + * at function entry, so the current function has not setup its stack. + */ +static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs, +unsigned int n) +{ + if (n >= NR_REG_ARGUMENTS) { +#ifndef __powerpc64__ + n -= NR_REG_ARGUMENTS; +#endif + n += STACK_FRAME_PARM_SAVE / sizeof(unsigned long); + return regs_get_kernel_stack_nth(regs, n); + } else { + return regs_get_register(regs, offsetof(struct pt_regs, gpr[n + 3])); + } +} #endif /* __ASSEMBLY__ */ #ifndef __powerpc64__ -- 2.25.4
[RFC PATCH 03/14] ftrace: Fix cleanup in error path of register_ftrace_direct()
We need to remove hash entry if register_ftrace_function() fails. Consolidate the cleanup to be done after register_ftrace_function() at the end. Signed-off-by: Naveen N. Rao --- kernel/trace/ftrace.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 9c1bba8cc51b03..3844a4a1346a9c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5136,8 +5136,6 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) __add_hash_entry(direct_functions, entry); ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); - if (ret) - remove_hash_entry(direct_functions, entry); if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) { ret = register_ftrace_function(&direct_ops); @@ -5146,6 +5144,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) } if (ret) { + remove_hash_entry(direct_functions, entry); kfree(entry); if (!direct->count) { list_del_rcu(&direct->next); -- 2.25.4
[RFC PATCH 02/14] ftrace: Fix DYNAMIC_FTRACE_WITH_DIRECT_CALLS dependency
DYNAMIC_FTRACE_WITH_DIRECT_CALLS should depend on DYNAMIC_FTRACE_WITH_REGS since we need ftrace_regs_caller(). Fixes: 763e34e74bb7d5c ("ftrace: Add register_ftrace_direct()") Signed-off-by: Naveen N. Rao --- kernel/trace/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig index a4020c0b4508c9..e1bf5228fb692a 100644 --- a/kernel/trace/Kconfig +++ b/kernel/trace/Kconfig @@ -202,7 +202,7 @@ config DYNAMIC_FTRACE_WITH_REGS config DYNAMIC_FTRACE_WITH_DIRECT_CALLS def_bool y - depends on DYNAMIC_FTRACE + depends on DYNAMIC_FTRACE_WITH_REGS depends on HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS config FUNCTION_PROFILER -- 2.25.4
[RFC PATCH 05/14] ftrace: Add architectural helpers for [un]register_ftrace_direct()
Architectures may want to do some validation (such as to ensure that the trampoline code is reachable from the provided ftrace location) before accepting ftrace direct registration. Add helpers for the same. Signed-off-by: Naveen N. Rao --- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 27 +-- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 46b4b7ee28c41f..3fdcb4c513bc2d 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -290,6 +290,8 @@ int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, unsigned long old_addr, unsigned long new_addr); unsigned long ftrace_find_rec_direct(unsigned long ip); +int arch_register_ftrace_direct(unsigned long ip, unsigned long addr); +void arch_unregister_ftrace_direct(unsigned long ip, unsigned long addr); #else # define ftrace_direct_func_count 0 static inline int register_ftrace_direct(unsigned long ip, unsigned long addr) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7476f2458b6d95..0e259b90527722 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5005,6 +5005,13 @@ ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, } #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +int __weak arch_register_ftrace_direct(unsigned long ip, unsigned long addr) +{ + return 0; +} + +void __weak arch_unregister_ftrace_direct(unsigned long ip, unsigned long addr) { } + /** * register_ftrace_direct - Call a custom trampoline directly * @ip: The address of the nop at the beginning of a function @@ -5028,6 +5035,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) struct ftrace_hash *free_hash = NULL; struct dyn_ftrace *rec; int ret = -EBUSY; + int arch_ret; mutex_lock(&direct_mutex); @@ -5082,18 +5090,24 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) entry->direct = addr; __add_hash_entry(direct_functions, entry); - ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 
0); + arch_ret = arch_register_ftrace_direct(ip, addr); - if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) { - ret = register_ftrace_function(&direct_ops); - if (ret) - ftrace_set_filter_ip(&direct_ops, ip, 1, 0); + if (!arch_ret) { + ret = ftrace_set_filter_ip(&direct_ops, ip, 0, 0); + + if (!ret && !(direct_ops.flags & FTRACE_OPS_FL_ENABLED)) { + ret = register_ftrace_function(&direct_ops); + if (ret) + ftrace_set_filter_ip(&direct_ops, ip, 1, 0); + } } - if (ret) { + if (arch_ret || ret) { remove_hash_entry(direct_functions, entry); ftrace_direct_func_count--; kfree(entry); + if (!arch_ret) + arch_unregister_ftrace_direct(ip, addr); } out_unlock: mutex_unlock(&direct_mutex); @@ -5155,6 +5169,7 @@ int unregister_ftrace_direct(unsigned long ip, unsigned long addr) remove_hash_entry(direct_functions, entry); ftrace_direct_func_count--; kfree(entry); + arch_unregister_ftrace_direct(ip, addr); out_unlock: mutex_unlock(&direct_mutex); -- 2.25.4
[RFC PATCH 04/14] ftrace: Remove ftrace_find_direct_func()
This is a revert of commit 013bf0da047481 ("ftrace: Add ftrace_find_direct_func()") ftrace_find_direct_func() was meant for use in the function graph tracer by architecture specific code. However, commit ff205766dbbee0 ("ftrace: Fix function_graph tracer interaction with BPF trampoline") disabled function graph tracer for direct calls leaving this without any users. In addition, modify_ftrace_direct() allowed redirecting the direct call to a different trampoline that was never registered through register_ftrace_direct(). This meant that ftrace_direct_funcs didn't capture all trampolines. Signed-off-by: Naveen N. Rao --- include/linux/ftrace.h | 5 --- kernel/trace/ftrace.c | 84 ++ 2 files changed, 4 insertions(+), 85 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1bd3a0356ae478..46b4b7ee28c41f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -285,7 +285,6 @@ extern int ftrace_direct_func_count; int register_ftrace_direct(unsigned long ip, unsigned long addr); int unregister_ftrace_direct(unsigned long ip, unsigned long addr); int modify_ftrace_direct(unsigned long ip, unsigned long old_addr, unsigned long new_addr); -struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr); int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, struct dyn_ftrace *rec, unsigned long old_addr, @@ -306,10 +305,6 @@ static inline int modify_ftrace_direct(unsigned long ip, { return -ENOTSUPP; } -static inline struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr) -{ - return NULL; -} static inline int ftrace_modify_direct_caller(struct ftrace_func_entry *entry, struct dyn_ftrace *rec, unsigned long old_addr, diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 3844a4a1346a9c..7476f2458b6d95 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -5005,46 +5005,6 @@ ftrace_set_addr(struct ftrace_ops *ops, unsigned long ip, int remove, } #ifdef 
CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS - -struct ftrace_direct_func { - struct list_headnext; - unsigned long addr; - int count; -}; - -static LIST_HEAD(ftrace_direct_funcs); - -/** - * ftrace_find_direct_func - test an address if it is a registered direct caller - * @addr: The address of a registered direct caller - * - * This searches to see if a ftrace direct caller has been registered - * at a specific address, and if so, it returns a descriptor for it. - * - * This can be used by architecture code to see if an address is - * a direct caller (trampoline) attached to a fentry/mcount location. - * This is useful for the function_graph tracer, as it may need to - * do adjustments if it traced a location that also has a direct - * trampoline attached to it. - */ -struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr) -{ - struct ftrace_direct_func *entry; - bool found = false; - - /* May be called by fgraph trampoline (protected by rcu tasks) */ - list_for_each_entry_rcu(entry, &ftrace_direct_funcs, next) { - if (entry->addr == addr) { - found = true; - break; - } - } - if (found) - return entry; - - return NULL; -} - /** * register_ftrace_direct - Call a custom trampoline directly * @ip: The address of the nop at the beginning of a function @@ -5064,7 +5024,6 @@ struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr) */ int register_ftrace_direct(unsigned long ip, unsigned long addr) { - struct ftrace_direct_func *direct; struct ftrace_func_entry *entry; struct ftrace_hash *free_hash = NULL; struct dyn_ftrace *rec; @@ -5118,19 +5077,7 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) if (!entry) goto out_unlock; - direct = ftrace_find_direct_func(addr); - if (!direct) { - direct = kmalloc(sizeof(*direct), GFP_KERNEL); - if (!direct) { - kfree(entry); - goto out_unlock; - } - direct->addr = addr; - direct->count = 0; - list_add_rcu(&direct->next, &ftrace_direct_funcs); - ftrace_direct_func_count++; - } - + 
ftrace_direct_func_count++; entry->ip = ip; entry->direct = addr; __add_hash_entry(direct_functions, entry); @@ -5145,18 +5092,8 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr) if (ret) { remove_hash_entry(direct_functions, entry); +
[RFC PATCH 01/14] ftrace: Fix updating FTRACE_FL_TRAMP
On powerpc, kprobe-direct.tc triggered FTRACE_WARN_ON() in ftrace_get_addr_new() followed by the below message: Bad trampoline accounting at: 4222522f (wake_up_process+0xc/0x20) (f001) The set of steps leading to this involved: - modprobe ftrace-direct-too - enable_probe - modprobe ftrace-direct - rmmod ftrace-direct <-- trigger The problem turned out to be that we were not updating flags in the ftrace record properly. From the above message about the trampoline accounting being bad, it can be seen that the ftrace record still has FTRACE_FL_TRAMP set though ftrace-direct module is going away. This happens because we are checking if any ftrace_ops has the FTRACE_FL_TRAMP flag set _before_ updating the filter hash. The fix for this is to look for any _other_ ftrace_ops that also needs FTRACE_FL_TRAMP. Signed-off-by: Naveen N. Rao --- kernel/trace/ftrace.c | 22 +- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 8185f7240095f4..9c1bba8cc51b03 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1629,6 +1629,8 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec); static struct ftrace_ops * +ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude); +static struct ftrace_ops * ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *ops); static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, @@ -1778,7 +1780,7 @@ static bool __ftrace_hash_rec_update(struct ftrace_ops *ops, * to it. 
*/ if (ftrace_rec_count(rec) == 1 && - ftrace_find_tramp_ops_any(rec)) + ftrace_find_tramp_ops_any_other(rec, ops)) rec->flags |= FTRACE_FL_TRAMP; else rec->flags &= ~FTRACE_FL_TRAMP; @@ -2244,6 +2246,24 @@ ftrace_find_tramp_ops_any(struct dyn_ftrace *rec) return NULL; } +static struct ftrace_ops * +ftrace_find_tramp_ops_any_other(struct dyn_ftrace *rec, struct ftrace_ops *op_exclude) +{ + struct ftrace_ops *op; + unsigned long ip = rec->ip; + + do_for_each_ftrace_op(op, ftrace_ops_list) { + + if (op == op_exclude || !op->trampoline) + continue; + + if (hash_contains_ip(ip, op->func_hash)) + return op; + } while_for_each_ftrace_op(op); + + return NULL; +} + static struct ftrace_ops * ftrace_find_tramp_ops_next(struct dyn_ftrace *rec, struct ftrace_ops *op) -- 2.25.4
[RFC PATCH 00/14] powerpc64: Add support for ftrace direct calls
This series adds support for DYNAMIC_FTRACE_WITH_DIRECT_CALLS for powerpc64le. This is mostly working fine for me, except for a soft lockup I see with the ftrace direct selftest. It happens when irqsoff tracer is being tested with the ftrace direct modules. This appears to be an existing upstream issue since I am able to reproduce the lockup without these patches. I will be looking into that to see if I can figure out the cause of those lockups. In the meantime, I would appreciate a review of these patches. - Naveen Naveen N. Rao (14): ftrace: Fix updating FTRACE_FL_TRAMP ftrace: Fix DYNAMIC_FTRACE_WITH_DIRECT_CALLS dependency ftrace: Fix cleanup in error path of register_ftrace_direct() ftrace: Remove ftrace_find_direct_func() ftrace: Add architectural helpers for [un]register_ftrace_direct() powerpc: Add support for CONFIG_HAVE_FUNCTION_ARG_ACCESS_API powerpc/ftrace: Remove dead code powerpc/ftrace: Use FTRACE_REGS_ADDR to identify the correct ftrace trampoline powerpc/ftrace: Use a hash table for tracking ftrace stubs powerpc/ftrace: Drop assumptions about ftrace trampoline target powerpc/ftrace: Use GPR save/restore macros in ftrace_graph_caller() powerpc/ftrace: Drop saving LR to stack save area for -mprofile-kernel powerpc/ftrace: Add support for register_ftrace_direct() for MPROFILE_KERNEL samples/ftrace: Add powerpc support for ftrace direct samples arch/powerpc/Kconfig | 2 + arch/powerpc/include/asm/ftrace.h | 14 + arch/powerpc/include/asm/ptrace.h | 31 ++ arch/powerpc/kernel/trace/ftrace.c| 314 +- .../powerpc/kernel/trace/ftrace_64_mprofile.S | 70 ++-- include/linux/ftrace.h| 7 +- kernel/trace/Kconfig | 2 +- kernel/trace/ftrace.c | 130 +++- samples/Kconfig | 2 +- samples/ftrace/ftrace-direct-modify.c | 58 samples/ftrace/ftrace-direct-too.c| 48 ++- samples/ftrace/ftrace-direct.c| 45 ++- 12 files changed, 519 insertions(+), 204 deletions(-) base-commit: 4c202167192a77481310a3cacae9f12618b92216 -- 2.25.4
Re: [PATCH 1/3 v5] ftrace: Have the callbacks receive a struct ftrace_regs instead of pt_regs
Hi Steven, Steven Rostedt wrote: From: "Steven Rostedt (VMware)" In preparation to have arguments of a function passed to callbacks attached to functions as default, change the default callback prototype to receive a struct ftrace_regs as the forth parameter instead of a pt_regs. For callbacks that set the FL_SAVE_REGS flag in their ftrace_ops flags, they will now need to get the pt_regs via a ftrace_get_regs() helper call. If this is called by a callback that their ftrace_ops did not have a FL_SAVE_REGS flag set, it that helper function will return NULL. This will allow the ftrace_regs to hold enough just to get the parameters and stack pointer, but without the worry that callbacks may have a pt_regs that is not completely filled. Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/kprobes/ftrace.c | 3 ++- fs/pstore/ftrace.c| 2 +- include/linux/ftrace.h| 16 ++-- include/linux/kprobes.h | 2 +- kernel/livepatch/patch.c | 3 ++- kernel/trace/ftrace.c | 27 +++ kernel/trace/trace_event_perf.c | 2 +- kernel/trace/trace_events.c | 2 +- kernel/trace/trace_functions.c| 9 - kernel/trace/trace_irqsoff.c | 2 +- kernel/trace/trace_sched_wakeup.c | 2 +- kernel/trace/trace_selftest.c | 20 +++- kernel/trace/trace_stack.c| 2 +- 13 files changed, 55 insertions(+), 37 deletions(-) diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 629abaf25681..be73350955e4 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -345,7 +345,7 @@ static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct pt_regs *regs); + struct ftrace_ops *ops, struct ftrace_regs *fregs); This breaks the build on non-x86 architectures that support KPROBES_ON_FTRACE. - Naveen
Re: [PATCH v3 8/9] perf mem: Return NULL for event 'ldst' on PowerPC
[+ Maddy] Leo Yan wrote: If user specifies event type "ldst", PowerPC's perf_mem_events__name() will wrongly return the store event name "cpu/mem-stores/". This patch changes to return NULL for the event "ldst" on PowerPC. Signed-off-by: Leo Yan --- tools/perf/arch/powerpc/util/mem-events.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/perf/arch/powerpc/util/mem-events.c b/tools/perf/arch/powerpc/util/mem-events.c index 07fb5e049488..90c5a0760685 100644 --- a/tools/perf/arch/powerpc/util/mem-events.c +++ b/tools/perf/arch/powerpc/util/mem-events.c @@ -7,6 +7,8 @@ char *perf_mem_events__name(int i) { if (i == PERF_MEM_EVENTS__LOAD) return (char *) "cpu/mem-loads/"; - - return (char *) "cpu/mem-stores/"; + else if (i == PERF_MEM_EVENTS__STORE) + return (char *) "cpu/mem-stores/"; + else + return NULL; } -- 2.17.1
Re: [PATCH v1] powerpc/process: Remove unnecessary #ifdef CONFIG_FUNCTION_GRAPH_TRACER
Christophe Leroy wrote: ftrace_graph_ret_addr() is always defined and returns 'ip' when CONFIG_FUNCTION_GRAPH_TRACER is not set. So the #ifdef is not needed, remove it. Signed-off-by: Christophe Leroy --- arch/powerpc/kernel/process.c | 4 1 file changed, 4 deletions(-) LGTM. Acked-by: Naveen N. Rao - Naveen
Re: [PATCH] MAINTAINERS: adjust kprobes.rst entry to new location
Lukas Bulwahn wrote: Commit 2165b82fde82 ("docs: Move kprobes.rst from staging/ to trace/") moved kprobes.rst, but missed to adjust the MAINTAINERS entry. Hence, ./scripts/get_maintainer.pl --self-test=patterns complains: warning: no file matchesF:Documentation/staging/kprobes.rst Adjust the entry to the new file location. Signed-off-by: Lukas Bulwahn --- Naveen, Masami-san, please ack. Jonathan, please pick this minor non-urgent patch into docs-next. applies cleanly on next-20200724 Ah, sorry. Hadn't noticed this change from Mauro. Acked-by: Naveen N. Rao Thanks! - Naveen
[PATCH 2/3] docs: staging/kprobes.rst: Move references to a separate appendix
Kprobes references are currently listed right after kretprobes example, and appears to be part of the same section. Move this out to a separate appendix for clarity. Signed-off-by: Naveen N. Rao --- Documentation/staging/kprobes.rst | 14 +- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Documentation/staging/kprobes.rst b/Documentation/staging/kprobes.rst index 620699f5df22..b757b6dfd3d4 100644 --- a/Documentation/staging/kprobes.rst +++ b/Documentation/staging/kprobes.rst @@ -20,6 +20,7 @@ Kernel Probes (Kprobes) 10. Deprecated Features Appendix A: The kprobes debugfs interface Appendix B: The kprobes sysctl interface + Appendix C: References Concepts: Kprobes and Return Probes = @@ -710,11 +711,6 @@ Kretprobes Example See samples/kprobes/kretprobe_example.c -For additional information on Kprobes, refer to the following URLs: - -- https://www.ibm.com/developerworks/library/l-kprobes/index.html -- https://www.kernel.org/doc/ols/2006/ols2006v2-pages-109-124.pdf - Deprecated Features === @@ -797,3 +793,11 @@ Note that this knob *changes* the optimized state. This means that optimized probes (marked [OPTIMIZED]) will be unoptimized ([OPTIMIZED] tag will be removed). If the knob is turned on, they will be optimized again. +References +== + +For additional information on Kprobes, refer to the following URLs: + +- https://www.ibm.com/developerworks/library/l-kprobes/index.html +- https://www.kernel.org/doc/ols/2006/ols2006v2-pages-109-124.pdf + -- 2.25.4
[PATCH 3/3] docs: Move kprobes.rst from staging/ to trace/
Kprobes constitutes a dynamic tracing technology and as such can be moved alongside documentation of other tracing technologies. Signed-off-by: Naveen N. Rao --- Documentation/staging/index.rst | 1 - Documentation/trace/index.rst| 1 + Documentation/{staging => trace}/kprobes.rst | 0 3 files changed, 1 insertion(+), 1 deletion(-) rename Documentation/{staging => trace}/kprobes.rst (100%) diff --git a/Documentation/staging/index.rst b/Documentation/staging/index.rst index 184e6aece0a7..abd0d18254d2 100644 --- a/Documentation/staging/index.rst +++ b/Documentation/staging/index.rst @@ -7,7 +7,6 @@ Unsorted Documentation :maxdepth: 2 crc32 - kprobes lzo remoteproc rpmsg diff --git a/Documentation/trace/index.rst b/Documentation/trace/index.rst index 7d83156c9ac1..879ebb9f647d 100644 --- a/Documentation/trace/index.rst +++ b/Documentation/trace/index.rst @@ -9,6 +9,7 @@ Linux Tracing Technologies tracepoint-analysis ftrace ftrace-uses + kprobes kprobetrace uprobetracer tracepoints diff --git a/Documentation/staging/kprobes.rst b/Documentation/trace/kprobes.rst similarity index 100% rename from Documentation/staging/kprobes.rst rename to Documentation/trace/kprobes.rst -- 2.25.4
[PATCH 0/3] docs: kprobes: Update URLs and move under trace/
This series updates some of the URLs in the kprobes document and moves the same under trace/ directory. - Naveen Naveen N. Rao (3): docs: staging/kprobes.rst: Update some of the references docs: staging/kprobes.rst: Move references to a separate appendix docs: Move kprobes.rst from staging/ to trace/ Documentation/staging/index.rst | 1 - Documentation/trace/index.rst| 1 + Documentation/{staging => trace}/kprobes.rst | 16 +--- 3 files changed, 10 insertions(+), 8 deletions(-) rename Documentation/{staging => trace}/kprobes.rst (99%) base-commit: f33d4075e512274c0c6edcd7602da2cf1d5a0341 -- 2.25.4
[PATCH 1/3] docs: staging/kprobes.rst: Update some of the references
Some of the kprobes references are not valid anymore. Update the URLs to point to their changed locations, where appropriate. Drop two URLs which do not exist anymore. Reported-by: Masami Hiramatsu Signed-off-by: Naveen N. Rao --- Documentation/staging/kprobes.rst | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Documentation/staging/kprobes.rst b/Documentation/staging/kprobes.rst index 8baab8832c5b..620699f5df22 100644 --- a/Documentation/staging/kprobes.rst +++ b/Documentation/staging/kprobes.rst @@ -712,10 +712,8 @@ See samples/kprobes/kretprobe_example.c For additional information on Kprobes, refer to the following URLs: -- http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe -- http://www.redhat.com/magazine/005mar05/features/kprobes/ -- http://www-users.cs.umn.edu/~boutcher/kprobes/ -- http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115) +- https://www.ibm.com/developerworks/library/l-kprobes/index.html +- https://www.kernel.org/doc/ols/2006/ols2006v2-pages-109-124.pdf Deprecated Features === -- 2.25.4
Re: [PATCH] Replace HTTP links with HTTPS ones: kprobes
Masami Hiramatsu wrote: On Tue, 14 Jul 2020 00:02:49 +0200 "Alexander A. Klimov" wrote: Am 13.07.20 um 16:20 schrieb Masami Hiramatsu: > Hi Naveen and Alexander, > > On Fri, 10 Jul 2020 19:14:47 +0530 > "Naveen N. Rao" wrote: > >> Masami Hiramatsu wrote: >>> On Tue, 7 Jul 2020 21:49:59 +0200 >>> "Alexander A. Klimov" wrote: >>> >>>> Rationale: >>>> Reduces attack surface on kernel devs opening the links for MITM >>>> as HTTPS traffic is much harder to manipulate. >>>> >>>> Deterministic algorithm: >>>> For each file: >>>>If not .svg: >>>> For each line: >>>>If doesn't contain `\bxmlns\b`: >>>> For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: >>>>If both the HTTP and HTTPS versions >>>>return 200 OK and serve the same content: >>>> Replace HTTP with HTTPS. >>> >>> OK, but it seems that some of them are disappeared :( >>> >>> http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe >>> >>> -> https://www.ibm.com/developerworks/library/l-kprobes/index.html >> >> That looks right. >> >>> >>> http://www.redhat.com/magazine/005mar05/features/kprobes/ >>> >>> -> I can not find that. >> >> Ditto, we should drop that. >> >>> >>>> - http://www-users.cs.umn.edu/~boutcher/kprobes/ >>>> - http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115) >>> >>> Both are not found. >> >> It looks like the first link is gone, but there seems to be a copy in >> the web archive: >> https://web.archive.org/web/20061106154519/http://www-users.cs.umn.edu/~boutcher/kprobes/ >> >> I suppose we can drop that link. >> >>> >>> (OT, it seems http://www.linuxsymposium.org/ has been left from historical >>> Linux Symposium, we must remove it asap) >> >> Indeed, I think that link pointed to the Kprobes paper: >> https://www.kernel.org/doc/ols/2006/ols2006v2-pages-109-124.pdf > > Ah, there is. > Thank you for the confirmation. > Alexander, can you update other urls instead of just replacing the http with https? 
Sry, but I don't steal others' work (on principle). If I didn't do the work (e.g. searched the replacement URL), I don't deserve to author the respective commit. Alexander, Next time, please ask if you aren't sure about including others' suggestions -- no need to term it as "stealing". Masami asked if you can include this, and I shared what I thought are the correct URLs so that they can be included. We don't mind someone else doing this change. Besides, there are ways to acknowledge others, through a Suggested-by tag, as an example. Also my HTTPSifying task is not done yet. Hmm, Naveen, then, can you make the update? Sure, I will send a patch. - Naveen
Re: [PATCH] Replace HTTP links with HTTPS ones: kprobes
Masami Hiramatsu wrote: On Tue, 7 Jul 2020 21:49:59 +0200 "Alexander A. Klimov" wrote: Rationale: Reduces attack surface on kernel devs opening the links for MITM as HTTPS traffic is much harder to manipulate. Deterministic algorithm: For each file: If not .svg: For each line: If doesn't contain `\bxmlns\b`: For each link, `\bhttp://[^# \t\r\n]*(?:\w|/)`: If both the HTTP and HTTPS versions return 200 OK and serve the same content: Replace HTTP with HTTPS. OK, but it seems that some of them are disappeared :( http://www-106.ibm.com/developerworks/library/l-kprobes.html?ca=dgr-lnxw42Kprobe -> https://www.ibm.com/developerworks/library/l-kprobes/index.html That looks right. http://www.redhat.com/magazine/005mar05/features/kprobes/ -> I can not find that. Ditto, we should drop that. - http://www-users.cs.umn.edu/~boutcher/kprobes/ - http://www.linuxsymposium.org/2006/linuxsymposium_procv2.pdf (pages 101-115) Both are not found. It looks like the first link is gone, but there seems to be a copy in the web archive: https://web.archive.org/web/20061106154519/http://www-users.cs.umn.edu/~boutcher/kprobes/ I suppose we can drop that link. (OT, it seems http://www.linuxsymposium.org/ has been left from historical Linux Symposium, we must remove it asap) Indeed, I think that link pointed to the Kprobes paper: https://www.kernel.org/doc/ols/2006/ols2006v2-pages-109-124.pdf - Naveen
Re: [PATCH] perf powerpc: Don't ignore sym-handling.c file
Arnaldo Carvalho de Melo wrote: Em Mon, May 11, 2020 at 11:45:09PM +0530, Sandipan Das escreveu: On 09/05/20 4:51 pm, Ravi Bangoria wrote: > Commit 7eec00a74720 ("perf symbols: Consolidate symbol fixup issue") > removed powerpc specific sym-handling.c file from Build. This wasn't > caught by build CI because all functions in this file are declared > as __weak in common code. Fix it. > > Fixes: 7eec00a74720 ("perf symbols: Consolidate symbol fixup issue") > Reported-by: Sandipan Das > Signed-off-by: Ravi Bangoria > --- > tools/perf/arch/powerpc/util/Build | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build > index e5c9504f8586..e86e210bf514 100644 > --- a/tools/perf/arch/powerpc/util/Build > +++ b/tools/perf/arch/powerpc/util/Build > @@ -2,6 +2,7 @@ perf-y += header.o > perf-y += kvm-stat.o > perf-y += perf_regs.o > perf-y += mem-events.o > +perf-y += sym-handling.o > > perf-$(CONFIG_DWARF) += dwarf-regs.o > perf-$(CONFIG_DWARF) += skip-callchain-idx.o > Thanks for fixing this! Acked-by: Sandipan Das Leo, Naveen, can you comment on this? Shoot -- this is a bad miss, I should have caught it. FWIW: Reviewed-by: Naveen N. Rao Thanks, Naveen
Re: [PATCH 12/14] docs: move remaining stuff under Documentation/*.txt to Documentation/staging
Masami Hiramatsu wrote: On Fri, 1 May 2020 17:37:56 +0200 Mauro Carvalho Chehab wrote: There are several files that I was unable to find a proper place for them, and 3 ones that are still in plain old text format. Let's place those stuff behind the carpet, as we'd like to keep the root directory clean. We can later discuss and move those into better places. Hi Mauro, Thanks for cleaning it up! Tentatively moving kprobes.txt under staging/ is good to me. Acked-by: Masami Hiramatsu BTW, I think kprobes.txt is under trace/ or we may be better making a new core-api/events/ directory and prepare other event systems (PMU, uprobes, and hw_breakpoint.) I think it would be good to move kprobes.txt under trace/ -- all other tracing bits are already present there, including uprobes. - Naveen
Re: [PATCH 0/2] pseries/hotplug: Change the default behaviour of cede_offline
Michael Ellerman wrote: "Naveen N. Rao" writes: Michael Ellerman wrote: "Gautham R. Shenoy" writes: From: "Gautham R. Shenoy" Currently on Pseries Linux Guests, the offlined CPU can be put to one of the following two states: - Long term processor cede (also called extended cede) - Returned to the Hypervisor via RTAS "stop-self" call. This is controlled by the kernel boot parameter "cede_offline=on/off". By default the offlined CPUs enter extended cede. Since commit 3aa565f53c39 ("powerpc/pseries: Add hooks to put the CPU into an appropriate offline state") (Nov 2009) Which you wrote :) Why was that wrong? The PHYP hypervisor considers CPUs in extended cede to be "active" since the CPUs are still under the control fo the Linux Guests. Hence, when we change the SMT modes by offlining the secondary CPUs, the PURR and the RWMR SPRs will continue to count the values for offlined CPUs in extended cede as if they are online. One of the expectations with PURR is that the for an interval of time, the sum of the PURR increments across the online CPUs of a core should equal the number of timebase ticks for that interval. This is currently not the case. But why does that matter? It's just some accounting stuff, does it actually break something meaningful? Yes, this broke lparstat at the very least (though its quite unfortunate we took so long to notice). By "so long" you mean 10 years? Also I've never heard of lparstat, but I assume it's one of these tools that's meant to behave like the AIX equivalent? Yes, and yes. lparstat is part of powerpc-utils. If it's been "broken" for 10 years and no one noticed, I'd argue the current behaviour is now "correct" and fixing it would actually be a breakage :) :) More on this below... 
With SMT disabled, and under load: $ sudo lparstat 1 10 System Configuration type=Shared mode=Uncapped smt=Off lcpu=2 mem=7759616 kB cpus=6 ent=1.00 %user %sys %wait%idlephysc %entc lbusy vcsw phint - - --- - - - - 100.00 0.00 0.00 0.00 1.10 110.00 100.00 128784460 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128784860 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128785260 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128785662 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128786062 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128786462 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128786862 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128787262 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128787664 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128788064 0 What about that is wrong? The 'physc' column represents cpu usage in units of physical cores. With 2 virtual cores ('lcpu=2') in uncapped, shared processor mode, we expect this to be closer to 2 when fully loaded (and spare capacity elsewhere in the system). With cede_offline=off: $ sudo lparstat 1 10 System Configuration type=Shared mode=Uncapped smt=Off lcpu=2 mem=7759616 kB cpus=6 ent=1.00 %user %sys %wait%idlephysc %entc lbusy vcsw phint - - --- - - - - 100.00 0.00 0.00 0.00 1.94 194.00 100.00 128961588 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128961988 0 100.00 0.00 0.00 0.00 inf inf 100.00 128962392 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128962792 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128963192 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128963592 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128963992 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128964392 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128964792 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128965194 0 [The 'inf' values there show a different bug] Also, since we expose [S]PURR through sysfs, any tools that make use of that directly are also affected due to this. 
But again if we've had the current behaviour for 10 years then arguably that's now the correct behaviour. That's a fair point, and probably again points to this area getting less tested. One of the main reasons for this being caught now though, is that there are workloads being tested under lower SMT levels now. So, I suspect no one has been relying on this behavior and we can consider this to be a bug. Thanks, Naveen
Re: [PATCH 0/2] pseries/hotplug: Change the default behaviour of cede_offline
Michael Ellerman wrote: "Gautham R. Shenoy" writes: From: "Gautham R. Shenoy" Currently on Pseries Linux Guests, the offlined CPU can be put to one of the following two states: - Long term processor cede (also called extended cede) - Returned to the Hypervisor via RTAS "stop-self" call. This is controlled by the kernel boot parameter "cede_offline=on/off". By default the offlined CPUs enter extended cede. Since commit 3aa565f53c39 ("powerpc/pseries: Add hooks to put the CPU into an appropriate offline state") (Nov 2009) Which you wrote :) Why was that wrong? The PHYP hypervisor considers CPUs in extended cede to be "active" since the CPUs are still under the control fo the Linux Guests. Hence, when we change the SMT modes by offlining the secondary CPUs, the PURR and the RWMR SPRs will continue to count the values for offlined CPUs in extended cede as if they are online. One of the expectations with PURR is that the for an interval of time, the sum of the PURR increments across the online CPUs of a core should equal the number of timebase ticks for that interval. This is currently not the case. But why does that matter? It's just some accounting stuff, does it actually break something meaningful? Yes, this broke lparstat at the very least (though its quite unfortunate we took so long to notice). 
With SMT disabled, and under load: $ sudo lparstat 1 10 System Configuration type=Shared mode=Uncapped smt=Off lcpu=2 mem=7759616 kB cpus=6 ent=1.00 %user %sys %wait%idlephysc %entc lbusy vcsw phint - - --- - - - - 100.00 0.00 0.00 0.00 1.10 110.00 100.00 128784460 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128784860 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128785260 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128785662 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128786062 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128786462 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128786862 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128787262 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128787664 0 100.00 0.00 0.00 0.00 1.07 107.00 100.00 128788064 0 With cede_offline=off: $ sudo lparstat 1 10 System Configuration type=Shared mode=Uncapped smt=Off lcpu=2 mem=7759616 kB cpus=6 ent=1.00 %user %sys %wait%idlephysc %entc lbusy vcsw phint - - --- - - - - 100.00 0.00 0.00 0.00 1.94 194.00 100.00 128961588 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128961988 0 100.00 0.00 0.00 0.00 inf inf 100.00 128962392 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128962792 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128963192 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128963592 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128963992 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128964392 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128964792 0 100.00 0.00 0.00 0.00 1.91 191.00 100.00 128965194 0 [The 'inf' values there show a different bug] Also, since we expose [S]PURR through sysfs, any tools that make use of that directly are also affected due to this. - Naveen
Re: [PATCH 2/2] powerpc/watchpoint: Disable watchpoint hit by larx/stcx instructions
Ravi Bangoria wrote: If watchpoint exception is generated by larx/stcx instructions, the reservation created by larx gets lost while handling exception, and thus stcx instruction always fails. Generally these instructions are used in a while(1) loop, for example spinlocks. And because stcx never succeeds, it loops forever and ultimately hangs the system. Note that ptrace anyway works in one-shot mode and thus for ptrace we don't change the behaviour. It's up to ptrace user to take care of this. Signed-off-by: Ravi Bangoria --- arch/powerpc/kernel/hw_breakpoint.c | 49 +++-- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 28ad3171bb82..9fa496a598ce 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -195,14 +195,32 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) tsk->thread.last_hit_ubp = NULL; } +static bool is_larx_stcx_instr(struct pt_regs *regs, unsigned int instr) +{ + int ret, type; + struct instruction_op op; + + ret = analyse_instr(&op, regs, instr); + type = GETTYPE(op.type); + return (!ret && (type == LARX || type == STCX)); +} + /* * Handle debug exception notifications. */ static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp, unsigned long addr) { - int stepped; - unsigned int instr; + unsigned int instr = 0; + + if (__get_user_inatomic(instr, (unsigned int *)regs->nip)) + goto fail; + + if (is_larx_stcx_instr(regs, instr)) { + printk_ratelimited("Watchpoint: Can't emulate/single-step larx/" + "stcx instructions. Disabling watchpoint.\n"); The below WARN() uses the term 'breakpoint'. Better to use consistent terminology. I would rewrite the above as: printk_ratelimited("Breakpoint hit on instruction that can't be emulated. " "Breakpoint at 0x%lx will be disabled.\n", addr); Otherwise: Acked-by: Naveen N. 
Rao - Naveen + goto disable; + } /* Do not emulate user-space instructions, instead single-step them */ if (user_mode(regs)) { @@ -211,23 +229,22 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp, return false; } - stepped = 0; - instr = 0; - if (!__get_user_inatomic(instr, (unsigned int *)regs->nip)) - stepped = emulate_step(regs, instr); + if (!emulate_step(regs, instr)) + goto fail; + return true; + +fail: /* -* emulate_step() could not execute it. We've failed in reliably -* handling the hw-breakpoint. Unregister it and throw a warning -* message to let the user know about it. +* We've failed in reliably handling the hw-breakpoint. Unregister +* it and throw a warning message to let the user know about it. */ - if (!stepped) { - WARN(1, "Unable to handle hardware breakpoint. Breakpoint at " - "0x%lx will be disabled.", addr); - perf_event_disable_inatomic(bp); - return false; - } - return true; + WARN(1, "Unable to handle hardware breakpoint. Breakpoint at " + "0x%lx will be disabled.", addr); + +disable: + perf_event_disable_inatomic(bp); + return false; } int hw_breakpoint_handler(struct die_args *args) -- 2.21.0
[PATCH 3/3] powerpc: Use ftrace_graph_ret_addr() when unwinding
With support for HAVE_FUNCTION_GRAPH_RET_ADDR_PTR, ftrace_graph_ret_addr() provides more robust unwinding when function graph is in use. Update show_stack() to use the same. With dump_stack() added to sysrq_sysctl_handler(), before this patch: root@(none):/sys/kernel/debug/tracing# cat /proc/sys/kernel/sysrq CPU: 0 PID: 218 Comm: cat Not tainted 5.3.0-rc7-00868-g8453ad4a078c-dirty #20 Call Trace: [c000d1e13c30] [c006ab98] return_to_handler+0x0/0x40 (dump_stack+0xe8/0x164) (unreliable) [c000d1e13c80] [c0145680] sysrq_sysctl_handler+0x48/0xb8 [c000d1e13cd0] [c006ab98] return_to_handler+0x0/0x40 (proc_sys_call_handler+0x274/0x2a0) [c000d1e13d60] [c006ab98] return_to_handler+0x0/0x40 (return_to_handler+0x0/0x40) [c000d1e13d80] [c006ab98] return_to_handler+0x0/0x40 (__vfs_read+0x3c/0x70) [c000d1e13dd0] [c006ab98] return_to_handler+0x0/0x40 (vfs_read+0xb8/0x1b0) [c000d1e13e20] [c006ab98] return_to_handler+0x0/0x40 (ksys_read+0x7c/0x140) After this patch: Call Trace: [c000d1e33c30] [c006ab58] return_to_handler+0x0/0x40 (dump_stack+0xe8/0x164) (unreliable) [c000d1e33c80] [c0145680] sysrq_sysctl_handler+0x48/0xb8 [c000d1e33cd0] [c006ab58] return_to_handler+0x0/0x40 (proc_sys_call_handler+0x274/0x2a0) [c000d1e33d60] [c006ab58] return_to_handler+0x0/0x40 (__vfs_read+0x3c/0x70) [c000d1e33d80] [c006ab58] return_to_handler+0x0/0x40 (vfs_read+0xb8/0x1b0) [c000d1e33dd0] [c006ab58] return_to_handler+0x0/0x40 (ksys_read+0x7c/0x140) [c000d1e33e20] [c006ab58] return_to_handler+0x0/0x40 (system_call+0x5c/0x68) Signed-off-by: Naveen N. 
Rao --- arch/powerpc/kernel/process.c | 19 ++- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 24621e7e5033..f289bdd2b562 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -2047,10 +2047,8 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) int count = 0; int firstframe = 1; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - struct ftrace_ret_stack *ret_stack; - extern void return_to_handler(void); - unsigned long rth = (unsigned long)return_to_handler; - int curr_frame = 0; + unsigned long ret_addr; + int ftrace_idx = 0; #endif if (tsk == NULL) @@ -2079,15 +2077,10 @@ void show_stack(struct task_struct *tsk, unsigned long *stack) if (!firstframe || ip != lr) { printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip); #ifdef CONFIG_FUNCTION_GRAPH_TRACER - if ((ip == rth) && curr_frame >= 0) { - ret_stack = ftrace_graph_get_ret_stack(current, - curr_frame++); - if (ret_stack) - pr_cont(" (%pS)", - (void *)ret_stack->ret); - else - curr_frame = -1; - } + ret_addr = ftrace_graph_ret_addr(current, + &ftrace_idx, ip, stack); + if (ret_addr != ip) + pr_cont(" (%pS)", (void *)ret_addr); #endif if (firstframe) pr_cont(" (unreliable)"); -- 2.23.0
[PATCH 2/3] powerpc/ftrace: Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
This associates entries in the ftrace_ret_stack with corresponding stack frames, enabling more robust stack unwinding. Also update the only user of ftrace_graph_ret_addr() to pass the stack pointer. Signed-off-by: Naveen N. Rao --- arch/powerpc/include/asm/asm-prototypes.h | 3 ++- arch/powerpc/include/asm/ftrace.h | 2 ++ arch/powerpc/kernel/stacktrace.c | 2 +- arch/powerpc/kernel/trace/ftrace.c | 5 +++-- arch/powerpc/kernel/trace/ftrace_32.S | 1 + arch/powerpc/kernel/trace/ftrace_64_mprofile.S | 1 + arch/powerpc/kernel/trace/ftrace_64_pg.S | 1 + 7 files changed, 11 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 49196d35e3bb..8561498e653c 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -134,7 +134,8 @@ extern int __ucmpdi2(u64, u64); /* tracing */ void _mcount(void); -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip); +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, + unsigned long sp); void pnv_power9_force_smt4_catch(void); void pnv_power9_force_smt4_release(void); diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 3dfb80b86561..f54a08a2cd70 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -8,6 +8,8 @@ #define MCOUNT_ADDR((unsigned long)(_mcount)) #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */ +#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR + #ifdef __ASSEMBLY__ /* Based off of objdump optput from glibc */ diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 1e2276963f6d..e2a46cfed5fd 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -182,7 +182,7 @@ static int __save_stack_trace_tsk_reliable(struct task_struct *tsk, * FIXME: IMHO these tests do not belong in * arch-dependent code, they are generic. 
*/ - ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, NULL); + ip = ftrace_graph_ret_addr(tsk, &graph_idx, ip, stack); #ifdef CONFIG_KPROBES /* * Mark stacktraces with kretprobed functions on them diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index be1ca98fce5c..7ea0ca044b65 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -944,7 +944,8 @@ int ftrace_disable_ftrace_graph_caller(void) * Hook the return address and push it in the stack of return addrs * in current thread info. Return the address we want to divert to. */ -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, + unsigned long sp) { unsigned long return_hooker; @@ -956,7 +957,7 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) return_hooker = ppc_function_entry(return_to_handler); - if (!function_graph_enter(parent, ip, 0, NULL)) + if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp)) parent = return_hooker; out: return parent; diff --git a/arch/powerpc/kernel/trace/ftrace_32.S b/arch/powerpc/kernel/trace/ftrace_32.S index 183f608efb81..e023ae59c429 100644 --- a/arch/powerpc/kernel/trace/ftrace_32.S +++ b/arch/powerpc/kernel/trace/ftrace_32.S @@ -50,6 +50,7 @@ _GLOBAL(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(ftrace_graph_caller) + addir5, r1, 48 /* load r4 with local address */ lwz r4, 44(r1) subir4, r4, MCOUNT_INSN_SIZE diff --git a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S index 74acbf16a666..f9fd5f743eba 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_64_mprofile.S @@ -294,6 +294,7 @@ _GLOBAL(ftrace_graph_caller) std r2, 24(r1) ld r2, PACATOC(r13)/* get kernel TOC in r2 */ + addir5, r1, 112 mfctr r4 /* ftrace_caller has moved local addr here */ std r4, 40(r1) mflrr3 /* 
ftrace_caller has restored LR from stack */ diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S index e41a7d13c99c..6708e24db0ab 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.S +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S @@ -41,6 +41,7 @@ _GLOBAL(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(ftrace_graph_caller) + addir5, r1, 112 /* load r4 with local address */ ld r4
[PATCH 0/3] powerpc/ftrace: Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR for more robust stack unwinding when function graph tracer is in use. Convert powerpc show_stack() to use ftrace_graph_ret_addr() for better stack unwinding. - Naveen Naveen N. Rao (3): ftrace: Look up the address of return_to_handler() using helpers powerpc/ftrace: Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR powerpc: Use ftrace_graph_ret_addr() when unwinding arch/powerpc/include/asm/asm-prototypes.h | 3 ++- arch/powerpc/include/asm/ftrace.h | 2 ++ arch/powerpc/kernel/process.c | 19 ++- arch/powerpc/kernel/stacktrace.c | 2 +- arch/powerpc/kernel/trace/ftrace.c| 5 +++-- arch/powerpc/kernel/trace/ftrace_32.S | 1 + .../powerpc/kernel/trace/ftrace_64_mprofile.S | 1 + arch/powerpc/kernel/trace/ftrace_64_pg.S | 1 + kernel/trace/fgraph.c | 4 ++-- 9 files changed, 19 insertions(+), 19 deletions(-) -- 2.23.0
[PATCH 1/3] ftrace: Look up the address of return_to_handler() using helpers
This ensures that we use the right address on architectures that use function descriptors. Signed-off-by: Naveen N. Rao --- kernel/trace/fgraph.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index 8dfd5021b933..7950a0356042 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -276,7 +276,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, int index = task->curr_ret_stack; int i; - if (ret != (unsigned long)return_to_handler) + if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler)) return ret; if (index < 0) @@ -294,7 +294,7 @@ unsigned long ftrace_graph_ret_addr(struct task_struct *task, int *idx, { int task_idx; - if (ret != (unsigned long)return_to_handler) + if (ret != (unsigned long)dereference_kernel_function_descriptor(return_to_handler)) return ret; task_idx = task->curr_ret_stack; -- 2.23.0
Re: [PATCH v3 2/3] Powerpc64/Watchpoint: Don't ignore extraneous exceptions
Ravi Bangoria wrote: On Powerpc64, watchpoint match range is double-word granular. On a watchpoint hit, DAR is set to the first byte of overlap between actual access and watched range. And thus it's quite possible that DAR does not point inside user specified range. Ex, say user creates a watchpoint with address range 0x1004 to 0x1007. So hw would be configured to watch from 0x1000 to 0x1007. If there is a 4 byte access from 0x1002 to 0x1005, DAR will point to 0x1002 and thus interrupt handler considers it as extraneous, but it's actually not, because part of the access belongs to what user has asked. So, let kernel pass it on to user and let user decide what to do with it instead of silently ignoring it. The drawback is, it can generate false positive events. I think you should do the additional validation here, instead of generating false positives. You should be able to read the instruction, run it through analyse_instr(), and then use OP_IS_LOAD_STORE() and GETSIZE() to understand the access range. This can be used to then perform a better match against what the user asked for. - Naveen
Re: [PATCH -tip v2] kprobes: Prohibit probing on BUG() and WARN() address
Masami Hiramatsu wrote: Since BUG() and WARN() may use a trap (e.g. UD2 on x86) to get the address where the BUG() has occurred, kprobes can not do single-step out-of-line that instruction. So prohibit probing on such address. Without this fix, if someone put a kprobe on WARN(), the kernel will crash with invalid opcode error instead of outputting a warning message, because kernel can not find correct bug address. Signed-off-by: Masami Hiramatsu --- Changes in v2: - Add find_bug() stub function for !CONFIG_GENERIC_BUG - Cast the p->addr to unsigned long. --- include/linux/bug.h |5 + kernel/kprobes.c|3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) Acked-by: Naveen N. Rao - Naveen
Re: [PATCH 1/2] ftrace: Fix NULL pointer dereference in t_probe_next()
Steven Rostedt wrote: On Thu, 4 Jul 2019 20:04:41 +0530 "Naveen N. Rao" wrote: kernel/trace/ftrace.c | 4 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7b037295a1f1..0791eafb693d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3093,6 +3093,10 @@ t_probe_next(struct seq_file *m, loff_t *pos) hnd = &iter->probe_entry->hlist; hash = iter->probe->ops.func_hash->filter_hash; + + if (!hash) + return NULL; + size = 1 << hash->size_bits; retry: OK, I added this, but I'm also adding this on top: Thanks, the additional comments do make this much clearer. Regards, Naveen
[tip: perf/core] perf arch powerpc: Sync powerpc syscall.tbl
The following commit has been merged into the perf/core branch of tip: Commit-ID: 0a56e0603fa13af08816d673f6f71b68cda2fb2e Gitweb: https://git.kernel.org/tip/0a56e0603fa13af08816d673f6f71b68cda2fb2e Author:Naveen N. Rao AuthorDate:Tue, 27 Aug 2019 12:44:58 +05:30 Committer: Arnaldo Carvalho de Melo CommitterDate: Wed, 28 Aug 2019 10:25:38 -03:00 perf arch powerpc: Sync powerpc syscall.tbl Copy over powerpc syscall.tbl to grab changes from the below commits: commit cee3536d24a1 ("powerpc: Wire up clone3 syscall") commit 1a271a68e030 ("arch: mark syscall number 435 reserved for clone3") commit 7615d9e1780e ("arch: wire-up pidfd_open()") commit d8076bdb56af ("uapi: Wire up the mount API syscalls on non-x86 arches [ver #2]") commit 39036cd27273 ("arch: add pidfd and io_uring syscalls everywhere") commit 48166e6ea47d ("y2038: add 64-bit time_t syscalls to all 32-bit architectures") commit d33c577cccd0 ("y2038: rename old time and utime syscalls") commit 00bf25d693e7 ("y2038: use time32 syscall names on 32-bit") commit 8dabe7245bbc ("y2038: syscalls: rename y2038 compat syscalls") commit 0d6040d46817 ("arch: add split IPC system calls where needed") Reported-by: Nicholas Piggin Signed-off-by: Naveen N. 
Rao Cc: Ravi Bangoria Cc: linuxppc-...@lists.ozlabs.org Link: http://lkml.kernel.org/r/20190827071458.19897-1-naveen.n@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/entry/syscalls/syscall.tbl | 146 +--- 1 file changed, 119 insertions(+), 27 deletions(-) diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index db3bbb8..43f736e 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -20,7 +20,9 @@ 10 common unlink sys_unlink 11 nospu execve sys_execve compat_sys_execve 12 common chdir sys_chdir -13 common timesys_time compat_sys_time +13 32 timesys_time32 +13 64 timesys_time +13 spu timesys_time 14 common mknod sys_mknod 15 common chmod sys_chmod 16 common lchown sys_lchown @@ -36,14 +38,17 @@ 22 spu umount sys_ni_syscall 23 common setuid sys_setuid 24 common getuid sys_getuid -25 common stime sys_stime compat_sys_stime +25 32 stime sys_stime32 +25 64 stime sys_stime +25 spu stime sys_stime 26 nospu ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm 28 32 oldfstatsys_fstat sys_ni_syscall 28 64 oldfstatsys_ni_syscall 28 spu oldfstatsys_ni_syscall 29 nospu pause sys_pause -30 nospu utime sys_utime compat_sys_utime +30 32 utime sys_utime32 +30 64 utime sys_utime 31 common sttysys_ni_syscall 32 common gttysys_ni_syscall 33 common access sys_access @@ -157,7 +162,9 @@ 121common setdomainname sys_setdomainname 122common uname sys_newuname 123common modify_ldt sys_ni_syscall -124common adjtimexsys_adjtimex compat_sys_adjtimex +12432 adjtimexsys_adjtimex_time32 +12464 adjtimexsys_adjtimex +124spu adjtimexsys_adjtimex 125common mprotectsys_mprotect 12632 sigprocmask sys_sigprocmask compat_sys_sigprocmask 12664 sigprocmask sys_ni_syscall @@ -198,8 +205,12 @@ 158common sched_yield sys_sched_yield 159common sched_get_priority_max sys_sched_get_priority_max 160common sched_get_priority_min 
sys_sched_get_priority_min -161common sched_rr_get_interval sys_sched_rr_get_interval
[PATCH] perf arch powerpc: Sync powerpc syscall.tbl
Copy over powerpc syscall.tbl to grab changes from the below commits: commit cee3536d24a1 ("powerpc: Wire up clone3 syscall") commit 1a271a68e030 ("arch: mark syscall number 435 reserved for clone3") commit 7615d9e1780e ("arch: wire-up pidfd_open()") commit d8076bdb56af ("uapi: Wire up the mount API syscalls on non-x86 arches [ver #2]") commit 39036cd27273 ("arch: add pidfd and io_uring syscalls everywhere") commit 48166e6ea47d ("y2038: add 64-bit time_t syscalls to all 32-bit architectures") commit d33c577cccd0 ("y2038: rename old time and utime syscalls") commit 00bf25d693e7 ("y2038: use time32 syscall names on 32-bit") commit 8dabe7245bbc ("y2038: syscalls: rename y2038 compat syscalls") commit 0d6040d46817 ("arch: add split IPC system calls where needed") Reported-by: Nicholas Piggin Signed-off-by: Naveen N. Rao --- .../arch/powerpc/entry/syscalls/syscall.tbl | 146 ++ 1 file changed, 119 insertions(+), 27 deletions(-) diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index db3bbb8744af..43f736ed47f2 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -20,7 +20,9 @@ 10 common unlink sys_unlink 11 nospu execve sys_execve compat_sys_execve 12 common chdir sys_chdir -13 common timesys_time compat_sys_time +13 32 timesys_time32 +13 64 timesys_time +13 spu timesys_time 14 common mknod sys_mknod 15 common chmod sys_chmod 16 common lchown sys_lchown @@ -36,14 +38,17 @@ 22 spu umount sys_ni_syscall 23 common setuid sys_setuid 24 common getuid sys_getuid -25 common stime sys_stime compat_sys_stime +25 32 stime sys_stime32 +25 64 stime sys_stime +25 spu stime sys_stime 26 nospu ptrace sys_ptrace compat_sys_ptrace 27 common alarm sys_alarm 28 32 oldfstatsys_fstat sys_ni_syscall 28 64 oldfstatsys_ni_syscall 28 spu oldfstatsys_ni_syscall 29 nospu pause sys_pause -30 nospu utime sys_utime compat_sys_utime +30 32 utime sys_utime32 +30 64 
utime sys_utime 31 common sttysys_ni_syscall 32 common gttysys_ni_syscall 33 common access sys_access @@ -157,7 +162,9 @@ 121common setdomainname sys_setdomainname 122common uname sys_newuname 123common modify_ldt sys_ni_syscall -124common adjtimexsys_adjtimex compat_sys_adjtimex +12432 adjtimexsys_adjtimex_time32 +12464 adjtimexsys_adjtimex +124spu adjtimexsys_adjtimex 125common mprotectsys_mprotect 12632 sigprocmask sys_sigprocmask compat_sys_sigprocmask 12664 sigprocmask sys_ni_syscall @@ -198,8 +205,12 @@ 158common sched_yield sys_sched_yield 159common sched_get_priority_max sys_sched_get_priority_max 160common sched_get_priority_min sys_sched_get_priority_min -161common sched_rr_get_interval sys_sched_rr_get_interval compat_sys_sched_rr_get_interval -162common nanosleep sys_nanosleep compat_sys_nanosleep +16132 sched_rr_get_interval sys_sched_rr_get_interval_time32 +16164 sched_rr_get_interval sys_sched_rr_get_interval +161spu sched_rr_get_interval sys_sched_rr_get_interval +16232 nanosleep sys_nanosleep_time32 +16264 nanosleep sys_nanosleep +162spu nanosleep sys_nanosleep 163common mremap
Re: [PATCH] bpf: handle 32-bit zext during constant blinding
Jiong Wang wrote: Naveen N. Rao writes: Since BPF constant blinding is performed after the verifier pass, the ALU32 instructions inserted for doubleword immediate loads don't have a corresponding zext instruction. This is causing a kernel oops on powerpc and can be reproduced by running 'test_cgroup_storage' with bpf_jit_harden=2. Fix this by emitting BPF_ZEXT during constant blinding if prog->aux->verifier_zext is set. Fixes: a4b1d3c1ddf6cb ("bpf: verifier: insert zero extension according to analysis result") Reported-by: Michael Ellerman Signed-off-by: Naveen N. Rao Thanks for the fix. Reviewed-by: Jiong Wang Just two other comments during review in case I am wrong somewhere. - Use verifier_zext instead of bpf_jit_needs_zext() seems better, even though the latter could avoid extending function argument. Because JIT back-ends look at verifier_zext, true means zext inserted by verifier so JITs won't do the code-gen. Use verifier_zext is sort of keeping JIT blinding the same behaviour as the verifier even though blinding doesn't belong to verifier, but for such insn patching, it could be seen as an extension of verifier, therefore use verifier_zext seems better than bpf_jit_needs_zext() to me. - JIT blinding is also escaping the HI32 randomization which happens inside verifier, otherwise x86-64 regression should have caught this issue. Jiong, Thanks for the review. Alexei, Daniel, Can you please pick this up for v5.3. This is a regression and is causing a crash on powerpc. - Naveen
[PATCH] bpf: handle 32-bit zext during constant blinding
Since BPF constant blinding is performed after the verifier pass, the ALU32 instructions inserted for doubleword immediate loads don't have a corresponding zext instruction. This is causing a kernel oops on powerpc and can be reproduced by running 'test_cgroup_storage' with bpf_jit_harden=2. Fix this by emitting BPF_ZEXT during constant blinding if prog->aux->verifier_zext is set. Fixes: a4b1d3c1ddf6cb ("bpf: verifier: insert zero extension according to analysis result") Reported-by: Michael Ellerman Signed-off-by: Naveen N. Rao --- Changes since RFC: - Removed changes to ALU32 and JMP32 ops since those don't alter program execution, and the verifier would have already accounted for them. kernel/bpf/core.c | 8 ++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8191a7db2777..66088a9e9b9e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -890,7 +890,8 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, static int bpf_jit_blind_insn(const struct bpf_insn *from, const struct bpf_insn *aux, - struct bpf_insn *to_buff) + struct bpf_insn *to_buff, + bool emit_zext) { struct bpf_insn *to = to_buff; u32 imm_rnd = get_random_int(); @@ -1005,6 +1006,8 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */ *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm); *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + if (emit_zext) + *to++ = BPF_ZEXT_REG(BPF_REG_AX); *to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX); break; @@ -1088,7 +1091,8 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) insn[1].code == 0) memcpy(aux, insn, sizeof(aux)); - rewritten = bpf_jit_blind_insn(insn, aux, insn_buff); + rewritten = bpf_jit_blind_insn(insn, aux, insn_buff, + clone->aux->verifier_zext); if (!rewritten) continue; -- 2.22.0
Re: Regression fix for bpf in v5.3 (was Re: [RFC PATCH] bpf: handle 32-bit zext during constant blinding)
Jiong Wang wrote: Michael Ellerman writes: "Naveen N. Rao" writes: Since BPF constant blinding is performed after the verifier pass, there are certain ALU32 instructions inserted which don't have a corresponding zext instruction inserted after. This is causing a kernel oops on powerpc and can be reproduced by running 'test_cgroup_storage' with bpf_jit_harden=2. Fix this by emitting BPF_ZEXT during constant blinding if prog->aux->verifier_zext is set. Fixes: a4b1d3c1ddf6cb ("bpf: verifier: insert zero extension according to analysis result") Reported-by: Michael Ellerman Signed-off-by: Naveen N. Rao --- This approach (the location where zext is being introduced below, in particular) works for powerpc, but I am not entirely sure if this is sufficient for other architectures as well. This is broken on v5.3-rc4. Any comment on this? Have commented on https://marc.info/?l=linux-netdev&m=156637836024743&w=2 The fix looks correct to me on "BPF_LD | BPF_IMM | BPF_DW", but looks unnecessary on two other places. It would be great if you or Naveen could confirm it. Jiong, Thanks for the review. I can now see why the other two changes are not necessary. I will post a follow-on patch. Thanks! - Naveen
Re: [RFC PATCH] bpf: handle 32-bit zext during constant blinding
Naveen N. Rao wrote: Since BPF constant blinding is performed after the verifier pass, there are certain ALU32 instructions inserted which don't have a corresponding zext instruction inserted after. This is causing a kernel oops on powerpc and can be reproduced by running 'test_cgroup_storage' with bpf_jit_harden=2. Fix this by emitting BPF_ZEXT during constant blinding if prog->aux->verifier_zext is set. Fixes: a4b1d3c1ddf6cb ("bpf: verifier: insert zero extension according to analysis result") Reported-by: Michael Ellerman Signed-off-by: Naveen N. Rao --- This approach (the location where zext is being introduced below, in particular) works for powerpc, but I am not entirely sure if this is sufficient for other architectures as well. This is broken on v5.3-rc4. Alexei, Daniel, Jiong, Any feedback on this? - Naveen
Re: [PATCH 13/16] include/asm-generic: prefer __section from compiler_attributes.h
Nick Desaulniers wrote: Reported-by: Sedat Dilek Suggested-by: Josh Poimboeuf Signed-off-by: Nick Desaulniers --- Acked-by: Naveen N. Rao - Naveen
Re: [PATCH 4/4] arm64: implement KPROBES_ON_FTRACE
Jisheng Zhang wrote: This patch implements KPROBES_ON_FTRACE for arm64. ~ # mount -t debugfs debugfs /sys/kernel/debug/ ~ # cd /sys/kernel/debug/ /sys/kernel/debug # echo 'p _do_fork' > tracing/kprobe_events before the patch: /sys/kernel/debug # cat kprobes/list ff801009ff7c k _do_fork+0x4 [DISABLED] This looks wrong -- we should not be allowing kprobe to be registered on ftrace address without KPROBES_ON_FTRACE. Is _do_fork+0x4 the location of ftrace entry on arm64? - Naveen
Re: [PATCH 1/4] kprobes: adjust kprobe addr for KPROBES_ON_FTRACE
Jisheng Zhang wrote: For KPROBES_ON_FTRACE case, we need to adjust the kprobe's addr correspondingly. Signed-off-by: Jisheng Zhang --- kernel/kprobes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 9873fc627d61..f8400753a8a9 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1560,6 +1560,9 @@ int register_kprobe(struct kprobe *p) addr = kprobe_addr(p); if (IS_ERR(addr)) return PTR_ERR(addr); +#ifdef CONFIG_KPROBES_ON_FTRACE + addr = (kprobe_opcode_t *)ftrace_call_adjust((unsigned long)addr); +#endif p->addr = addr; I'm not sure what this is achieving, but looks wrong to me. If you intend to have kprobes default to using ftrace entry for probing functions, consider over-riding kprobe_lookup_name() -- see powerpc variant for example. - Naveen
[RFC PATCH] bpf: handle 32-bit zext during constant blinding
Since BPF constant blinding is performed after the verifier pass, there are certain ALU32 instructions inserted which don't have a corresponding zext instruction inserted after. This is causing a kernel oops on powerpc and can be reproduced by running 'test_cgroup_storage' with bpf_jit_harden=2. Fix this by emitting BPF_ZEXT during constant blinding if prog->aux->verifier_zext is set. Fixes: a4b1d3c1ddf6cb ("bpf: verifier: insert zero extension according to analysis result") Reported-by: Michael Ellerman Signed-off-by: Naveen N. Rao --- This approach (the location where zext is being introduced below, in particular) works for powerpc, but I am not entirely sure if this is sufficient for other architectures as well. This is broken on v5.3-rc4. - Naveen kernel/bpf/core.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8191a7db2777..d84146e6fd9e 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -890,7 +890,8 @@ int bpf_jit_get_func_addr(const struct bpf_prog *prog, static int bpf_jit_blind_insn(const struct bpf_insn *from, const struct bpf_insn *aux, - struct bpf_insn *to_buff) + struct bpf_insn *to_buff, + bool emit_zext) { struct bpf_insn *to = to_buff; u32 imm_rnd = get_random_int(); @@ -939,6 +940,8 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); *to++ = BPF_ALU32_REG(from->code, from->dst_reg, BPF_REG_AX); + if (emit_zext) + *to++ = BPF_ZEXT_REG(from->dst_reg); break; case BPF_ALU64 | BPF_ADD | BPF_K: @@ -992,6 +995,10 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, off -= 2; *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ from->imm); *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + if (emit_zext) { + *to++ = BPF_ZEXT_REG(BPF_REG_AX); + off--; + } *to++ = BPF_JMP32_REG(from->code, from->dst_reg, BPF_REG_AX, off); break; @@ -1005,6 
+1012,8 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, case 0: /* Part 2 of BPF_LD | BPF_IMM | BPF_DW. */ *to++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_AX, imm_rnd ^ aux[0].imm); *to++ = BPF_ALU32_IMM(BPF_XOR, BPF_REG_AX, imm_rnd); + if (emit_zext) + *to++ = BPF_ZEXT_REG(BPF_REG_AX); *to++ = BPF_ALU64_REG(BPF_OR, aux[0].dst_reg, BPF_REG_AX); break; @@ -1088,7 +1097,8 @@ struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *prog) insn[1].code == 0) memcpy(aux, insn, sizeof(aux)); - rewritten = bpf_jit_blind_insn(insn, aux, insn_buff); + rewritten = bpf_jit_blind_insn(insn, aux, insn_buff, + clone->aux->verifier_zext); if (!rewritten) continue; -- 2.22.0
Re: [PATCH 0/2] ftrace: two fixes with func_probes handling
Naveen N. Rao wrote: Two patches addressing bugs in ftrace function probe handling. The first patch addresses a NULL pointer dereference reported by LTP tests, while the second one is a trivial patch to address a missing check for return value, found by code inspection. Steven, Can you please take a look at these patches? Thanks, Naveen
[PATCH 2/2] ftrace: Check for successful allocation of hash
In register_ftrace_function_probe(), we are not checking the return value of alloc_and_copy_ftrace_hash(). The subsequent call to ftrace_match_records() may end up dereferencing the same. Add a check to ensure this doesn't happen. Signed-off-by: Naveen N. Rao --- kernel/trace/ftrace.c | 5 + 1 file changed, 5 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0791eafb693d..0d5f7d4a4936 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -4328,6 +4328,11 @@ register_ftrace_function_probe(char *glob, struct trace_array *tr, old_hash = *orig_hash; hash = alloc_and_copy_ftrace_hash(FTRACE_HASH_DEFAULT_BITS, old_hash); + if (!hash) { + ret = -ENOMEM; + goto out; + } + ret = ftrace_match_records(hash, glob, strlen(glob)); /* Nothing found? */ -- 2.22.0
[PATCH 1/2] ftrace: Fix NULL pointer dereference in t_probe_next()
LTP testsuite on powerpc results in the below crash: Unable to handle kernel paging request for data at address 0x Faulting instruction address: 0xc029d800 Oops: Kernel access of bad area, sig: 11 [#1] LE SMP NR_CPUS=2048 NUMA PowerNV ... CPU: 68 PID: 96584 Comm: cat Kdump: loaded Tainted: GW NIP: c029d800 LR: c029dac4 CTR: c01e6ad0 REGS: c0002017fae8ba10 TRAP: 0300 Tainted: GW MSR: 90009033 CR: 28022422 XER: 2004 CFAR: c029d90c DAR: DSISR: 4000 IRQMASK: 0 ... NIP [c029d800] t_probe_next+0x60/0x180 LR [c029dac4] t_mod_start+0x1a4/0x1f0 Call Trace: [c0002017fae8bc90] [c0cdbc40] _cond_resched+0x10/0xb0 (unreliable) [c0002017fae8bce0] [c02a15b0] t_start+0xf0/0x1c0 [c0002017fae8bd30] [c04ec2b4] seq_read+0x184/0x640 [c0002017fae8bdd0] [c04a57bc] sys_read+0x10c/0x300 [c0002017fae8be30] [c000b388] system_call+0x5c/0x70 The test (ftrace_set_ftrace_filter.sh) is part of ftrace stress tests and the crash happens when the test does 'cat $TRACING_PATH/set_ftrace_filter'. The address points to the second line below, in t_probe_next(), where filter_hash is dereferenced: hash = iter->probe->ops.func_hash->filter_hash; size = 1 << hash->size_bits; This happens due to a race with register_ftrace_function_probe(). A new ftrace_func_probe is created and added into the func_probes list in trace_array under ftrace_lock. However, before initializing the filter, we drop ftrace_lock, and re-acquire it after acquiring regex_lock. If another process is trying to read set_ftrace_filter, it will be able to acquire ftrace_lock during this window and it will end up seeing a NULL filter_hash. Fix this by just checking for a NULL filter_hash in t_probe_next(). If the filter_hash is NULL, then this probe is just being added and we can simply return from here. Signed-off-by: Naveen N. 
Rao --- kernel/trace/ftrace.c | 4 1 file changed, 4 insertions(+) diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 7b037295a1f1..0791eafb693d 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3093,6 +3093,10 @@ t_probe_next(struct seq_file *m, loff_t *pos) hnd = &iter->probe_entry->hlist; hash = iter->probe->ops.func_hash->filter_hash; + + if (!hash) + return NULL; + size = 1 << hash->size_bits; retry: -- 2.22.0
[PATCH 0/2] ftrace: two fixes with func_probes handling
Two patches addressing bugs in ftrace function probe handling. The first patch addresses a NULL pointer dereference reported by LTP tests, while the second one is a trivial patch to address a missing check for return value, found by code inspection. - Naveen Naveen N. Rao (2): ftrace: Fix NULL pointer dereference in t_probe_next() ftrace: Check for successful allocation of hash kernel/trace/ftrace.c | 9 + 1 file changed, 9 insertions(+) -- 2.22.0
Re: [PATCH v2 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel
Steven Rostedt wrote: On Thu, 27 Jun 2019 20:58:20 +0530 "Naveen N. Rao" wrote: > But interesting, I don't see a synchronize_rcu_tasks() call > there. We felt we don't need it in this case. We patch the branch to ftrace with a nop first. Other cpus should see that first. But, now that I think about it, should we add a memory barrier to ensure the writes get ordered properly? Do you send an ipi to the other CPUs. I would just to be safe. We are handling this through ftrace_replace_code() and __ftrace_make_call_prep() below. For FTRACE_UPDATE_MAKE_CALL, we patch in the mflr, followed by smp_call_function(isync) and synchronize_rcu_tasks() before we proceed to patch the branch to ftrace. I don't see any other scenario where we end up in __ftrace_make_nop_kernel() without going through ftrace_replace_code(). For kernel modules, this can happen during module load/init and so, I patch out both instructions in __ftrace_make_call() above without any synchronization. Am I missing anything? No, I think I got confused ;-), it's the patch out that I was worried about, but when I was going through the scenario, I somehow turned it into the patching in (which I already audited :-p). I was going to reply with just the top part of this email, but then the confusion started :-/ OK, yes, patching out should be fine, and you already covered the patching in. Sorry for the noise. Just to confirm and totally remove the confusion, the patch does: : mflrr0 <-- preempt here bl _mcount : mflrr0 nop And this is fine regardless. OK, Reviewed-by: Steven Rostedt (VMware) Thanks for confirming! We do need an IPI to be sure, as you pointed out above. I will have the patching out take the same path to simplify things. - Naveen
Re: [PATCH v2 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel
Hi Steven, Thanks for the review! Steven Rostedt wrote: On Thu, 27 Jun 2019 16:53:52 +0530 "Naveen N. Rao" wrote: With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to enable function tracing and profiling. So far, with dynamic ftrace, we used to only patch out the branch to _mcount(). However, mflr is executed by the branch unit that can only execute one per cycle on POWER9 and shared with branches, so it would be nice to avoid it where possible. We cannot simply nop out the mflr either. When enabling function tracing, there can be a race if tracing is enabled when some thread was interrupted after executing a nop'ed out mflr. In this case, the thread would execute the now-patched-in branch to _mcount() without having executed the preceding mflr. To solve this, we now enable function tracing in 2 steps: patch in the mflr instruction, use 'smp_call_function(isync); synchronize_rcu_tasks()' to ensure all existing threads make progress, and then patch in the branch to _mcount(). We override ftrace_replace_code() with a powerpc64 variant for this purpose. You may want to explain that you do the reverse when patching it out. That is, patch out the "bl _mcount" into a nop and then patch out the "mflr r0". Sure. I think we can add: "When disabling function tracing, we can nop out the two instructions without need for any synchronization in between, as long as we nop out the branch to ftrace first. The lone 'mflr r0' is harmless. Finally, with FTRACE_UPDATE_MODIFY_CALL, no changes are needed since we are simply changing where the branch to ftrace goes." But interesting, I don't see a synchronize_rcu_tasks() call there. We felt we don't need it in this case. We patch the branch to ftrace with a nop first. Other cpus should see that first. But, now that I think about it, should we add a memory barrier to ensure the writes get ordered properly? Suggested-by: Nicholas Piggin Reviewed-by: Nicholas Piggin Signed-off-by: Naveen N. 
Rao --- arch/powerpc/kernel/trace/ftrace.c | 258 ++--- 1 file changed, 236 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 517662a56bdc..86c2b50dcaa9 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -125,7 +125,7 @@ __ftrace_make_nop(struct module *mod, { unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - unsigned int op, pop; + unsigned int op; /* read where this goes */ if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { @@ -160,8 +160,6 @@ __ftrace_make_nop(struct module *mod, #ifdef CONFIG_MPROFILE_KERNEL /* When using -mkernel_profile there is no load to jump over */ - pop = PPC_INST_NOP; - if (probe_kernel_read(&op, (void *)(ip - 4), 4)) { pr_err("Fetching instruction at %lx failed.\n", ip - 4); return -EFAULT; @@ -169,26 +167,23 @@ __ftrace_make_nop(struct module *mod, /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) { - pr_err("Unexpected instruction %08x around bl _mcount\n", op); + pr_err("Unexpected instruction %08x before bl _mcount\n", op); return -EINVAL; } -#else - /* -* Our original call site looks like: -* -* bl -* ld r2,XX(r1) -* -* Milton Miller pointed out that we can not simply nop the branch. -* If a task was preempted when calling a trace function, the nops -* will remove the way to restore the TOC in r2 and the r2 TOC will -* get corrupted. -* -* Use a b +8 to jump over the load. -*/ - pop = PPC_INST_BRANCH | 8; /* b +8 */ + /* We should patch out the bl to _mcount first */ + if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) { + pr_err("Patching NOP failed.\n"); + return -EPERM; + } + /* then, nop out the preceding 'mflr r0' as an optimization */ + if (op == PPC_INST_MFLR && + patch_instruction((unsigned int *)(ip - 4), PPC_INST_NOP)) { + pr_err("Patching NOP failed.\n"); + return -EPERM; + } +#else /* * Check what is in the next instruction. 
We can see ld r2,40(r1), but * on first pass after boot we will see mflr r0. @@ -202,12 +197,25 @@ __ftrace_make_nop(struct module *mod, pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op); return -EINVAL; } -#endif /* CONFIG_MPROFILE_KERNEL */ - if (patch_instruction((unsigned int *)ip, pop)) { + /* +* Our
Re: [PATCH v2 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel
Naveen N. Rao wrote: With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to enable function tracing and profiling. So far, with dynamic ftrace, we used to only patch out the branch to _mcount(). However, mflr is executed by the branch unit that can only execute one per cycle on POWER9 and shared with branches, so it would be nice to avoid it where possible. We cannot simply nop out the mflr either. When enabling function tracing, there can be a race if tracing is enabled when some thread was interrupted after executing a nop'ed out mflr. In this case, the thread would execute the now-patched-in branch to _mcount() without having executed the preceding mflr. To solve this, we now enable function tracing in 2 steps: patch in the mflr instruction, use 'smp_call_function(isync); synchronize_rcu_tasks()' to ensure all existing threads make progress, and then patch in the branch to _mcount(). We override ftrace_replace_code() with a powerpc64 variant for this purpose. Suggested-by: Nicholas Piggin Reviewed-by: Nicholas Piggin Signed-off-by: Naveen N. Rao --- arch/powerpc/kernel/trace/ftrace.c | 258 ++--- 1 file changed, 236 insertions(+), 22 deletions(-) I missed adding a comment here to explain the changes. As discussed in the previous series, I think we are ok with this patch from a CMODX perspective. For smp_call_function(), I decided to have it included in this patch since we know that we need it here for sure. I am not entirely sure we want to do that in patch_instruction() since ftrace doesn't seem to need it elsewhere. As Nick Piggin pointed out, we may want to have users of patch_instruction() (kprobes) add the necessary synchronization. Thanks, Naveen
Re: [PATCH v2 2/7] x86/ftrace: Fix use of flags in ftrace_replace_code()
Steven Rostedt wrote: On Thu, 27 Jun 2019 16:53:50 +0530 "Naveen N. Rao" wrote: In commit a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be schedulable), the generic ftrace_replace_code() function was modified to accept a flags argument in place of a single 'enable' flag. However, the x86 version of this function was not updated. Fix the same. Fixes: a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be schedulable") I don't mind this change, but it's not a bug, and I'm not sure it should have the fixes tag. The reason being, the FTRACE_MODIFY_ENABLE_FL is only set when ftrace is called by with the command flag FTRACE_MAY_SLEEP, which is never done on x86. I guess you meant to say that *FTRACE_MODIFY_MAY_SLEEP_FL* is only set with FTRACE_MAY_SLEEP. That said, I'm fine with the change as it makes it more robust, but by adding the fixes tag, you're going to get this into all the stable code, and I'm not sure that's really necessary. Agreed. Thanks for pointing this out. We can drop this patch from this series and I will re-post this as a simpler cleanup later on. Thanks, Naveen
Re: [PATCH v2 2/7] x86/ftrace: Fix use of flags in ftrace_replace_code()
Naveen N. Rao wrote: In commit a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be schedulable), the generic ftrace_replace_code() function was modified to accept a flags argument in place of a single 'enable' flag. However, the x86 version of this function was not updated. Fix the same. Fixes: a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be schedulable") Signed-off-by: Naveen N. Rao --- arch/x86/kernel/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) If the rest of this series is ok, and if Ingo and Steven are ok to have this series go through the powerpc tree, then I can re-post this particular patch for x86 after -rc1. Thanks, Naveen
[PATCH v2 6/7] kprobes/ftrace: Use ftrace_location() when [dis]arming probes
Ftrace location could include more than a single instruction in case of some architectures (powerpc64, for now). In this case, kprobe is permitted on any of those instructions, and uses ftrace infrastructure for functioning. However, [dis]arm_kprobe_ftrace() uses the kprobe address when setting up ftrace filter IP. This won't work if the address points to any instruction apart from the one that has a branch to _mcount(). To resolve this, have [dis]arm_kprobe_ftrace() use ftrace_location() to identify the filter IP. Acked-by: Masami Hiramatsu Signed-off-by: Naveen N. Rao --- kernel/kprobes.c | 10 +- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 445337c107e0..282ee704e2d8 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -978,10 +978,10 @@ static int prepare_kprobe(struct kprobe *p) /* Caller must lock kprobe_mutex */ static int arm_kprobe_ftrace(struct kprobe *p) { + unsigned long ftrace_ip = ftrace_location((unsigned long)p->addr); int ret = 0; - ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, - (unsigned long)p->addr, 0, 0); + ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, ftrace_ip, 0, 0); if (ret) { pr_debug("Failed to arm kprobe-ftrace at %pS (%d)\n", p->addr, ret); @@ -1005,13 +1005,14 @@ static int arm_kprobe_ftrace(struct kprobe *p) * non-empty filter_hash for IPMODIFY ops, we're safe from an accidental * empty filter_hash which would undesirably trace all functions. 
*/ - ftrace_set_filter_ip(&kprobe_ftrace_ops, (unsigned long)p->addr, 1, 0); + ftrace_set_filter_ip(&kprobe_ftrace_ops, ftrace_ip, 1, 0); return ret; } /* Caller must lock kprobe_mutex */ static int disarm_kprobe_ftrace(struct kprobe *p) { + unsigned long ftrace_ip = ftrace_location((unsigned long)p->addr); int ret = 0; if (kprobe_ftrace_enabled == 1) { @@ -1022,8 +1023,7 @@ static int disarm_kprobe_ftrace(struct kprobe *p) kprobe_ftrace_enabled--; - ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, - (unsigned long)p->addr, 1, 0); + ret = ftrace_set_filter_ip(&kprobe_ftrace_ops, ftrace_ip, 1, 0); WARN_ONCE(ret < 0, "Failed to disarm kprobe-ftrace at %pS (%d)\n", p->addr, ret); return ret; -- 2.22.0
[PATCH v2 5/7] ftrace: Update ftrace_location() for powerpc -mprofile-kernel
Now that we are patching the preceding 'mflr r0' instruction with -mprofile-kernel, we need to update ftrace_location() to recognise that as being part of ftrace. To do this, we introduce FTRACE_IP_EXTENSION to denote the length (in bytes) of the mcount caller. By default, this is set to 0. For powerpc with CONFIG_MPROFILE_KERNEL, we set this to MCOUNT_INSN_SIZE so that this works if ftrace_location() is called with the address of the actual ftrace entry call, or with the address of the preceding 'mflr r0' instruction. Note that we do not check if the preceding instruction is indeed the 'mflr r0'. Earlier -mprofile-kernel ABI included a 'std r0,stack' instruction between the 'mflr r0' and the 'bl _mcount'. This is harmless as the 'std r0,stack' instruction is inconsequential and is not relied upon. Suggested-by: Steven Rostedt (VMware) Signed-off-by: Naveen N. Rao --- arch/powerpc/include/asm/ftrace.h | 8 include/linux/ftrace.h| 9 + kernel/trace/ftrace.c | 2 +- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 3dfb80b86561..510a8ac8ac8d 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -61,6 +61,14 @@ struct dyn_arch_ftrace { #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS #define ARCH_SUPPORTS_FTRACE_OPS 1 + +#ifdef CONFIG_MPROFILE_KERNEL +/* + * We consider two instructions -- 'mflr r0', 'bl _mcount' -- to be part + * of ftrace with -mprofile-kernel + */ +#define FTRACE_IP_EXTENSIONMCOUNT_INSN_SIZE +#endif #endif #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fa653a561da5..310b514438cb 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -29,6 +29,15 @@ #define ARCH_SUPPORTS_FTRACE_OPS 0 #endif +/* + * This denotes the number of instructions (in bytes) that is used by the + * arch mcount caller. All instructions in this range will be owned by + * ftrace. 
+ */ +#ifndef FTRACE_IP_EXTENSION +#define FTRACE_IP_EXTENSION 0 +#endif + /* * If the arch's mcount caller does not support all of ftrace's * features, then it must call an indirect function that diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 21d8e201ee80..308555925b81 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1575,7 +1575,7 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end) */ unsigned long ftrace_location(unsigned long ip) { - return ftrace_location_range(ip, ip); + return ftrace_location_range(ip, ip + FTRACE_IP_EXTENSION); } /** -- 2.22.0
[PATCH v2 2/7] x86/ftrace: Fix use of flags in ftrace_replace_code()
In commit a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be schedulable"), the generic ftrace_replace_code() function was modified to accept a flags argument in place of a single 'enable' flag. However, the x86 version of this function was not updated. Fix the same. Fixes: a0572f687fb3c ("ftrace: Allow ftrace_replace_code() to be schedulable") Signed-off-by: Naveen N. Rao --- arch/x86/kernel/ftrace.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 0927bb158ffc..f34005a17051 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -573,8 +573,9 @@ static void run_sync(void) local_irq_disable(); } -void ftrace_replace_code(int enable) +void ftrace_replace_code(int mod_flags) { + int enable = mod_flags & FTRACE_MODIFY_ENABLE_FL; struct ftrace_rec_iter *iter; struct dyn_ftrace *rec; const char *report = "adding breakpoints"; -- 2.22.0
[PATCH v2 0/7] powerpc/ftrace: Patch out -mprofile-kernel instructions
Changes since v1 (https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=114556): - Patches 1,2,3 and 6: No changes - Patch 4: Add smp_call_function() to flush icache on all cpus after patching in the 'mflr r0' instruction. - Patch 5: Changes as per Steven Rostedt's suggestions. - Patch 7: Changes as per Masami and Joe Perches. -- On powerpc64, -mprofile-kernel results in two instructions being emitted: 'mflr r0' and 'bl _mcount'. So far, we were only nop'ing out the branch to _mcount(). This series implements an approach to also nop out the preceding mflr. - Naveen Naveen N. Rao (7): ftrace: Expose flags used for ftrace_replace_code() x86/ftrace: Fix use of flags in ftrace_replace_code() ftrace: Expose __ftrace_replace_code() powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel ftrace: Update ftrace_location() for powerpc -mprofile-kernel kprobes/ftrace: Use ftrace_location() when [dis]arming probes powerpc/kprobes: Allow probing on any ftrace address arch/powerpc/include/asm/ftrace.h| 8 + arch/powerpc/kernel/kprobes-ftrace.c | 32 +++- arch/powerpc/kernel/trace/ftrace.c | 258 --- arch/x86/kernel/ftrace.c | 3 +- include/linux/ftrace.h | 15 ++ kernel/kprobes.c | 10 +- kernel/trace/ftrace.c| 15 +- 7 files changed, 302 insertions(+), 39 deletions(-) -- 2.22.0
[PATCH v2 1/7] ftrace: Expose flags used for ftrace_replace_code()
Since ftrace_replace_code() is a __weak function and can be overridden, we need to expose the flags that can be set. So, move the flags enum to the header file. Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Naveen N. Rao --- include/linux/ftrace.h | 5 + kernel/trace/ftrace.c | 5 - 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 25e2995d4a4c..e97789c95c4e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -162,6 +162,11 @@ enum { FTRACE_OPS_FL_TRACE_ARRAY = 1 << 15, }; +enum { + FTRACE_MODIFY_ENABLE_FL = (1 << 0), + FTRACE_MODIFY_MAY_SLEEP_FL = (1 << 1), +}; + #ifdef CONFIG_DYNAMIC_FTRACE /* The hash used to know what functions callbacks trace */ struct ftrace_ops_hash { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 38277af44f5c..5710a6b3edc1 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -75,11 +75,6 @@ #define INIT_OPS_HASH(opsname) #endif -enum { - FTRACE_MODIFY_ENABLE_FL = (1 << 0), - FTRACE_MODIFY_MAY_SLEEP_FL = (1 << 1), -}; - struct ftrace_ops ftrace_list_end __read_mostly = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_STUB, -- 2.22.0
[PATCH v2 7/7] powerpc/kprobes: Allow probing on any ftrace address
With KPROBES_ON_FTRACE, kprobe is allowed to be inserted on instructions that branch to _mcount (referred to as ftrace location). With -mprofile-kernel, we now include the preceding 'mflr r0' as being part of the ftrace location. However, by default, probing on an instruction that is not actually the branch to _mcount() is prohibited, as that is considered to not be at an instruction boundary. This is not the case on powerpc, so allow the same by overriding arch_check_ftrace_location(). In addition, we update kprobe_ftrace_handler() to detect these scenarios and to pass the proper nip to the pre and post probe handlers. Signed-off-by: Naveen N. Rao --- arch/powerpc/kernel/kprobes-ftrace.c | 32 +++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 972cb28174b2..23c840748183 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -12,15 +12,35 @@ #include #include +/* + * With -mprofile-kernel, we patch two instructions -- the branch to _mcount + * as well as the preceding 'mflr r0'. Both these instructions are claimed + * by ftrace and we should allow probing on either instruction. 
+ */ +int arch_check_ftrace_location(struct kprobe *p) +{ + if (ftrace_location((unsigned long)p->addr)) + p->flags |= KPROBE_FLAG_FTRACE; + return 0; +} + /* Ftrace callback handler for kprobes */ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe *p; + int mflr_kprobe = 0; struct kprobe_ctlblk *kcb; p = get_kprobe((kprobe_opcode_t *)nip); - if (unlikely(!p) || kprobe_disabled(p)) + if (!p) { + p = get_kprobe((kprobe_opcode_t *)(nip - MCOUNT_INSN_SIZE)); + if (unlikely(!p)) + return; + mflr_kprobe = 1; + } + + if (kprobe_disabled(p)) return; kcb = get_kprobe_ctlblk(); @@ -33,6 +53,9 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, */ regs->nip -= MCOUNT_INSN_SIZE; + if (mflr_kprobe) + regs->nip -= MCOUNT_INSN_SIZE; + __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; if (!p->pre_handler || !p->pre_handler(p, regs)) { @@ -45,6 +68,8 @@ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, kcb->kprobe_status = KPROBE_HIT_SSDONE; p->post_handler(p, regs, 0); } + if (mflr_kprobe) + regs->nip += MCOUNT_INSN_SIZE; } /* * If pre_handler returns !0, it changes regs->nip. We have to @@ -57,6 +82,11 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler); int arch_prepare_kprobe_ftrace(struct kprobe *p) { + if ((unsigned long)p->addr & 0x03) { + pr_err("Attempt to register kprobe at an unaligned address\n"); + return -EILSEQ; + } + p->ainsn.insn = NULL; p->ainsn.boostable = -1; return 0; -- 2.22.0
[PATCH v2 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel
With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to enable function tracing and profiling. So far, with dynamic ftrace, we used to only patch out the branch to _mcount(). However, mflr is executed by the branch unit that can only execute one per cycle on POWER9 and shared with branches, so it would be nice to avoid it where possible. We cannot simply nop out the mflr either. When enabling function tracing, there can be a race if tracing is enabled when some thread was interrupted after executing a nop'ed out mflr. In this case, the thread would execute the now-patched-in branch to _mcount() without having executed the preceding mflr. To solve this, we now enable function tracing in 2 steps: patch in the mflr instruction, use 'smp_call_function(isync); synchronize_rcu_tasks()' to ensure all existing threads make progress, and then patch in the branch to _mcount(). We override ftrace_replace_code() with a powerpc64 variant for this purpose. Suggested-by: Nicholas Piggin Reviewed-by: Nicholas Piggin Signed-off-by: Naveen N. 
Rao --- arch/powerpc/kernel/trace/ftrace.c | 258 ++--- 1 file changed, 236 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 517662a56bdc..86c2b50dcaa9 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -125,7 +125,7 @@ __ftrace_make_nop(struct module *mod, { unsigned long entry, ptr, tramp; unsigned long ip = rec->ip; - unsigned int op, pop; + unsigned int op; /* read where this goes */ if (probe_kernel_read(&op, (void *)ip, sizeof(int))) { @@ -160,8 +160,6 @@ __ftrace_make_nop(struct module *mod, #ifdef CONFIG_MPROFILE_KERNEL /* When using -mkernel_profile there is no load to jump over */ - pop = PPC_INST_NOP; - if (probe_kernel_read(&op, (void *)(ip - 4), 4)) { pr_err("Fetching instruction at %lx failed.\n", ip - 4); return -EFAULT; @@ -169,26 +167,23 @@ __ftrace_make_nop(struct module *mod, /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ if (op != PPC_INST_MFLR && op != PPC_INST_STD_LR) { - pr_err("Unexpected instruction %08x around bl _mcount\n", op); + pr_err("Unexpected instruction %08x before bl _mcount\n", op); return -EINVAL; } -#else - /* -* Our original call site looks like: -* -* bl -* ld r2,XX(r1) -* -* Milton Miller pointed out that we can not simply nop the branch. -* If a task was preempted when calling a trace function, the nops -* will remove the way to restore the TOC in r2 and the r2 TOC will -* get corrupted. -* -* Use a b +8 to jump over the load. -*/ - pop = PPC_INST_BRANCH | 8; /* b +8 */ + /* We should patch out the bl to _mcount first */ + if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) { + pr_err("Patching NOP failed.\n"); + return -EPERM; + } + /* then, nop out the preceding 'mflr r0' as an optimization */ + if (op == PPC_INST_MFLR && + patch_instruction((unsigned int *)(ip - 4), PPC_INST_NOP)) { + pr_err("Patching NOP failed.\n"); + return -EPERM; + } +#else /* * Check what is in the next instruction. 
We can see ld r2,40(r1), but * on first pass after boot we will see mflr r0. @@ -202,12 +197,25 @@ __ftrace_make_nop(struct module *mod, pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, op); return -EINVAL; } -#endif /* CONFIG_MPROFILE_KERNEL */ - if (patch_instruction((unsigned int *)ip, pop)) { + /* +* Our original call site looks like: +* +* bl +* ld r2,XX(r1) +* +* Milton Miller pointed out that we can not simply nop the branch. +* If a task was preempted when calling a trace function, the nops +* will remove the way to restore the TOC in r2 and the r2 TOC will +* get corrupted. +* +* Use a b +8 to jump over the load. +*/ + if (patch_instruction((unsigned int *)ip, PPC_INST_BRANCH | 8)) { pr_err("Patching NOP failed.\n"); return -EPERM; } +#endif /* CONFIG_MPROFILE_KERNEL */ return 0; } @@ -421,6 +429,26 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) return -EPERM; } +#ifdef CONFIG_MPROFILE_KERNEL + /* Nop out the preceding 'mflr r0' as an optimization */ + if (probe_kernel_read(&op, (void *)(ip - 4), 4)) { +
[PATCH v2 3/7] ftrace: Expose __ftrace_replace_code()
While over-riding ftrace_replace_code(), we still want to reuse the existing __ftrace_replace_code() function. Rename the function and make it available for other kernel code. Signed-off-by: Naveen N. Rao --- include/linux/ftrace.h | 1 + kernel/trace/ftrace.c | 8 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index e97789c95c4e..fa653a561da5 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -456,6 +456,7 @@ ftrace_set_early_filter(struct ftrace_ops *ops, char *buf, int enable); /* defined in arch */ extern int ftrace_ip_converted(unsigned long ip); extern int ftrace_dyn_arch_init(void); +extern int ftrace_replace_code_rec(struct dyn_ftrace *rec, int enable); extern void ftrace_replace_code(int enable); extern int ftrace_update_ftrace_func(ftrace_func_t func); extern void ftrace_caller(void); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 5710a6b3edc1..21d8e201ee80 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2351,8 +2351,8 @@ unsigned long ftrace_get_addr_curr(struct dyn_ftrace *rec) return (unsigned long)FTRACE_ADDR; } -static int -__ftrace_replace_code(struct dyn_ftrace *rec, int enable) +int +ftrace_replace_code_rec(struct dyn_ftrace *rec, int enable) { unsigned long ftrace_old_addr; unsigned long ftrace_addr; @@ -2403,7 +2403,7 @@ void __weak ftrace_replace_code(int mod_flags) if (rec->flags & FTRACE_FL_DISABLED) continue; - failed = __ftrace_replace_code(rec, enable); + failed = ftrace_replace_code_rec(rec, enable); if (failed) { ftrace_bug(failed, rec); /* Stop processing */ @@ -5827,7 +5827,7 @@ void ftrace_module_enable(struct module *mod) rec->flags = cnt; if (ftrace_start_up && cnt) { - int failed = __ftrace_replace_code(rec, 1); + int failed = ftrace_replace_code_rec(rec, 1); if (failed) { ftrace_bug(failed, rec); goto out_loop; -- 2.22.0
[PATCH] recordmcount: Fix spurious mcount entries on powerpc
The recent change enabling HAVE_C_RECORDMCOUNT on powerpc started showing the following issue: # modprobe kprobe_example ftrace-powerpc: Not expected bl: opcode is 3c4c0001 WARNING: CPU: 0 PID: 227 at kernel/trace/ftrace.c:2001 ftrace_bug+0x90/0x318 Modules linked in: CPU: 0 PID: 227 Comm: modprobe Not tainted 5.2.0-rc6-00678-g1c329100b942 #2 NIP: c0264318 LR: c025d694 CTR: c0f5cd30 REGS: c1f2b7b0 TRAP: 0700 Not tainted (5.2.0-rc6-00678-g1c329100b942) MSR: 90010282b033 CR: 28228222 XER: CFAR: c02642fc IRQMASK: 0 NIP [c0264318] ftrace_bug+0x90/0x318 LR [c025d694] ftrace_process_locs+0x4f4/0x5e0 Call Trace: [c1f2ba40] [0004] 0x4 (unreliable) [c1f2bad0] [c025d694] ftrace_process_locs+0x4f4/0x5e0 [c1f2bb90] [c020ff10] load_module+0x25b0/0x30c0 [c1f2bd00] [c0210cb0] sys_finit_module+0xc0/0x130 [c1f2be20] [c000bda4] system_call+0x5c/0x70 Instruction dump: 419e0018 2f83 419e00bc 2f83ffea 409e00cc 481c 0fe0 3c62ff96 3901 3940 386386d0 48c4 <0fe0> 3ce20003 3901 3c62ff96 ---[ end trace 4c438d5cebf78381 ]--- ftrace failed to modify [] 0xc008012a0008 actual: 01:00:4c:3c Initializing ftrace call sites ftrace record flags: 200 (0) expected tramp: c006af4c Looking at the relocation records in __mcount_loc showed a few spurious entries: RELOCATION RECORDS FOR [__mcount_loc]: OFFSET TYPE VALUE R_PPC64_ADDR64.text.unlikely+0x0008 0008 R_PPC64_ADDR64.text.unlikely+0x0014 0010 R_PPC64_ADDR64.text.unlikely+0x0060 0018 R_PPC64_ADDR64.text.unlikely+0x00b4 0020 R_PPC64_ADDR64.init.text+0x0008 0028 R_PPC64_ADDR64.init.text+0x0014 The first entry in each section is incorrect. Looking at the relocation records, the spurious entries correspond to the R_PPC64_ENTRY records: RELOCATION RECORDS FOR [.text.unlikely]: OFFSET TYPE VALUE R_PPC64_REL64 .TOC.-0x0008 0008 R_PPC64_ENTRY *ABS* 0014 R_PPC64_REL24 _mcount The problem is that we are not validating the return value from get_mcountsym() in sift_rel_mcount(). With this entry, mcountsym is 0, but Elf_r_sym(relp) also ends up being 0. 
Fix this by ensuring mcountsym is valid before processing the entry. Fixes: c7d64b560ce80 ("powerpc/ftrace: Enable C Version of recordmcount") Signed-off-by: Naveen N. Rao --- scripts/recordmcount.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h index 13c5e6c8829c..47fca2c69a73 100644 --- a/scripts/recordmcount.h +++ b/scripts/recordmcount.h @@ -325,7 +325,8 @@ static uint_t *sift_rel_mcount(uint_t *mlocp, if (!mcountsym) mcountsym = get_mcountsym(sym0, relp, str0); - if (mcountsym == Elf_r_sym(relp) && !is_fake_mcount(relp)) { + if (mcountsym && mcountsym == Elf_r_sym(relp) && + !is_fake_mcount(relp)) { uint_t const addend = _w(_w(relp->r_offset) - recval + mcount_adjust); mrelp->r_offset = _w(offbase -- 2.22.0
Re: [PATCH 7/7] powerpc/kprobes: Allow probing on any ftrace address
Masami Hiramatsu wrote: On Tue, 18 Jun 2019 20:17:06 +0530 "Naveen N. Rao" wrote: With KPROBES_ON_FTRACE, kprobe is allowed to be inserted on instructions that branch to _mcount (referred to as ftrace location). With -mprofile-kernel, we now include the preceding 'mflr r0' as being part of the ftrace location. However, by default, probing on an instruction that is not actually the branch to _mcount() is prohibited, as that is considered to not be at an instruction boundary. This is not the case on powerpc, so allow the same by overriding arch_check_ftrace_location() In addition, we update kprobe_ftrace_handler() to detect this scenarios and to pass the proper nip to the pre and post probe handlers. Signed-off-by: Naveen N. Rao --- arch/powerpc/kernel/kprobes-ftrace.c | 30 1 file changed, 30 insertions(+) diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c index 972cb28174b2..6a0bd3c16cb6 100644 --- a/arch/powerpc/kernel/kprobes-ftrace.c +++ b/arch/powerpc/kernel/kprobes-ftrace.c @@ -12,14 +12,34 @@ #include #include +/* + * With -mprofile-kernel, we patch two instructions -- the branch to _mcount + * as well as the preceding 'mflr r0'. Both these instructions are claimed + * by ftrace and we should allow probing on either instruction. + */ +int arch_check_ftrace_location(struct kprobe *p) +{ + if (ftrace_location((unsigned long)p->addr)) + p->flags |= KPROBE_FLAG_FTRACE; + return 0; +} + /* Ftrace callback handler for kprobes */ void kprobe_ftrace_handler(unsigned long nip, unsigned long parent_nip, struct ftrace_ops *ops, struct pt_regs *regs) { struct kprobe *p; + int mflr_kprobe = 0; struct kprobe_ctlblk *kcb; p = get_kprobe((kprobe_opcode_t *)nip); + if (unlikely(!p)) { Hmm, is this really unlikely? If we put a kprobe on the second instruction address, we will see p == NULL always. 
+ p = get_kprobe((kprobe_opcode_t *)(nip - MCOUNT_INSN_SIZE)); + if (!p) Here will be unlikely, because we can not find kprobe at both of nip and nip - MCOUNT_INSN_SIZE. + return; + mflr_kprobe = 1; + } + if (unlikely(!p) || kprobe_disabled(p)) "unlikely(!p)" is not needed here. ... Joe Perches wrote: On Fri, 2019-06-21 at 23:50 +0900, Masami Hiramatsu wrote: On Tue, 18 Jun 2019 20:17:06 +0530 "Naveen N. Rao" wrote: trivia: > diff --git a/arch/powerpc/kernel/kprobes-ftrace.c b/arch/powerpc/kernel/kprobes-ftrace.c [] > @@ -57,6 +82,11 @@ NOKPROBE_SYMBOL(kprobe_ftrace_handler); > > int arch_prepare_kprobe_ftrace(struct kprobe *p) > { > + if ((unsigned long)p->addr & 0x03) { > + printk("Attempt to register kprobe at an unaligned address\n"); Please use the appropriate KERN_ or pr_ All good points. Thanks for the review. - Naveen