Re: [PATCH] powerpc/ftrace: Handle large kernel configs

2022-01-06 Thread Naveen N. Rao

Hi Christophe,
Sorry for the delay, catching up with some of the earlier emails now..


Christophe Leroy wrote:

Hi Naveen,

Le 16/10/2018 à 22:25, Naveen N. Rao a écrit :
...


+/*
+ * If this is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), we re-write the branch to
+ * instead go to ftrace_[regs_]caller() and note down the location of this
+ * trampoline.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+   int i, op;
+   unsigned long ptr;
+   static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
+
+   /* Is this a known long jump tramp? */
+   for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+   if (!ftrace_tramps[i])
+   break;
+   else if (ftrace_tramps[i] == tramp)
+   return 0;
+
+   /* Is this a known plt tramp? */
+   for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+   if (!ftrace_plt_tramps[i])
+   break;
+   else if (ftrace_plt_tramps[i] == tramp)
+   return -1;


I don't understand how this is supposed to work.
ftrace_plt_tramps[] being a static table, it is set to 0s at startup.
So the above loop breaks at first round.

Then ftrace_plt_tramps[i] is never/nowhere set.

So I just see it as useless.

Am I missing something ?


No, that's correct. I had posted a cleanup of this a year back as part 
of the ftrace_direct enablement. I have updated that series and will be 
posting it out soon (though I should rebase it atop your livepatch 
series):

http://lkml.kernel.org/r/bdc3710137c4bda8393532a789558bed22507cfe.1606412433.git.naveen.n.rao@linux.vnet.ibm.com


- Naveen


Re: [PATCH] powerpc/ftrace: Handle large kernel configs

2021-11-29 Thread Christophe Leroy

Hi Naveen,

Le 16/10/2018 à 22:25, Naveen N. Rao a écrit :

Currently, we expect to be able to reach ftrace_caller() from all
ftrace-enabled functions through a single relative branch. With large
kernel configs, we see functions outside of 32MB of ftrace_caller()
causing ftrace_init() to bail.

In such configurations, gcc/ld emits two types of trampolines for mcount():
1. A long_branch, which has a single branch to mcount() for functions that
are one hop away from mcount():
c19e8544 <00031b56.long_branch._mcount>:
c19e8544:   4a 69 3f ac b   c007c4f0 
<._mcount>

2. A plt_branch, for functions that are farther away from mcount():
c51f33f8 <0008ba04.plt_branch._mcount>:
c51f33f8:   3d 82 ff a4 addis   r12,r2,-92
c51f33fc:   e9 8c 04 20 ld  r12,1056(r12)
c51f3400:   7d 89 03 a6 mtctr   r12
c51f3404:   4e 80 04 20 bctr

We can reuse those trampolines for ftrace if we can have those
trampolines go to ftrace_caller() instead. However, with ABIv2, we
cannot depend on r2 being valid. As such, we use only the long_branch
trampolines by patching those to instead branch to ftrace_caller or
ftrace_regs_caller.

In addition, we add additional trampolines around .text and .init.text
to catch locations that are covered by the plt branches. This allows
ftrace to work with most large kernel configurations.

For now, we always patch the trampolines to go to ftrace_regs_caller,
which is slightly inefficient. This can be optimized further at a later
point.

Signed-off-by: Naveen N. Rao 
---
Since RFC:
- Change to patch long_branch to go to ftrace_caller, rather than
   patching mcount()
- Stop using plt_branch since it can't be relied on for ABIv2
- Add trampolines around .text and .init.text to catch remaining
   locations

- Naveen

  arch/powerpc/kernel/trace/ftrace.c| 261 +-
  arch/powerpc/kernel/trace/ftrace_64.S |  12 ++
  arch/powerpc/kernel/vmlinux.lds.S |  13 +-
  3 files changed, 281 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 4bfbb54dee51..4bf051d3e21e 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c


...


+/*
+ * If this is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), we re-write the branch to
+ * instead go to ftrace_[regs_]caller() and note down the location of this
+ * trampoline.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+   int i, op;
+   unsigned long ptr;
+   static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
+
+   /* Is this a known long jump tramp? */
+   for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+   if (!ftrace_tramps[i])
+   break;
+   else if (ftrace_tramps[i] == tramp)
+   return 0;
+
+   /* Is this a known plt tramp? */
+   for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+   if (!ftrace_plt_tramps[i])
+   break;
+   else if (ftrace_plt_tramps[i] == tramp)
+   return -1;


I don't understand how this is supposed to work.
ftrace_plt_tramps[] being a static table, it is set to 0s at startup.
So the above loop breaks at first round.

Then ftrace_plt_tramps[i] is never/nowhere set.

So I just see it as useless.

Am I missing something ?

Thanks
Christophe


+
+   /* New trampoline -- read where this goes */
+   if (probe_kernel_read(&op, (void *)tramp, sizeof(int))) {
+   pr_debug("Fetching opcode failed.\n");
+   return -1;
+   }
+
+   /* Is this a 24 bit branch? */
+   if (!is_b_op(op)) {
+   pr_debug("Trampoline is not a long branch tramp.\n");
+   return -1;
+   }
+
+   /* lets find where the pointer goes */
+   ptr = find_bl_target(tramp, op);
+
+   if (ptr != ppc_global_function_entry((void *)_mcount)) {
+   pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
+   return -1;
+   }
+
+   /* Let's re-write the tramp to go to ftrace_[regs_]caller */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+   ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
+#else
+   ptr = ppc_global_function_entry((void *)ftrace_caller);
+#endif
+   if (!create_branch((void *)tramp, ptr, 0)) {
+   pr_debug("%ps is not reachable from existing mcount tramp\n",
+   (void *)ptr);
+   return -1;
+   }
+
+   if (patch_branch((unsigned int *)tramp, ptr, 0)) {
+   pr_debug("REL24 out of range!\n");
+   return -1;
+   }
+
+   if (add_ftrace_tramp(tramp)) {
+   pr_debug("No tramp locations left\n");
+   return -1;
+   }

[PATCH] powerpc/ftrace: Handle large kernel configs

2018-10-16 Thread Naveen N. Rao
Currently, we expect to be able to reach ftrace_caller() from all
ftrace-enabled functions through a single relative branch. With large
kernel configs, we see functions outside of 32MB of ftrace_caller()
causing ftrace_init() to bail.

In such configurations, gcc/ld emits two types of trampolines for mcount():
1. A long_branch, which has a single branch to mcount() for functions that
   are one hop away from mcount():
c19e8544 <00031b56.long_branch._mcount>:
c19e8544:   4a 69 3f ac b   c007c4f0 
<._mcount>

2. A plt_branch, for functions that are farther away from mcount():
c51f33f8 <0008ba04.plt_branch._mcount>:
c51f33f8:   3d 82 ff a4 addis   r12,r2,-92
c51f33fc:   e9 8c 04 20 ld  r12,1056(r12)
c51f3400:   7d 89 03 a6 mtctr   r12
c51f3404:   4e 80 04 20 bctr

We can reuse those trampolines for ftrace if we can have those
trampolines go to ftrace_caller() instead. However, with ABIv2, we
cannot depend on r2 being valid. As such, we use only the long_branch
trampolines by patching those to instead branch to ftrace_caller or
ftrace_regs_caller.

In addition, we add additional trampolines around .text and .init.text
to catch locations that are covered by the plt branches. This allows
ftrace to work with most large kernel configurations.

For now, we always patch the trampolines to go to ftrace_regs_caller,
which is slightly inefficient. This can be optimized further at a later
point.

Signed-off-by: Naveen N. Rao 
---
Since RFC:
- Change to patch long_branch to go to ftrace_caller, rather than 
  patching mcount()
- Stop using plt_branch since it can't be relied on for ABIv2
- Add trampolines around .text and .init.text to catch remaining 
  locations

- Naveen

 arch/powerpc/kernel/trace/ftrace.c| 261 +-
 arch/powerpc/kernel/trace/ftrace_64.S |  12 ++
 arch/powerpc/kernel/vmlinux.lds.S |  13 +-
 3 files changed, 281 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/trace/ftrace.c 
b/arch/powerpc/kernel/trace/ftrace.c
index 4bfbb54dee51..4bf051d3e21e 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -30,6 +30,16 @@
 
 
 #ifdef CONFIG_DYNAMIC_FTRACE
+
+/*
+ * We generally only have a single long_branch tramp and at most 2 or 3 plt
+ * tramps generated. But, we don't use the plt tramps currently. We also allot
+ * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
+ * tramps in total. Set aside 8 just to be sure.
+ */
+#define NUM_FTRACE_TRAMPS   8
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
 static unsigned int
 ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
 {
@@ -85,13 +95,16 @@ static int test_24bit_addr(unsigned long ip, unsigned long 
addr)
return create_branch((unsigned int *)ip, addr, 0);
 }
 
-#ifdef CONFIG_MODULES
-
 static int is_bl_op(unsigned int op)
 {
	return (op & 0xfc000003) == 0x48000001;
 }
 
+static int is_b_op(unsigned int op)
+{
+   return (op & 0xfc000003) == 0x48000000;
+}
+
 static unsigned long find_bl_target(unsigned long ip, unsigned int op)
 {
static int offset;
@@ -104,6 +117,7 @@ static unsigned long find_bl_target(unsigned long ip, 
unsigned int op)
return ip + (long)offset;
 }
 
+#ifdef CONFIG_MODULES
 #ifdef CONFIG_PPC64
 static int
 __ftrace_make_nop(struct module *mod,
@@ -270,6 +284,146 @@ __ftrace_make_nop(struct module *mod,
 #endif /* PPC64 */
 #endif /* CONFIG_MODULES */
 
+static unsigned long find_ftrace_tramp(unsigned long ip)
+{
+   int i;
+
+   /*
+* We have the compiler generated long_branch tramps at the end
+* and we prefer those
+*/
+   for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--)
+   if (!ftrace_tramps[i])
+   continue;
+   else if (create_branch((void *)ip, ftrace_tramps[i], 0))
+   return ftrace_tramps[i];
+
+   return 0;
+}
+
+static int add_ftrace_tramp(unsigned long tramp)
+{
+   int i;
+
+   for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+   if (!ftrace_tramps[i]) {
+   ftrace_tramps[i] = tramp;
+   return 0;
+   }
+
+   return -1;
+}
+
+/*
+ * If this is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), we re-write the branch to
+ * instead go to ftrace_[regs_]caller() and note down the location of this
+ * trampoline.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+   int i, op;
+   unsigned long ptr;
+   static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
+
+   /* Is this a known long jump tramp? */
+   for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+   if (!ftrace_tramps[i])
+   break;
+