Re: [PATCH 2/3] arch/powerpc : optprobes for powerpc core

2016-09-09 Thread Masami Hiramatsu
Hi Anju,

On Fri, 9 Sep 2016 16:19:41 +0530
Anju T Sudhakar  wrote:
> >> +void arch_unoptimize_kprobe(struct optimized_kprobe *op)
> >> +{
> >> +  arch_arm_kprobe(&op->kp);
> >> +}
> >> +
> >> +void arch_unoptimize_kprobes(struct list_head *oplist,
> >> +   struct list_head *done_list)
> >> +{
> >> +  struct optimized_kprobe *op;
> >> +  struct optimized_kprobe *tmp;
> >> +
> >> +  list_for_each_entry_safe(op, tmp, oplist, list) {
> >> +  arch_unoptimize_kprobe(op);
> >> +  list_move(&op->list, done_list);
> >> +  }
> >> +}
> >> +
> >> +int arch_within_optimized_kprobe(struct optimized_kprobe *op,
> >> +   unsigned long addr)
> >> +{
> >> +  return 0;
> > Here, please check the address range, the same as the arm32 optprobe
> > implementation does.
> >
> > e.g.
> >  return ((unsigned long)op->kp.addr <= addr &&
> >  (unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
> >
> >
> > Thank you,
> 
> Do we really need this? The only case this check will succeed is if
> kp.addr is not a multiple of 4, which is not a valid address at all
> on Power. So should we check for that again here?

Yes, since that is an exported function, it can be used from other
parts for other purposes (e.g. someone may want to use it for
debugging). Please do not optimize the code only for the current
implementation, but for the generic use case.
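For reference, a minimal sketch of the full function with that check,
mirroring the arm32 implementation (RELATIVEJUMP_SIZE is assumed here
to be the size of the branch written at the probe site):

int arch_within_optimized_kprobe(struct optimized_kprobe *op,
				 unsigned long addr)
{
	/* Report whether addr falls inside the region replaced
	 * by the optimized probe, as the arm32 port does. */
	return ((unsigned long)op->kp.addr <= addr &&
		(unsigned long)op->kp.addr + RELATIVEJUMP_SIZE > addr);
}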

Thank you,

-- 
Masami Hiramatsu 


Re: [PATCH 2/3] arch/powerpc : optprobes for powerpc core

2016-09-09 Thread Anju T Sudhakar

Hi Masami,


Thank you for reviewing the patch.


On Thursday 08 September 2016 10:17 PM, Masami Hiramatsu wrote:

On Wed,  7 Sep 2016 15:03:11 +0530
Anju T Sudhakar  wrote:


Instructions which can be emulated are candidates for optimization.
Before optimization, ensure that the distance between the allocated
detour buffer and the instruction being probed is within ± 32MB.

Signed-off-by: Anju T Sudhakar 
---
  arch/powerpc/include/asm/sstep.h |   1 +
  arch/powerpc/kernel/optprobes.c  | 329 +++
  arch/powerpc/lib/sstep.c |  21 +++
  3 files changed, 351 insertions(+)
  create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index d3a42cc..cd5f6ab 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -25,6 +25,7 @@ struct pt_regs;
  
  /* Emulate instructions that cause a transfer of control. */

  extern int emulate_step(struct pt_regs *regs, unsigned int instr);
+extern int optprobe_conditional_branch_check(unsigned int instr);
  
  enum instruction_type {

COMPUTE,	/* arith/logical/CR op, etc. */
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000..7983d07
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,329 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+DEFINE_INSN_CACHE_OPS(ppc_optinsn)
+
+#define TMPL_CALL_HDLR_IDX \
+   (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX   \
+   (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_IDX   \
+   (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_KP_IDX\
+   (optprobe_template_kp_addr - optprobe_template_entry)
+#define TMPL_OP1_IDX   \
+   (optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_INSN_IDX  \
+   (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX   \
+   (optprobe_template_end - optprobe_template_entry)
+
+static bool insn_page_in_use;
+
+static void *__ppc_alloc_insn_page(void)
+{
+   if (insn_page_in_use)
+   return NULL;
+   insn_page_in_use = true;
+   return &optinsn_slot;
+}
+
+static void __ppc_free_insn_page(void *page __maybe_unused)
+{
+   insn_page_in_use = false;
+}
+
+struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+   .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+   .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+   /* insn_size initialized later */
+   .alloc = __ppc_alloc_insn_page,
+   .free = __ppc_free_insn_page,
+   .nr_garbage = 0,
+};
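For context, DEFINE_INSN_CACHE_OPS(ppc_optinsn) above generates the
get_ppc_optinsn_slot()/free_ppc_optinsn_slot() wrappers used below; as
far as I understand, its expansion in include/linux/kprobes.h is
roughly:

/* Sketch of the generated wrappers, for reference */
static inline kprobe_opcode_t *get_ppc_optinsn_slot(void)
{
	return __get_insn_slot(&kprobe_ppc_optinsn_slots);
}

static inline void free_ppc_optinsn_slot(kprobe_opcode_t *slot, int dirty)
{
	__free_insn_slot(&kprobe_ppc_optinsn_slots, slot, dirty);
}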
+
+kprobe_opcode_t *ppc_get_optinsn_slot(struct optimized_kprobe *op)
+{
+   /*
+* The insn slot is allocated from the reserved
+* area (i.e. &optinsn_slot). We are not optimizing probes
+* at module_addr now.
+*/
+   if (is_kernel_addr((unsigned long)op->kp.addr))
+   return get_ppc_optinsn_slot();
+   return NULL;
+}
+
+static void ppc_free_optinsn_slot(struct optimized_kprobe *op)
+{
+   if (!op->optinsn.insn)
+   return;
+   if (is_kernel_addr((unsigned long)op->kp.addr))
+   free_ppc_optinsn_slot(op->optinsn.insn, 0);
+}
+
+static unsigned long can_optimize(struct kprobe *p)
+{
+   struct pt_regs *regs;
+   unsigned int instr;
+
+   /*
+* Not optimizing the kprobe placed by
+* kretprobe during boot time
+*/
+   if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+   return 0;
+
+   regs = kmalloc(sizeof(*regs), GFP_KERNEL);
+   if (!regs)
+   return -ENOMEM;
+   memset(regs, 0, sizeof(struct pt_regs));
+   memcpy(regs, current_pt_regs(), sizeof(struct pt_regs));
+   regs->nip = (unsigned long)p->addr;
+   instr = *p->ainsn.insn;
+
+   /* Ensure the instruction can be emulated */
+   if (emulate_step(regs, instr) != 1)
+   return 0;
+   /* Conditional branches are not optimized */
+   if (optprobe_conditional_branch_check(instr) != 1)
+   return 0;
+   return regs->nip;

Could you free regs here? Or allocate it on stack.


Yes, 'regs' can be freed here.
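A minimal sketch of what that could look like with regs moved onto the
stack (assuming a struct pt_regs is acceptable on the kernel stack
here), so there is nothing to allocate, free, or leak:

static unsigned long can_optimize(struct kprobe *p)
{
	struct pt_regs regs;	/* on-stack copy, nothing to free */
	unsigned int instr;

	/* Not optimizing the kprobe placed by kretprobe */
	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
		return 0;

	memcpy(&regs, current_pt_regs(), sizeof(regs));
	regs.nip = (unsigned long)p->addr;
	instr = *p->ainsn.insn;

	/* Ensure the instruction can be emulated and is not a
	 * conditional branch */
	if (emulate_step(&regs, instr) != 1)
		return 0;
	if (optprobe_conditional_branch_check(instr) != 1)
		return 0;
	return regs.nip;
}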



+}
+
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+   struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+   unsigned long flags;
+
+ 

Re: [PATCH 2/3] arch/powerpc : optprobes for powerpc core

2016-09-08 Thread Masami Hiramatsu
On Wed,  7 Sep 2016 15:03:11 +0530
Anju T Sudhakar  wrote:

> Instructions which can be emulated are candidates for optimization.
> Before optimization, ensure that the distance between the allocated
> detour buffer and the instruction being probed is within ± 32MB.
> 
> Signed-off-by: Anju T Sudhakar 
> ---
>  arch/powerpc/include/asm/sstep.h |   1 +
>  arch/powerpc/kernel/optprobes.c  | 329 +++
>  arch/powerpc/lib/sstep.c |  21 +++
>  3 files changed, 351 insertions(+)
>  create mode 100644 arch/powerpc/kernel/optprobes.c
> 
> diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
> index d3a42cc..cd5f6ab 100644
> --- a/arch/powerpc/include/asm/sstep.h
> +++ b/arch/powerpc/include/asm/sstep.h
> @@ -25,6 +25,7 @@ struct pt_regs;
>  
>  /* Emulate instructions that cause a transfer of control. */
>  extern int emulate_step(struct pt_regs *regs, unsigned int instr);
> +extern int optprobe_conditional_branch_check(unsigned int instr);
>  
>  enum instruction_type {
>   COMPUTE,	/* arith/logical/CR op, etc. */
> diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
> new file mode 100644
> index 000..7983d07
> --- /dev/null
> +++ b/arch/powerpc/kernel/optprobes.c
> @@ -0,0 +1,329 @@
> +/*
> + * Code for Kernel probes Jump optimization.
> + *
> + * Copyright 2016, Anju T, IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +DEFINE_INSN_CACHE_OPS(ppc_optinsn)
> +
> +#define TMPL_CALL_HDLR_IDX   \
> + (optprobe_template_call_handler - optprobe_template_entry)
> +#define TMPL_EMULATE_IDX \
> + (optprobe_template_call_emulate - optprobe_template_entry)
> +#define TMPL_RET_IDX \
> + (optprobe_template_ret - optprobe_template_entry)
> +#define TMPL_KP_IDX  \
> + (optprobe_template_kp_addr - optprobe_template_entry)
> +#define TMPL_OP1_IDX \
> + (optprobe_template_op_address1 - optprobe_template_entry)
> +#define TMPL_INSN_IDX\
> + (optprobe_template_insn - optprobe_template_entry)
> +#define TMPL_END_IDX \
> + (optprobe_template_end - optprobe_template_entry)
> +
> +static bool insn_page_in_use;
> +
> +static void *__ppc_alloc_insn_page(void)
> +{
> + if (insn_page_in_use)
> + return NULL;
> + insn_page_in_use = true;
> + return &optinsn_slot;
> +}
> +
> +static void __ppc_free_insn_page(void *page __maybe_unused)
> +{
> + insn_page_in_use = false;
> +}
> +
> +struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
> + .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
> + .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
> + /* insn_size initialized later */
> + .alloc = __ppc_alloc_insn_page,
> + .free = __ppc_free_insn_page,
> + .nr_garbage = 0,
> +};
> +
> +kprobe_opcode_t *ppc_get_optinsn_slot(struct optimized_kprobe *op)
> +{
> + /*
> +  * The insn slot is allocated from the reserved
> +  * area (i.e. &optinsn_slot). We are not optimizing probes
> +  * at module_addr now.
> +  */
> + if (is_kernel_addr((unsigned long)op->kp.addr))
> + return get_ppc_optinsn_slot();
> + return NULL;
> +}
> +
> +static void ppc_free_optinsn_slot(struct optimized_kprobe *op)
> +{
> + if (!op->optinsn.insn)
> + return;
> + if (is_kernel_addr((unsigned long)op->kp.addr))
> + free_ppc_optinsn_slot(op->optinsn.insn, 0);
> +}
> +
> +static unsigned long can_optimize(struct kprobe *p)
> +{
> + struct pt_regs *regs;
> + unsigned int instr;
> +
> + /*
> +  * Not optimizing the kprobe placed by
> +  * kretprobe during boot time
> +  */
> + if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
> + return 0;
> +
> + regs = kmalloc(sizeof(*regs), GFP_KERNEL);
> + if (!regs)
> + return -ENOMEM;
> + memset(regs, 0, sizeof(struct pt_regs));
> + memcpy(regs, current_pt_regs(), sizeof(struct pt_regs));
> + regs->nip = (unsigned long)p->addr;
> + instr = *p->ainsn.insn;
> +
> + /* Ensure the instruction can be emulated */
> + if (emulate_step(regs, instr) != 1)
> + return 0;
> + /* Conditional branches are not optimized */
> + if (optprobe_conditional_branch_check(instr) != 1)
> + return 0;
> + return regs->nip;

Could you free regs here? Or allocate it on stack.

> +}
> +
> +static void
> +optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
> +{
> + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
> + unsigned 

[PATCH 2/3] arch/powerpc : optprobes for powerpc core

2016-09-07 Thread Anju T Sudhakar
Instructions which can be emulated are candidates for optimization.
Before optimization, ensure that the distance between the allocated
detour buffer and the instruction being probed is within ± 32MB.
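The ± 32MB limit comes from the reach of a PowerPC relative branch: the
I-form `b` instruction carries a 26-bit signed, word-aligned offset. A
hypothetical helper for such a check might look like:

/* Hypothetical helper: can a single relative branch at 'from' reach
 * 'to'? The branch offset is a 26-bit signed, word-aligned value. */
static int branch_in_range(unsigned long from, unsigned long to)
{
	long offset = (long)to - (long)from;

	return offset >= -0x2000000 && offset <= 0x1fffffc;
}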

Signed-off-by: Anju T Sudhakar 
---
 arch/powerpc/include/asm/sstep.h |   1 +
 arch/powerpc/kernel/optprobes.c  | 329 +++
 arch/powerpc/lib/sstep.c |  21 +++
 3 files changed, 351 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index d3a42cc..cd5f6ab 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -25,6 +25,7 @@ struct pt_regs;
 
 /* Emulate instructions that cause a transfer of control. */
 extern int emulate_step(struct pt_regs *regs, unsigned int instr);
+extern int optprobe_conditional_branch_check(unsigned int instr);
 
 enum instruction_type {
   COMPUTE,	/* arith/logical/CR op, etc. */
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000..7983d07
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,329 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+DEFINE_INSN_CACHE_OPS(ppc_optinsn)
+
+#define TMPL_CALL_HDLR_IDX \
+   (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX   \
+   (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_IDX   \
+   (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_KP_IDX\
+   (optprobe_template_kp_addr - optprobe_template_entry)
+#define TMPL_OP1_IDX   \
+   (optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_INSN_IDX  \
+   (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX   \
+   (optprobe_template_end - optprobe_template_entry)
+
+static bool insn_page_in_use;
+
+static void *__ppc_alloc_insn_page(void)
+{
+   if (insn_page_in_use)
+   return NULL;
+   insn_page_in_use = true;
+   return &optinsn_slot;
+}
+
+static void __ppc_free_insn_page(void *page __maybe_unused)
+{
+   insn_page_in_use = false;
+}
+
+struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+   .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+   .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+   /* insn_size initialized later */
+   .alloc = __ppc_alloc_insn_page,
+   .free = __ppc_free_insn_page,
+   .nr_garbage = 0,
+};
+
+kprobe_opcode_t *ppc_get_optinsn_slot(struct optimized_kprobe *op)
+{
+   /*
+* The insn slot is allocated from the reserved
+* area (i.e. &optinsn_slot). We are not optimizing probes
+* at module_addr now.
+*/
+   if (is_kernel_addr((unsigned long)op->kp.addr))
+   return get_ppc_optinsn_slot();
+   return NULL;
+}
+
+static void ppc_free_optinsn_slot(struct optimized_kprobe *op)
+{
+   if (!op->optinsn.insn)
+   return;
+   if (is_kernel_addr((unsigned long)op->kp.addr))
+   free_ppc_optinsn_slot(op->optinsn.insn, 0);
+}
+
+static unsigned long can_optimize(struct kprobe *p)
+{
+   struct pt_regs *regs;
+   unsigned int instr;
+
+   /*
+* Not optimizing the kprobe placed by
+* kretprobe during boot time
+*/
+   if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+   return 0;
+
+   regs = kmalloc(sizeof(*regs), GFP_KERNEL);
+   if (!regs)
+   return -ENOMEM;
+   memset(regs, 0, sizeof(struct pt_regs));
+   memcpy(regs, current_pt_regs(), sizeof(struct pt_regs));
+   regs->nip = (unsigned long)p->addr;
+   instr = *p->ainsn.insn;
+
+   /* Ensure the instruction can be emulated */
+   if (emulate_step(regs, instr) != 1)
+   return 0;
+   /* Conditional branches are not optimized */
+   if (optprobe_conditional_branch_check(instr) != 1)
+   return 0;
+   return regs->nip;
+}
+
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+   struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+   unsigned long flags;
+
+   local_irq_save(flags);
+
+   if (kprobe_running()) {
+   kprobes_inc_nmissed_count(&op->kp);
+   } else {
+   __this_cpu_write(current_kprobe, &op->kp);
+   kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+   opt_pre_handler(&op->kp, regs);
+   

Re: [RFC PATCH 2/3] arch/powerpc : optprobes for powerpc core

2016-05-20 Thread Masami Hiramatsu
On Thu, 19 May 2016 13:19:42 +0530
Anju T  wrote:

> >> +void arch_unoptimize_kprobes(struct list_head *oplist,
> >> +   struct list_head *done_list)
> >> +{
> >> +  struct optimized_kprobe *op;
> >> +  struct optimized_kprobe *tmp;
> >> +
> >> +  list_for_each_entry_safe(op, tmp, oplist, list) {
> >> +  arch_unoptimize_kprobe(op);
> >> +  list_move(&op->list, done_list);
> >> +  }
> >> +}
> >> +
> >> +int arch_within_optimized_kprobe(struct optimized_kprobe *op,
> >> +   unsigned long addr)
> >> +{
> > Please make sure addr != op->kp.addr and addr is aligned.
> 
> The only case this check will succeed is if kp.addr is not a multiple of 4,
> which is not a valid address at all on Power. So should we check for that
> again here?

Ah, right. OK, so we may not need that.

Thank you,
-- 
Masami Hiramatsu 

Re: [RFC PATCH 2/3] arch/powerpc : optprobes for powerpc core

2016-05-19 Thread Anju T

Hi Masami,

 Thank you for reviewing the patch.

On Wednesday 18 May 2016 08:43 PM, Masami Hiramatsu wrote:

On Wed, 18 May 2016 02:09:37 +0530
Anju T  wrote:


Instruction slots for the detour buffer are allocated from
the reserved area. For the time being, 64KB is reserved
in memory for this purpose. ppc_get_optinsn_slot() and
ppc_free_optinsn_slot() handle the allocation and freeing
of memory from this area.

Thank you for porting optprobe on ppc!!

I have some comments on this patch.


Signed-off-by: Anju T 
---
  arch/powerpc/kernel/optprobes.c | 463 
  1 file changed, 463 insertions(+)
  create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000..50a60c1
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,463 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Reserve an area to allocate slots for detour buffer */
+extern void  optprobe_trampoline_holder(void)
+{
+   asm volatile(".global optinsn_slot\n"
+   "optinsn_slot:\n"
+   ".space 65536");
+}

Would we better move this into optprobes_head.S?


Yes. Will do.
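A sketch of how that reservation might look once moved into a new
arch/powerpc/kernel/optprobes_head.S (same 64KB area, same symbol):

	.global	optinsn_slot
optinsn_slot:
	/* Reserve a 64KB area for detour-buffer slots */
	.space	65536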

+
+#define SLOT_SIZE 65536
+#define TMPL_CALL_HDLR_IDX \
+   (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX   \
+   (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_BRANCH_IDX\
+   (optprobe_template_ret_branch - optprobe_template_entry)
+#define TMPL_RET_IDX   \
+   (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_OP1_IDX   \
+   (optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_OP2_IDX   \
+   (optprobe_template_op_address2 - optprobe_template_entry)
+#define TMPL_INSN_IDX  \
+   (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX   \
+   (optprobe_template_end - optprobe_template_entry)
+
+struct kprobe_ppc_insn_page {
+   struct list_head list;
+   kprobe_opcode_t *insns; /* Page of instruction slots */
+   struct kprobe_insn_cache *cache;
+   int nused;
+   int ngarbage;
+   char slot_used[];
+};
+
+#define PPC_KPROBE_INSN_PAGE_SIZE(slots)   \
+   (offsetof(struct kprobe_ppc_insn_page, slot_used) + \
+   (sizeof(char) * (slots)))
+
+enum ppc_kprobe_slot_state {
+   SLOT_CLEAN = 0,
+   SLOT_DIRTY = 1,
+   SLOT_USED = 2,
+};
+
+static struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+   .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+   .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+   /* .insn_size is initialized later */
+   .nr_garbage = 0,
+};
+
+static int ppc_slots_per_page(struct kprobe_insn_cache *c)
+{
+   /*
+* Here the #slots per page differs from x86 as we have
+* only 64KB reserved.
+*/
+   return SLOT_SIZE / (c->insn_size * sizeof(kprobe_opcode_t));
+}
+
+/* Return 1 if all garbages are collected, otherwise 0. */
+static int collect_one_slot(struct kprobe_ppc_insn_page *kip, int idx)
+{
+   kip->slot_used[idx] = SLOT_CLEAN;
+   kip->nused--;
+   return 0;
+}
+
+static int collect_garbage_slots(struct kprobe_insn_cache *c)
+{
+   struct kprobe_ppc_insn_page *kip, *next;
+
+   /* Ensure no-one is interrupted on the garbages */
+   synchronize_sched();
+
+   list_for_each_entry_safe(kip, next, &c->pages, list) {
+   int i;
+
+   if (kip->ngarbage == 0)
+   continue;
+   kip->ngarbage = 0;   /* we will collect all garbages */
+   for (i = 0; i < ppc_slots_per_page(c); i++) {
+   if (kip->slot_used[i] == SLOT_DIRTY &&
+   collect_one_slot(kip, i))
+   break;
+   }
+   }
+   c->nr_garbage = 0;
+   return 0;
+}
+
+kprobe_opcode_t  *__ppc_get_optinsn_slot(struct kprobe_insn_cache *c)
+{
+   struct kprobe_ppc_insn_page *kip;
+   kprobe_opcode_t *slot = NULL;
+
+   mutex_lock(&c->mutex);
+   list_for_each_entry(kip, &c->pages, list) {
+   if (kip->nused < ppc_slots_per_page(c)) {
+   int i;
+
+   for (i = 0; i < ppc_slots_per_page(c); i++) {
+   if (kip->slot_used[i] == SLOT_CLEAN) {
+   kip->slot_used[i] = 

Re: [RFC PATCH 2/3] arch/powerpc : optprobes for powerpc core

2016-05-18 Thread Masami Hiramatsu
On Wed, 18 May 2016 02:09:37 +0530
Anju T  wrote:

> Instruction slots for the detour buffer are allocated from
> the reserved area. For the time being, 64KB is reserved
> in memory for this purpose. ppc_get_optinsn_slot() and
> ppc_free_optinsn_slot() handle the allocation and freeing
> of memory from this area.

Thank you for porting optprobe on ppc!!

I have some comments on this patch.

> 
> Signed-off-by: Anju T 
> ---
>  arch/powerpc/kernel/optprobes.c | 463 
> 
>  1 file changed, 463 insertions(+)
>  create mode 100644 arch/powerpc/kernel/optprobes.c
> 
> diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
> new file mode 100644
> index 000..50a60c1
> --- /dev/null
> +++ b/arch/powerpc/kernel/optprobes.c
> @@ -0,0 +1,463 @@
> +/*
> + * Code for Kernel probes Jump optimization.
> + *
> + * Copyright 2016, Anju T, IBM Corp.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; either version
> + * 2 of the License, or (at your option) any later version.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +/* Reserve an area to allocate slots for detour buffer */
> +extern void  optprobe_trampoline_holder(void)
> +{
> + asm volatile(".global optinsn_slot\n"
> + "optinsn_slot:\n"
> + ".space 65536");
> +}

Would we better move this into optprobes_head.S?

> +
> +#define SLOT_SIZE 65536
> +#define TMPL_CALL_HDLR_IDX   \
> + (optprobe_template_call_handler - optprobe_template_entry)
> +#define TMPL_EMULATE_IDX \
> + (optprobe_template_call_emulate - optprobe_template_entry)
> +#define TMPL_RET_BRANCH_IDX  \
> + (optprobe_template_ret_branch - optprobe_template_entry)
> +#define TMPL_RET_IDX \
> + (optprobe_template_ret - optprobe_template_entry)
> +#define TMPL_OP1_IDX \
> + (optprobe_template_op_address1 - optprobe_template_entry)
> +#define TMPL_OP2_IDX \
> + (optprobe_template_op_address2 - optprobe_template_entry)
> +#define TMPL_INSN_IDX\
> + (optprobe_template_insn - optprobe_template_entry)
> +#define TMPL_END_IDX \
> + (optprobe_template_end - optprobe_template_entry)
> +
> +struct kprobe_ppc_insn_page {
> + struct list_head list;
> + kprobe_opcode_t *insns; /* Page of instruction slots */
> + struct kprobe_insn_cache *cache;
> + int nused;
> + int ngarbage;
> + char slot_used[];
> +};
> +
> +#define PPC_KPROBE_INSN_PAGE_SIZE(slots) \
> + (offsetof(struct kprobe_ppc_insn_page, slot_used) + \
> + (sizeof(char) * (slots)))
> +
> +enum ppc_kprobe_slot_state {
> + SLOT_CLEAN = 0,
> + SLOT_DIRTY = 1,
> + SLOT_USED = 2,
> +};
> +
> +static struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
> + .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
> + .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
> + /* .insn_size is initialized later */
> + .nr_garbage = 0,
> +};
> +
> +static int ppc_slots_per_page(struct kprobe_insn_cache *c)
> +{
> + /*
> +  * Here the #slots per page differs from x86 as we have
> +  * only 64KB reserved.
> +  */
> + return SLOT_SIZE / (c->insn_size * sizeof(kprobe_opcode_t));
> +}
> +
> +/* Return 1 if all garbages are collected, otherwise 0. */
> +static int collect_one_slot(struct kprobe_ppc_insn_page *kip, int idx)
> +{
> + kip->slot_used[idx] = SLOT_CLEAN;
> + kip->nused--;
> + return 0;
> +}
> +
> +static int collect_garbage_slots(struct kprobe_insn_cache *c)
> +{
> + struct kprobe_ppc_insn_page *kip, *next;
> +
> + /* Ensure no-one is interrupted on the garbages */
> + synchronize_sched();
> +
> + list_for_each_entry_safe(kip, next, &c->pages, list) {
> + int i;
> +
> + if (kip->ngarbage == 0)
> + continue;
> + kip->ngarbage = 0;  /* we will collect all garbages */
> + for (i = 0; i < ppc_slots_per_page(c); i++) {
> + if (kip->slot_used[i] == SLOT_DIRTY &&
> + collect_one_slot(kip, i))
> + break;
> + }
> + }
> + c->nr_garbage = 0;
> + return 0;
> +}
> +
> +kprobe_opcode_t  *__ppc_get_optinsn_slot(struct kprobe_insn_cache *c)
> +{
> + struct kprobe_ppc_insn_page *kip;
> + kprobe_opcode_t *slot = NULL;
> +
> + mutex_lock(&c->mutex);
> + list_for_each_entry(kip, &c->pages, list) {
> + if (kip->nused < ppc_slots_per_page(c)) {
> + int i;
> +
> + for (i = 0; i < ppc_slots_per_page(c); i++) {
> + if (kip->slot_used[i] == SLOT_CLEAN) {
> + 

[RFC PATCH 2/3] arch/powerpc : optprobes for powerpc core

2016-05-17 Thread Anju T
Instruction slots for the detour buffer are allocated from
the reserved area. For the time being, 64KB is reserved
in memory for this purpose. ppc_get_optinsn_slot() and
ppc_free_optinsn_slot() handle the allocation and freeing
of memory from this area.

Signed-off-by: Anju T 
---
 arch/powerpc/kernel/optprobes.c | 463 
 1 file changed, 463 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000..50a60c1
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,463 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Reserve an area to allocate slots for detour buffer */
+extern void  optprobe_trampoline_holder(void)
+{
+   asm volatile(".global optinsn_slot\n"
+   "optinsn_slot:\n"
+   ".space 65536");
+}
+
+#define SLOT_SIZE 65536
+#define TMPL_CALL_HDLR_IDX \
+   (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX   \
+   (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_BRANCH_IDX\
+   (optprobe_template_ret_branch - optprobe_template_entry)
+#define TMPL_RET_IDX   \
+   (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_OP1_IDX   \
+   (optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_OP2_IDX   \
+   (optprobe_template_op_address2 - optprobe_template_entry)
+#define TMPL_INSN_IDX  \
+   (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX   \
+   (optprobe_template_end - optprobe_template_entry)
+
+struct kprobe_ppc_insn_page {
+   struct list_head list;
+   kprobe_opcode_t *insns; /* Page of instruction slots */
+   struct kprobe_insn_cache *cache;
+   int nused;
+   int ngarbage;
+   char slot_used[];
+};
+
+#define PPC_KPROBE_INSN_PAGE_SIZE(slots)   \
+   (offsetof(struct kprobe_ppc_insn_page, slot_used) + \
+   (sizeof(char) * (slots)))
+
+enum ppc_kprobe_slot_state {
+   SLOT_CLEAN = 0,
+   SLOT_DIRTY = 1,
+   SLOT_USED = 2,
+};
+
+static struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+   .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+   .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+   /* .insn_size is initialized later */
+   .nr_garbage = 0,
+};
+
+static int ppc_slots_per_page(struct kprobe_insn_cache *c)
+{
+   /*
+* Here the #slots per page differs from x86 as we have
+* only 64KB reserved.
+*/
+   return SLOT_SIZE / (c->insn_size * sizeof(kprobe_opcode_t));
+}
+
+/* Return 1 if all garbages are collected, otherwise 0. */
+static int collect_one_slot(struct kprobe_ppc_insn_page *kip, int idx)
+{
+   kip->slot_used[idx] = SLOT_CLEAN;
+   kip->nused--;
+   return 0;
+}
+
+static int collect_garbage_slots(struct kprobe_insn_cache *c)
+{
+   struct kprobe_ppc_insn_page *kip, *next;
+
+   /* Ensure no-one is interrupted on the garbages */
+   synchronize_sched();
+
+   list_for_each_entry_safe(kip, next, &c->pages, list) {
+   int i;
+
+   if (kip->ngarbage == 0)
+   continue;
+   kip->ngarbage = 0;  /* we will collect all garbages */
+   for (i = 0; i < ppc_slots_per_page(c); i++) {
+   if (kip->slot_used[i] == SLOT_DIRTY &&
+   collect_one_slot(kip, i))
+   break;
+   }
+   }
+   c->nr_garbage = 0;
+   return 0;
+}
+
+kprobe_opcode_t  *__ppc_get_optinsn_slot(struct kprobe_insn_cache *c)
+{
+   struct kprobe_ppc_insn_page *kip;
+   kprobe_opcode_t *slot = NULL;
+
+   mutex_lock(&c->mutex);
+   list_for_each_entry(kip, &c->pages, list) {
+   if (kip->nused < ppc_slots_per_page(c)) {
+   int i;
+
+   for (i = 0; i < ppc_slots_per_page(c); i++) {
+   if (kip->slot_used[i] == SLOT_CLEAN) {
+   kip->slot_used[i] = SLOT_USED;
+   kip->nused++;
+   slot = kip->insns + (i * c->insn_size);
+   goto out;
+   }
+   }
+   /* kip->nused reached max value. */
+