[PATCH V3 2/4] powerpc: add helper to check if offset is within rel branch range

2016-12-19 Thread Anju T Sudhakar
From: "Naveen N. Rao" <naveen.n@linux.vnet.ibm.com>

To permit the use of relative branch instruction in powerpc, the target 
address has to be relatively nearby, since the address is specified in an 
immediate field (24 bit field) in the instruction opcode itself. Here 
nearby refers to 32MB on either side of the current instruction.

This patch verifies whether the target address is within +/- 32MB
range or not.

Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/code-patching.h |  1 +
 arch/powerpc/lib/code-patching.c | 24 +++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 2015b07..75ee4f4 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -22,6 +22,7 @@
 #define BRANCH_SET_LINK0x1
 #define BRANCH_ABSOLUTE0x2
 
+bool is_offset_in_branch_range(long offset);
 unsigned int create_branch(const unsigned int *addr,
   unsigned long target, int flags);
 unsigned int create_cond_branch(const unsigned int *addr,
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d5edbeb..f643451 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -32,6 +32,28 @@ int patch_branch(unsigned int *addr, unsigned long target, 
int flags)
return patch_instruction(addr, create_branch(addr, target, flags));
 }
 
+bool is_offset_in_branch_range(long offset)
+{
+   /*
+* Powerpc branch instruction is :
+*
+*  0 6 30   31
+*  +-++---+---+
+*  | opcode  | LI |AA |LK |
+*  +-++---+---+
+*  Where AA = 0 and LK = 0
+*
+* LI is a signed 24 bits integer. The real branch offset is computed
+* by: imm32 = SignExtend(LI:'0b00', 32);
+*
+* So the maximum forward branch should be:
+*   (0x007fffff << 2) = 0x01fffffc =  0x1fffffc
+* The maximum backward branch should be:
+*   (0xff800000 << 2) = 0xfe000000 = -0x2000000
+*/
+   return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3));
+}
+
 unsigned int create_branch(const unsigned int *addr,
   unsigned long target, int flags)
 {
@@ -43,7 +65,7 @@ unsigned int create_branch(const unsigned int *addr,
offset = offset - (unsigned long)addr;
 
/* Check we can represent the target in the instruction format */
-   if (offset < -0x2000000 || offset > 0x1fffffc || offset & 0x3)
+   if (!is_offset_in_branch_range(offset))
return 0;
 
/* Mask out the flags and target, so they don't step on each other. */
-- 
2.7.4



[PATCH V3 1/4] powerpc: asm/ppc-opcode.h: introduce __PPC_SH64()

2016-12-19 Thread Anju T Sudhakar
From: "Naveen N. Rao" 

Introduce __PPC_SH64() as a 64-bit variant to encode shift field in some
of the shift and rotate instructions operating on double-words. Convert
some of the BPF instruction macros to use the same.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/include/asm/ppc-opcode.h |  1 +
 arch/powerpc/net/bpf_jit.h| 11 +--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index 0132831..630127b 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -306,6 +306,7 @@
 #define __PPC_WC(w)(((w) & 0x3) << 21)
 #define __PPC_WS(w)(((w) & 0x1f) << 11)
 #define __PPC_SH(s)__PPC_WS(s)
+#define __PPC_SH64(s)  (__PPC_SH(s) | (((s) & 0x20) >> 4))
 #define __PPC_MB(s)(((s) & 0x1f) << 6)
 #define __PPC_ME(s)(((s) & 0x1f) << 1)
 #define __PPC_MB64(s)  (__PPC_MB(s) | ((s) & 0x20))
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 89f7007..30cf03f 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -157,8 +157,7 @@
 #define PPC_SRAD(d, a, s)  EMIT(PPC_INST_SRAD | ___PPC_RA(d) |   \
 ___PPC_RS(a) | ___PPC_RB(s))
 #define PPC_SRADI(d, a, i) EMIT(PPC_INST_SRADI | ___PPC_RA(d) |  \
-___PPC_RS(a) | __PPC_SH(i) | \
-(((i) & 0x20) >> 4))
+___PPC_RS(a) | __PPC_SH64(i))
 #define PPC_RLWINM(d, a, i, mb, me)EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) |  \
__PPC_MB(mb) | __PPC_ME(me))
@@ -166,11 +165,11 @@
___PPC_RS(a) | __PPC_SH(i) |  \
__PPC_MB(mb) | __PPC_ME(me))
 #define PPC_RLDICL(d, a, i, mb)EMIT(PPC_INST_RLDICL | 
___PPC_RA(d) | \
-   ___PPC_RS(a) | __PPC_SH(i) |  \
-   __PPC_MB64(mb) | (((i) & 0x20) >> 4))
+   ___PPC_RS(a) | __PPC_SH64(i) |\
+   __PPC_MB64(mb))
 #define PPC_RLDICR(d, a, i, me)EMIT(PPC_INST_RLDICR | 
___PPC_RA(d) | \
-   ___PPC_RS(a) | __PPC_SH(i) |  \
-   __PPC_ME64(me) | (((i) & 0x20) >> 4))
+   ___PPC_RS(a) | __PPC_SH64(i) |\
+   __PPC_ME64(me))
 
 /* slwi = rlwinm Rx, Ry, n, 0, 31-n */
 #define PPC_SLWI(d, a, i)  PPC_RLWINM(d, a, i, 0, 31-(i))
-- 
2.7.4



[PATCH V3 4/4] arch/powerpc: Optimize kprobe in kretprobe_trampoline

2016-12-19 Thread Anju T Sudhakar
Kprobe placed on the  kretprobe_trampoline during boot time can be 
optimized, since the instruction at probe point is a 'nop'.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Acked-by: Masami Hiramatsu <mhira...@kernel.org>
---
 arch/powerpc/kernel/kprobes.c   | 8 
 arch/powerpc/kernel/optprobes.c | 7 +++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index e785cc9..5b0fd07 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -282,6 +282,7 @@ asm(".global kretprobe_trampoline\n"
".type kretprobe_trampoline, @function\n"
"kretprobe_trampoline:\n"
"nop\n"
+   "blr\n"
".size kretprobe_trampoline, .-kretprobe_trampoline\n");
 
 /*
@@ -334,6 +335,13 @@ static int __kprobes trampoline_probe_handler(struct 
kprobe *p,
 
kretprobe_assert(ri, orig_ret_address, trampoline_address);
regs->nip = orig_ret_address;
+   /*
+* Make LR point to the orig_ret_address.
+* When the 'nop' inside the kretprobe_trampoline
+* is optimized, we can do a 'blr' after executing the
+* detour buffer code.
+*/
+   regs->link = orig_ret_address;
 
reset_current_kprobe();
	kretprobe_hash_unlock(current, &flags);
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index ecba221..5e4c254 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -72,12 +72,11 @@ static unsigned long can_optimize(struct kprobe *p)
 
/*
 * kprobe placed for kretprobe during boot time
-* is not optimizing now.
-*
-* TODO: Optimize kprobe in kretprobe_trampoline
+* has a 'nop' instruction, which can be emulated.
+* So further checks can be skipped.
 */
	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
-   return 0;
+   return (unsigned long)p->addr + sizeof(kprobe_opcode_t);
 
/*
 * We only support optimizing kernel addresses, but not
-- 
2.7.4



[PATCH V3 3/4] arch/powerpc: Implement Optprobes

2016-12-19 Thread Anju T Sudhakar
Detour buffer contains instructions to create an in memory pt_regs.
After the execution of the pre-handler, a call is made for instruction 
emulation.
The NIP is determined in advanced through dummy instruction emulation and a 
branch
instruction is created to the NIP at the end of the trampoline.

Instruction slot for detour buffer is allocated from the reserved area.
For the time being, 64KB is reserved in memory for this purpose.

Instructions which can be emulated using analyse_instr() are candidates
for optimization. Before optimization ensure that the address range
between the detour buffer allocated and the instruction being probed
is within ± 32MB.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
---
 .../features/debug/optprobes/arch-support.txt  |   2 +-
 arch/powerpc/Kconfig   |   1 +
 arch/powerpc/include/asm/kprobes.h |  24 +-
 arch/powerpc/include/asm/sstep.h   |   1 +
 arch/powerpc/kernel/Makefile   |   1 +
 arch/powerpc/kernel/optprobes.c| 331 +
 arch/powerpc/kernel/optprobes_head.S   | 135 +
 arch/powerpc/lib/sstep.c   |  21 ++
 8 files changed, 514 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/kernel/optprobes.c
 create mode 100644 arch/powerpc/kernel/optprobes_head.S

diff --git a/Documentation/features/debug/optprobes/arch-support.txt 
b/Documentation/features/debug/optprobes/arch-support.txt
index b8999d8..45bc99d 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -27,7 +27,7 @@
 |   nios2: | TODO |
 |openrisc: | TODO |
 |  parisc: | TODO |
-| powerpc: | TODO |
+| powerpc: |  ok  |
 |s390: | TODO |
 |   score: | TODO |
 |  sh: | TODO |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 65fba4c..f7e9296 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -98,6 +98,7 @@ config PPC
select HAVE_IOREMAP_PROT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && 
POWER7_CPU)
select HAVE_KPROBES
+   select HAVE_OPTPROBES if PPC64
select HAVE_ARCH_KGDB
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/powerpc/include/asm/kprobes.h 
b/arch/powerpc/include/asm/kprobes.h
index 2c9759bd..0cf640b 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -38,7 +38,23 @@ struct pt_regs;
 struct kprobe;
 
 typedef ppc_opcode_t kprobe_opcode_t;
-#define MAX_INSN_SIZE 1
+
+extern kprobe_opcode_t optinsn_slot;
+
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_op_address[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_end[];
+
+/* Fixed instruction size for powerpc */
+#define MAX_INSN_SIZE  1
+#define MAX_OPTIMIZED_LENGTH   sizeof(kprobe_opcode_t) /* 4 bytes */
+#define MAX_OPTINSN_SIZE   (optprobe_template_end - 
optprobe_template_entry)
+#define RELATIVEJUMP_SIZE  sizeof(kprobe_opcode_t) /* 4 bytes */
 
 #ifdef PPC64_ELF_ABI_v2
 /* PPC64 ABIv2 needs local entry point */
@@ -124,6 +140,12 @@ struct kprobe_ctlblk {
struct prev_kprobe prev_kprobe;
 };
 
+struct arch_optimized_insn {
+   kprobe_opcode_t copied_insn[1];
+   /* detour buffer */
+   kprobe_opcode_t *insn;
+};
+
 extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index d3a42cc..f7ad425 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -87,3 +87,4 @@ struct instruction_op {
 
 extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 unsigned int instr);
+extern bool is_conditional_branch(unsigned int instr);
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 1925341..54f0f47 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -98,6 +98,7 @@ obj-$(CONFIG_KGDB)+= kgdb.o
 obj-$(CONFIG_BOOTX_TEXT)   += btext.o
 obj-$(CONFIG_SMP)  += smp.o
 obj-$(CONFIG_KPROBES)  += kprobes.o
+obj-$(CONFIG_OPTPROBES)+= optprobes.o optprobes_head.o
 obj-$(CONFIG_UPROBES)  += uprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)   += legacy_serial.o udbg_16550.o
 obj

[PATCH V3 0/4] OPTPROBES for powerpc

2016-12-19 Thread Anju T Sudhakar
This is the V3 patchset of the kprobes jump optimization
(a.k.a. OPTPROBES) for powerpc. Kprobe being an inevitable tool
for kernel developers, enhancing the performance of kprobe has
got much importance.

Currently kprobes inserts a trap instruction to probe a running kernel.
Jump optimization allows kprobes to replace the trap with a branch,
reducing the probe overhead drastically.

In this series, conditional branch instructions are not considered for
optimization as they have to be assessed carefully in SMP systems.

The kprobe placed on the kretprobe_trampoline during boot time, is also
optimized in this series. Patch 4/4 furnishes this.

The first two patches can go independently of the series. The helper 
functions in these patches are invoked in patch 3/4.

Performance:

An optimized kprobe in powerpc is 1.05 to 4.7 times faster than a kprobe.
 
Example:
 
Placed a probe at an offset 0x50 in _do_fork().
*Time Diff here is, difference in time before hitting the probe and
after the probed instruction. mftb() is employed in kernel/fork.c for
this purpose.
 
# echo 0 > /proc/sys/debug/kprobes-optimization
Kprobes globally unoptimized
 [  233.607120] Time Diff = 0x1f0
 [  233.608273] Time Diff = 0x1ee
 [  233.609228] Time Diff = 0x203
 [  233.610400] Time Diff = 0x1ec
 [  233.611335] Time Diff = 0x200
 [  233.612552] Time Diff = 0x1f0
 [  233.613386] Time Diff = 0x1ee
 [  233.614547] Time Diff = 0x212
 [  233.615570] Time Diff = 0x206
 [  233.616819] Time Diff = 0x1f3
 [  233.617773] Time Diff = 0x1ec
 [  233.618944] Time Diff = 0x1fb
 [  233.619879] Time Diff = 0x1f0
 [  233.621066] Time Diff = 0x1f9
 [  233.621999] Time Diff = 0x283
 [  233.623281] Time Diff = 0x24d
 [  233.624172] Time Diff = 0x1ea
 [  233.625381] Time Diff = 0x1f0
 [  233.626358] Time Diff = 0x200
 [  233.627572] Time Diff = 0x1ed
 
# echo 1 > /proc/sys/debug/kprobes-optimization
Kprobes globally optimized
 [   70.797075] Time Diff = 0x103
 [   70.799102] Time Diff = 0x181
 [   70.801861] Time Diff = 0x15e
 [   70.803466] Time Diff = 0xf0
 [   70.804348] Time Diff = 0xd0
 [   70.805653] Time Diff = 0xad
 [   70.806477] Time Diff = 0xe0
 [   70.807725] Time Diff = 0xbe
 [   70.808541] Time Diff = 0xc3
 [   70.810191] Time Diff = 0xc7
 [   70.811007] Time Diff = 0xc0
 [   70.812629] Time Diff = 0xc0
 [   70.813640] Time Diff = 0xda
 [   70.814915] Time Diff = 0xbb
 [   70.815726] Time Diff = 0xc4
 [   70.816955] Time Diff = 0xc0
 [   70.817778] Time Diff = 0xcd
 [   70.818999] Time Diff = 0xcd
 [   70.820099] Time Diff = 0xcb
 [   70.821333] Time Diff = 0xf0

Implementation:
===
 
The trap instruction is replaced by a branch to a detour buffer. To address
the limitation of branch instruction in power architecture, detour buffer
slot is allocated from a reserved area. This will ensure that the branch
is within ± 32 MB range. The existing generic approach for kprobes insn 
cache uses module_alloc() to allocate memory area for insn slots. This will 
always be beyond ± 32MB range.
 
The detour buffer contains a call to optimized_callback() which in turn
call the pre_handler(). Once the pre-handler is run, the original
instruction is emulated from the detour buffer itself. Also the detour
buffer is equipped with a branch back to the normal work flow after the
probed instruction is emulated. Before preparing optimization, Kprobes
inserts original(breakpoint instruction)kprobe on the specified address.
So, even if the kprobe is not possible to be optimized, it just uses a
normal kprobe.
 
Limitations:
==
- Number of probes which can be optimized is limited by the size of the
  area reserved.
- Currently instructions which can be emulated using analyse_instr() are 
  the only candidates for optimization.
- Conditional branch instructions are not optimized.
- Probes on kernel module region are not considered for optimization now.
 
Link for the V1 patchset: 
https://lkml.org/lkml/2016/9/7/171
https://lkml.org/lkml/2016/9/7/174
https://lkml.org/lkml/2016/9/7/172
https://lkml.org/lkml/2016/9/7/173

Changes from v1:

- Merged the three patches in V1 into a single patch.
- Comments by Masami are addressed.
- Some helper functions are implemented in separate patches.
- Optimization for kprobe placed on the kretprobe_trampoline during
  boot time is implemented.

Changes from v2:

- Comments by Masami are addressed.
- Description in the cover letter is modified a bit.


Kindly let me know your suggestions and comments.

Thanks,
-Anju


Anju T Sudhakar (2):
  arch/powerpc: Implement Optprobes
  arch/powerpc: Optimize kprobe in kretprobe_trampoline

Naveen N. Rao (2):
  powerpc: asm/ppc-opcode.h: introduce __PPC_SH64()
  powerpc: add helper to check if offset is within rel branch range

 .../features/debug/optprobes/arch-support.txt  |   2 +-
 arch/powerpc/Kconfig   |   1 +

Re: [PATCH V2 0/4] OPTPROBES for powerpc

2016-12-16 Thread Anju T Sudhakar

Hi Balbir,



On Friday 16 December 2016 08:16 PM, Balbir Singh wrote:


On 15/12/16 03:18, Anju T Sudhakar wrote:

This is the V2 patchset of the kprobes jump optimization
(a.k.a OPTPROBES)for powerpc. Kprobe being an inevitable tool
for kernel developers, enhancing the performance of kprobe has
got much importance.

Currently kprobes inserts a trap instruction to probe a running kernel.
Jump optimization allows kprobes to replace the trap with a branch,
reducing the probe overhead drastically.

In this series, conditional branch instructions are not considered for
optimization as they have to be assessed carefully in SMP systems.

The kprobe placed on the kretprobe_trampoline during boot time, is also
optimized in this series. Patch 4/4 furnishes this.

The first two patches can go independently of the series. The helper
functions in these patches are invoked in patch 3/4.

Performance:

An optimized kprobe in powerpc is 1.05 to 4.7 times faster than a kprobe.
  
Example:
  
Placed a probe at an offset 0x50 in _do_fork().

*Time Diff here is, difference in time before hitting the probe and
after the probed instruction. mftb() is employed in kernel/fork.c for
this purpose.
  
# echo 0 > /proc/sys/debug/kprobes-optimization

Kprobes globally unoptimized
  [  233.607120] Time Diff = 0x1f0
  [  233.608273] Time Diff = 0x1ee
  [  233.609228] Time Diff = 0x203
  [  233.610400] Time Diff = 0x1ec
  [  233.611335] Time Diff = 0x200
  [  233.612552] Time Diff = 0x1f0
  [  233.613386] Time Diff = 0x1ee
  [  233.614547] Time Diff = 0x212
  [  233.615570] Time Diff = 0x206
  [  233.616819] Time Diff = 0x1f3
  [  233.617773] Time Diff = 0x1ec
  [  233.618944] Time Diff = 0x1fb
  [  233.619879] Time Diff = 0x1f0
  [  233.621066] Time Diff = 0x1f9
  [  233.621999] Time Diff = 0x283
  [  233.623281] Time Diff = 0x24d
  [  233.624172] Time Diff = 0x1ea
  [  233.625381] Time Diff = 0x1f0
  [  233.626358] Time Diff = 0x200
  [  233.627572] Time Diff = 0x1ed
  
# echo 1 > /proc/sys/debug/kprobes-optimization

Kprobes globally optimized
  [   70.797075] Time Diff = 0x103
  [   70.799102] Time Diff = 0x181
  [   70.801861] Time Diff = 0x15e
  [   70.803466] Time Diff = 0xf0
  [   70.804348] Time Diff = 0xd0
  [   70.805653] Time Diff = 0xad
  [   70.806477] Time Diff = 0xe0
  [   70.807725] Time Diff = 0xbe
  [   70.808541] Time Diff = 0xc3
  [   70.810191] Time Diff = 0xc7
  [   70.811007] Time Diff = 0xc0
  [   70.812629] Time Diff = 0xc0
  [   70.813640] Time Diff = 0xda
  [   70.814915] Time Diff = 0xbb
  [   70.815726] Time Diff = 0xc4
  [   70.816955] Time Diff = 0xc0
  [   70.817778] Time Diff = 0xcd
  [   70.818999] Time Diff = 0xcd
  [   70.820099] Time Diff = 0xcb
  [   70.821333] Time Diff = 0xf0

Implementation:
===
  
The trap instruction is replaced by a branch to a detour buffer. To address

the limitation of branch instruction in power architecture, detour buffer
slot is allocated from a reserved area . This will ensure that the branch
is within ± 32 MB range. The current kprobes insn caches allocate memory
area for insn slots with module_alloc(). This will always be beyond
± 32MB range.
  

The paragraph is a little confusing. We need the detour buffer to be within
+-32 MB, but then you say we always get memory from module_alloc() beyond
32MB.


The last two lines in the paragraph talk about the *current* method which
the regular kprobe uses for allocating an instruction slot. So in our case,
we can't use module_alloc(), since there is no guarantee that the slot
allocated will be within the +/- 32MB range.

The detour buffer contains a call to optimized_callback() which in turn
call the pre_handler(). Once the pre-handler is run, the original
instruction is emulated from the detour buffer itself. Also the detour
buffer is equipped with a branch back to the normal work flow after the
probed instruction is emulated.

Does the branch itself use registers that need to be saved? I presume
we are going to rely on the +-32MB, what are the guarantees of success
of such a mechanism?


For branching back to the next instruction, after the execution of the 
kprobe's pre-handler,
we place the branch instruction in the detour buffer itself. Hence we 
don't have to clobber any registers

after restoring them.
Before optimizing the kprobe we make sure that , 'branch to detour 
buffer' and 'branch back from detour buffer' is within +/- 32MB range. 
This ensures the working of optimized kprobe.



Thanks ,
Anju



Balbir Singh.





Re: [PATCH V2 3/4] arch/powerpc: Implement Optprobes

2016-12-16 Thread Anju T Sudhakar

Hi Masami,



On Friday 16 December 2016 07:32 PM, Masami Hiramatsu wrote:

On Wed, 14 Dec 2016 21:48:27 +0530
Anju T Sudhakar <a...@linux.vnet.ibm.com> wrote:


Detour buffer contains instructions to create an in memory pt_regs.
After the execution of the pre-handler, a call is made for instruction 
emulation.
The NIP is determined in advanced through dummy instruction emulation and a 
branch
instruction is created to the NIP at the end of the trampoline.

Instruction slot for detour buffer is allocated from the reserved area.
For the time being, 64KB is reserved in memory for this purpose.

Instructions which can be emulated using analyse_instr() are candidates
for optimization. Before optimization ensure that the address range
between the detour buffer allocated and the instruction being probed
is within ± 32MB.

Thank you for updating!
I think this has no critical issue, but I have some comments on it.
(just cleanup and hardenings)
Please see below.


Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
---
  .../features/debug/optprobes/arch-support.txt  |   2 +-
  arch/powerpc/Kconfig   |   1 +
  arch/powerpc/include/asm/kprobes.h |  23 +-
  arch/powerpc/include/asm/sstep.h   |   1 +
  arch/powerpc/kernel/Makefile   |   1 +
  arch/powerpc/kernel/optprobes.c| 333 +
  arch/powerpc/kernel/optprobes_head.S   | 135 +
  arch/powerpc/lib/sstep.c   |  22 ++
  8 files changed, 516 insertions(+), 2 deletions(-)
  create mode 100644 arch/powerpc/kernel/optprobes.c
  create mode 100644 arch/powerpc/kernel/optprobes_head.S

diff --git a/Documentation/features/debug/optprobes/arch-support.txt 
b/Documentation/features/debug/optprobes/arch-support.txt
index b8999d8..45bc99d 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -27,7 +27,7 @@
  |   nios2: | TODO |
  |openrisc: | TODO |
  |  parisc: | TODO |
-| powerpc: | TODO |
+| powerpc: |  ok  |
  |s390: | TODO |
  |   score: | TODO |
  |  sh: | TODO |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c7f120a..d563f0a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -98,6 +98,7 @@ config PPC
select HAVE_IOREMAP_PROT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && 
POWER7_CPU)
select HAVE_KPROBES
+   select HAVE_OPTPROBES if PPC64
select HAVE_ARCH_KGDB
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/powerpc/include/asm/kprobes.h 
b/arch/powerpc/include/asm/kprobes.h
index 2c9759bd..739ddc5 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -38,7 +38,22 @@ struct pt_regs;
  struct kprobe;
  
  typedef ppc_opcode_t kprobe_opcode_t;

-#define MAX_INSN_SIZE 1
+
+extern kprobe_opcode_t optinsn_slot;
+
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_op_address[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_end[];
+
+#define MAX_INSN_SIZE  1
+#define MAX_OPTIMIZED_LENGTH   4
+#define MAX_OPTINSN_SIZE   (optprobe_template_end - 
optprobe_template_entry)
+#define RELATIVEJUMP_SIZE  4

These size/length macros seems a bit odd. I guess MAX_INSN_SIZE
is based on sizeof(MAX_INSN_SIZE), but others are in byte.
Could you fix that? For example, define it with
sizeof(kprobe_opcode_t), and add comments on it, etc.


Sure. I will look into this and define it in  a consistent way.

  
  #ifdef PPC64_ELF_ABI_v2

  /* PPC64 ABIv2 needs local entry point */
@@ -124,6 +139,12 @@ struct kprobe_ctlblk {
struct prev_kprobe prev_kprobe;
  };
  
+struct arch_optimized_insn {

+   kprobe_opcode_t copied_insn[1];
+   /* detour buffer */
+   kprobe_opcode_t *insn;
+};
+
  extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
  extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index d3a42cc..f7ad425 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -87,3 +87,4 @@ struct instruction_op {
  
  extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs,

 unsigned int instr);
+extern bool is_conditional_branch(unsigned int instr);
diff --git a/arch/powerpc/kern

Re: [PATCH V2 2/4] powerpc: add helper to check if offset is within rel branch range

2016-12-16 Thread Anju T Sudhakar

Hi Masami,


Thank you for reviewing the patch set.


On Friday 16 December 2016 05:22 PM, Masami Hiramatsu wrote:

On Wed, 14 Dec 2016 21:48:30 +0530
Anju T Sudhakar <a...@linux.vnet.ibm.com> wrote:


From: "Naveen N. Rao" <naveen.n@linux.vnet.ibm.com>


The coding is OK to me. Please add a description for this patch
here, e.g. what is done by this patch, what kind of branch
instruction will be covered, and why thse checks are needed etc.



Sure. I will give a description for this patch.


Thanks and Regards,

-Anju



Thank you,


Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
  arch/powerpc/include/asm/code-patching.h |  1 +
  arch/powerpc/lib/code-patching.c | 24 +++-
  2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 2015b07..75ee4f4 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -22,6 +22,7 @@
  #define BRANCH_SET_LINK   0x1
  #define BRANCH_ABSOLUTE   0x2
  
+bool is_offset_in_branch_range(long offset);

  unsigned int create_branch(const unsigned int *addr,
   unsigned long target, int flags);
  unsigned int create_cond_branch(const unsigned int *addr,
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d5edbeb..f643451 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -32,6 +32,28 @@ int patch_branch(unsigned int *addr, unsigned long target, 
int flags)
return patch_instruction(addr, create_branch(addr, target, flags));
  }
  
+bool is_offset_in_branch_range(long offset)

+{
+   /*
+* Powerpc branch instruction is :
+*
+*  0 6 30   31
+*  +-++---+---+
+*  | opcode  | LI |AA |LK |
+*  +-++---+---+
+*  Where AA = 0 and LK = 0
+*
+* LI is a signed 24 bits integer. The real branch offset is computed
+* by: imm32 = SignExtend(LI:'0b00', 32);
+*
+* So the maximum forward branch should be:
+*   (0x007f << 2) = 0x01fc =  0x1fc
+* The maximum backward branch should be:
+*   (0xff80 << 2) = 0xfe00 = -0x200
+*/
+   return (offset >= -0x200 && offset <= 0x1fc && !(offset & 0x3));
+}
+
  unsigned int create_branch(const unsigned int *addr,
   unsigned long target, int flags)
  {
@@ -43,7 +65,7 @@ unsigned int create_branch(const unsigned int *addr,
offset = offset - (unsigned long)addr;
  
  	/* Check we can represent the target in the instruction format */

-   if (offset < -0x2000000 || offset > 0x1fffffc || offset & 0x3)
+   if (!is_offset_in_branch_range(offset))
return 0;
  
  	/* Mask out the flags and target, so they don't step on each other. */

--
2.7.4







[PATCH V2 2/4] powerpc: add helper to check if offset is within rel branch range

2016-12-14 Thread Anju T Sudhakar
From: "Naveen N. Rao" <naveen.n@linux.vnet.ibm.com>

Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/code-patching.h |  1 +
 arch/powerpc/lib/code-patching.c | 24 +++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 2015b07..75ee4f4 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -22,6 +22,7 @@
 #define BRANCH_SET_LINK0x1
 #define BRANCH_ABSOLUTE0x2
 
+bool is_offset_in_branch_range(long offset);
 unsigned int create_branch(const unsigned int *addr,
   unsigned long target, int flags);
 unsigned int create_cond_branch(const unsigned int *addr,
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index d5edbeb..f643451 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -32,6 +32,28 @@ int patch_branch(unsigned int *addr, unsigned long target, 
int flags)
return patch_instruction(addr, create_branch(addr, target, flags));
 }
 
+bool is_offset_in_branch_range(long offset)
+{
+   /*
+* Powerpc branch instruction is :
+*
+*  0 6 30   31
+*  +-++---+---+
+*  | opcode  | LI |AA |LK |
+*  +-++---+---+
+*  Where AA = 0 and LK = 0
+*
+* LI is a signed 24 bits integer. The real branch offset is computed
+* by: imm32 = SignExtend(LI:'0b00', 32);
+*
+* So the maximum forward branch should be:
+*   (0x007fffff << 2) = 0x01fffffc =  0x1fffffc
+* The maximum backward branch should be:
+*   (0xff800000 << 2) = 0xfe000000 = -0x2000000
+*/
+   return (offset >= -0x2000000 && offset <= 0x1fffffc && !(offset & 0x3));
+}
+
 unsigned int create_branch(const unsigned int *addr,
   unsigned long target, int flags)
 {
@@ -43,7 +65,7 @@ unsigned int create_branch(const unsigned int *addr,
offset = offset - (unsigned long)addr;
 
/* Check we can represent the target in the instruction format */
-   if (offset < -0x2000000 || offset > 0x1fffffc || offset & 0x3)
+   if (!is_offset_in_branch_range(offset))
return 0;
 
/* Mask out the flags and target, so they don't step on each other. */
-- 
2.7.4



[PATCH V2 1/4] powerpc: asm/ppc-opcode.h: introduce __PPC_SH64()

2016-12-14 Thread Anju T Sudhakar
From: "Naveen N. Rao" 

Introduce __PPC_SH64() as a 64-bit variant to encode shift field in some
of the shift and rotate instructions operating on double-words. Convert
some of the BPF instruction macros to use the same.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/include/asm/ppc-opcode.h |  1 +
 arch/powerpc/net/bpf_jit.h| 11 +--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index 0132831..630127b 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -306,6 +306,7 @@
 #define __PPC_WC(w)(((w) & 0x3) << 21)
 #define __PPC_WS(w)(((w) & 0x1f) << 11)
 #define __PPC_SH(s)__PPC_WS(s)
+#define __PPC_SH64(s)  (__PPC_SH(s) | (((s) & 0x20) >> 4))
 #define __PPC_MB(s)(((s) & 0x1f) << 6)
 #define __PPC_ME(s)(((s) & 0x1f) << 1)
 #define __PPC_MB64(s)  (__PPC_MB(s) | ((s) & 0x20))
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 89f7007..30cf03f 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -157,8 +157,7 @@
 #define PPC_SRAD(d, a, s)  EMIT(PPC_INST_SRAD | ___PPC_RA(d) |   \
 ___PPC_RS(a) | ___PPC_RB(s))
 #define PPC_SRADI(d, a, i) EMIT(PPC_INST_SRADI | ___PPC_RA(d) |  \
-___PPC_RS(a) | __PPC_SH(i) | \
-(((i) & 0x20) >> 4))
+___PPC_RS(a) | __PPC_SH64(i))
 #define PPC_RLWINM(d, a, i, mb, me)EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
___PPC_RS(a) | __PPC_SH(i) |  \
__PPC_MB(mb) | __PPC_ME(me))
@@ -166,11 +165,11 @@
___PPC_RS(a) | __PPC_SH(i) |  \
__PPC_MB(mb) | __PPC_ME(me))
 #define PPC_RLDICL(d, a, i, mb)EMIT(PPC_INST_RLDICL | 
___PPC_RA(d) | \
-   ___PPC_RS(a) | __PPC_SH(i) |  \
-   __PPC_MB64(mb) | (((i) & 0x20) >> 4))
+   ___PPC_RS(a) | __PPC_SH64(i) |\
+   __PPC_MB64(mb))
 #define PPC_RLDICR(d, a, i, me)EMIT(PPC_INST_RLDICR | 
___PPC_RA(d) | \
-   ___PPC_RS(a) | __PPC_SH(i) |  \
-   __PPC_ME64(me) | (((i) & 0x20) >> 4))
+   ___PPC_RS(a) | __PPC_SH64(i) |\
+   __PPC_ME64(me))
 
 /* slwi = rlwinm Rx, Ry, n, 0, 31-n */
 #define PPC_SLWI(d, a, i)  PPC_RLWINM(d, a, i, 0, 31-(i))
-- 
2.7.4



[PATCH V2 4/4] arch/powerpc: Optimize kprobe in kretprobe_trampoline

2016-12-14 Thread Anju T Sudhakar
Kprobe placed on the  kretprobe_trampoline during boot time can be 
optimized, since the instruction at probe point is a 'nop'.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/kprobes.c   | 8 
 arch/powerpc/kernel/optprobes.c | 7 +++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index e785cc9..5b0fd07 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -282,6 +282,7 @@ asm(".global kretprobe_trampoline\n"
".type kretprobe_trampoline, @function\n"
"kretprobe_trampoline:\n"
"nop\n"
+   "blr\n"
".size kretprobe_trampoline, .-kretprobe_trampoline\n");
 
 /*
@@ -334,6 +335,13 @@ static int __kprobes trampoline_probe_handler(struct 
kprobe *p,
 
kretprobe_assert(ri, orig_ret_address, trampoline_address);
regs->nip = orig_ret_address;
+   /*
+* Make LR point to the orig_ret_address.
+* When the 'nop' inside the kretprobe_trampoline
+* is optimized, we can do a 'blr' after executing the
+* detour buffer code.
+*/
+   regs->link = orig_ret_address;
 
reset_current_kprobe();
	kretprobe_hash_unlock(current, &flags);
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index ecba221..5e4c254 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -72,12 +72,11 @@ static unsigned long can_optimize(struct kprobe *p)
 
/*
 * kprobe placed for kretprobe during boot time
-* is not optimizing now.
-*
-* TODO: Optimize kprobe in kretprobe_trampoline
+* has a 'nop' instruction, which can be emulated.
+* So further checks can be skipped.
 */
	if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
-   return 0;
+   return (unsigned long)p->addr + sizeof(kprobe_opcode_t);
 
/*
 * We only support optimizing kernel addresses, but not
-- 
2.7.4



[PATCH V2 3/4] arch/powerpc: Implement Optprobes

2016-12-14 Thread Anju T Sudhakar
Detour buffer contains instructions to create an in memory pt_regs.
After the execution of the pre-handler, a call is made for instruction 
emulation.
The NIP is determined in advanced through dummy instruction emulation and a 
branch
instruction is created to the NIP at the end of the trampoline.

Instruction slot for detour buffer is allocated from the reserved area.
For the time being, 64KB is reserved in memory for this purpose.

Instructions which can be emulated using analyse_instr() are candidates
for optimization. Before optimization ensure that the address range
between the detour buffer allocated and the instruction being probed
is within ± 32MB.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
---
 .../features/debug/optprobes/arch-support.txt  |   2 +-
 arch/powerpc/Kconfig   |   1 +
 arch/powerpc/include/asm/kprobes.h |  23 +-
 arch/powerpc/include/asm/sstep.h   |   1 +
 arch/powerpc/kernel/Makefile   |   1 +
 arch/powerpc/kernel/optprobes.c| 333 +
 arch/powerpc/kernel/optprobes_head.S   | 135 +
 arch/powerpc/lib/sstep.c   |  22 ++
 8 files changed, 516 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/kernel/optprobes.c
 create mode 100644 arch/powerpc/kernel/optprobes_head.S

diff --git a/Documentation/features/debug/optprobes/arch-support.txt 
b/Documentation/features/debug/optprobes/arch-support.txt
index b8999d8..45bc99d 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -27,7 +27,7 @@
 |   nios2: | TODO |
 |openrisc: | TODO |
 |  parisc: | TODO |
-| powerpc: | TODO |
+| powerpc: |  ok  |
 |s390: | TODO |
 |   score: | TODO |
 |  sh: | TODO |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index c7f120a..d563f0a 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -98,6 +98,7 @@ config PPC
select HAVE_IOREMAP_PROT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && 
POWER7_CPU)
select HAVE_KPROBES
+   select HAVE_OPTPROBES if PPC64
select HAVE_ARCH_KGDB
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/powerpc/include/asm/kprobes.h 
b/arch/powerpc/include/asm/kprobes.h
index 2c9759bd..739ddc5 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -38,7 +38,22 @@ struct pt_regs;
 struct kprobe;
 
 typedef ppc_opcode_t kprobe_opcode_t;
-#define MAX_INSN_SIZE 1
+
+extern kprobe_opcode_t optinsn_slot;
+
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_op_address[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_end[];
+
+#define MAX_INSN_SIZE  1
+#define MAX_OPTIMIZED_LENGTH   4
+#define MAX_OPTINSN_SIZE   (optprobe_template_end - 
optprobe_template_entry)
+#define RELATIVEJUMP_SIZE  4
 
 #ifdef PPC64_ELF_ABI_v2
 /* PPC64 ABIv2 needs local entry point */
@@ -124,6 +139,12 @@ struct kprobe_ctlblk {
struct prev_kprobe prev_kprobe;
 };
 
+struct arch_optimized_insn {
+   kprobe_opcode_t copied_insn[1];
+   /* detour buffer */
+   kprobe_opcode_t *insn;
+};
+
 extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index d3a42cc..f7ad425 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -87,3 +87,4 @@ struct instruction_op {
 
 extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 unsigned int instr);
+extern bool is_conditional_branch(unsigned int instr);
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 1925341..54f0f47 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -98,6 +98,7 @@ obj-$(CONFIG_KGDB)+= kgdb.o
 obj-$(CONFIG_BOOTX_TEXT)   += btext.o
 obj-$(CONFIG_SMP)  += smp.o
 obj-$(CONFIG_KPROBES)  += kprobes.o
+obj-$(CONFIG_OPTPROBES)+= optprobes.o optprobes_head.o
 obj-$(CONFIG_UPROBES)  += uprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)   += legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c

[PATCH V2 0/4] OPTPROBES for powerpc

2016-12-14 Thread Anju T Sudhakar
This is the V2 patchset of the kprobes jump optimization
(a.k.a. OPTPROBES) for powerpc. Kprobe being an inevitable tool
for kernel developers, enhancing the performance of kprobe has
got much importance.

Currently kprobes inserts a trap instruction to probe a running kernel.
Jump optimization allows kprobes to replace the trap with a branch,
reducing the probe overhead drastically.

In this series, conditional branch instructions are not considered for
optimization as they have to be assessed carefully in SMP systems.

The kprobe placed on the kretprobe_trampoline during boot time, is also
optimized in this series. Patch 4/4 furnishes this.

The first two patches can go independently of the series. The helper 
functions in these patches are invoked in patch 3/4.

Performance:

An optimized kprobe in powerpc is 1.05 to 4.7 times faster than a kprobe.
 
Example:
 
Placed a probe at an offset 0x50 in _do_fork().
*Time Diff here is, difference in time before hitting the probe and
after the probed instruction. mftb() is employed in kernel/fork.c for
this purpose.
 
# echo 0 > /proc/sys/debug/kprobes-optimization
Kprobes globally unoptimized
 [  233.607120] Time Diff = 0x1f0
 [  233.608273] Time Diff = 0x1ee
 [  233.609228] Time Diff = 0x203
 [  233.610400] Time Diff = 0x1ec
 [  233.611335] Time Diff = 0x200
 [  233.612552] Time Diff = 0x1f0
 [  233.613386] Time Diff = 0x1ee
 [  233.614547] Time Diff = 0x212
 [  233.615570] Time Diff = 0x206
 [  233.616819] Time Diff = 0x1f3
 [  233.617773] Time Diff = 0x1ec
 [  233.618944] Time Diff = 0x1fb
 [  233.619879] Time Diff = 0x1f0
 [  233.621066] Time Diff = 0x1f9
 [  233.621999] Time Diff = 0x283
 [  233.623281] Time Diff = 0x24d
 [  233.624172] Time Diff = 0x1ea
 [  233.625381] Time Diff = 0x1f0
 [  233.626358] Time Diff = 0x200
 [  233.627572] Time Diff = 0x1ed
 
# echo 1 > /proc/sys/debug/kprobes-optimization
Kprobes globally optimized
 [   70.797075] Time Diff = 0x103
 [   70.799102] Time Diff = 0x181
 [   70.801861] Time Diff = 0x15e
 [   70.803466] Time Diff = 0xf0
 [   70.804348] Time Diff = 0xd0
 [   70.805653] Time Diff = 0xad
 [   70.806477] Time Diff = 0xe0
 [   70.807725] Time Diff = 0xbe
 [   70.808541] Time Diff = 0xc3
 [   70.810191] Time Diff = 0xc7
 [   70.811007] Time Diff = 0xc0
 [   70.812629] Time Diff = 0xc0
 [   70.813640] Time Diff = 0xda
 [   70.814915] Time Diff = 0xbb
 [   70.815726] Time Diff = 0xc4
 [   70.816955] Time Diff = 0xc0
 [   70.817778] Time Diff = 0xcd
 [   70.818999] Time Diff = 0xcd
 [   70.820099] Time Diff = 0xcb
 [   70.821333] Time Diff = 0xf0

Implementation:
===
 
The trap instruction is replaced by a branch to a detour buffer. To address
the limitation of branch instruction in power architecture, detour buffer
slot is allocated from a reserved area . This will ensure that the branch
is within ± 32 MB range. The current kprobes insn caches allocate memory
area for insn slots with module_alloc(). This will always be beyond
± 32MB range.
 
The detour buffer contains a call to optimized_callback() which in turn
call the pre_handler(). Once the pre-handler is run, the original
instruction is emulated from the detour buffer itself. Also the detour
buffer is equipped with a branch back to the normal work flow after the
probed instruction is emulated. Before preparing optimization, Kprobes
inserts original(breakpoint instruction)kprobe on the specified address.
So, even if the kprobe is not possible to be optimized, it just uses a
normal kprobe.
 
Limitations:
==
- Number of probes which can be optimized is limited by the size of the
  area reserved.
- Currently instructions which can be emulated using analyse_instr() are 
  the only candidates for optimization.
- Conditional branch instructions are not optimized.
- Probes on kernel module region are not considered for optimization now.
 
Link for the V1 patchset: 
https://lkml.org/lkml/2016/9/7/171
https://lkml.org/lkml/2016/9/7/174
https://lkml.org/lkml/2016/9/7/172
https://lkml.org/lkml/2016/9/7/173

Changes from v1:

- Merged the three patches in V1 into a single patch.
- Comments by Masami are addressed.
- Some helper functions are implemented in separate patches.
- Optimization for kprobe placed on the kretprobe_trampoline during
  boot time is implemented.


Kindly let me know your suggestions and comments.

Thanks,
-Anju


Anju T Sudhakar (2):
  arch/powerpc: Implement Optprobes
  arch/powerpc: Optimize kprobe in kretprobe_trampoline

Naveen N. Rao (2):
  powerpc: asm/ppc-opcode.h: introduce __PPC_SH64()
  powerpc: add helper to check if offset is within rel branch range

 .../features/debug/optprobes/arch-support.txt  |   2 +-
 arch/powerpc/Kconfig   |   1 +
 arch/powerpc/include/asm/code-patching.h   |   1 +
 arch/powerpc/include/asm/kprobes.h |  23 +-
 arch/powerpc/i

Re: [PATCH 2/3] arch/powerpc : optprobes for powerpc core

2016-09-09 Thread Anju T Sudhakar

Hi Masami,


Thank you for reviewing the patch.


On Thursday 08 September 2016 10:17 PM, Masami Hiramatsu wrote:

On Wed,  7 Sep 2016 15:03:11 +0530
Anju T Sudhakar <a...@linux.vnet.ibm.com> wrote:


Instructions which can be emulated are candidates for optimization.
Before optimization ensure that the address range between the detour
buffer allocated and the instruction being probed is within ± 32MB.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
  arch/powerpc/include/asm/sstep.h |   1 +
  arch/powerpc/kernel/optprobes.c  | 329 +++
  arch/powerpc/lib/sstep.c |  21 +++
  3 files changed, 351 insertions(+)
  create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index d3a42cc..cd5f6ab 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -25,6 +25,7 @@ struct pt_regs;
  
  /* Emulate instructions that cause a transfer of control. */

  extern int emulate_step(struct pt_regs *regs, unsigned int instr);
+extern int optprobe_conditional_branch_check(unsigned int instr);
  
  enum instruction_type {

COMPUTE,/* arith/logical/CR op, etc. */
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000..7983d07
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,329 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+DEFINE_INSN_CACHE_OPS(ppc_optinsn)
+
+#define TMPL_CALL_HDLR_IDX \
+   (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX   \
+   (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_IDX   \
+   (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_KP_IDX\
+   (optprobe_template_kp_addr - optprobe_template_entry)
+#define TMPL_OP1_IDX   \
+   (optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_INSN_IDX  \
+   (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX   \
+   (optprobe_template_end - optprobe_template_entry)
+
+static bool insn_page_in_use;
+
+static void *__ppc_alloc_insn_page(void)
+{
+   if (insn_page_in_use)
+   return NULL;
+   insn_page_in_use = true;
+   return &optinsn_slot;
+}
+
+static void __ppc_free_insn_page(void *page __maybe_unused)
+{
+   insn_page_in_use = false;
+}
+
+struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+   .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+   .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+   /* insn_size initialized later */
+   .alloc = __ppc_alloc_insn_page,
+   .free = __ppc_free_insn_page,
+   .nr_garbage = 0,
+};
+
+kprobe_opcode_t *ppc_get_optinsn_slot(struct optimized_kprobe *op)
+{
+   /*
+* The insn slot is allocated from the reserved
+* area (ie &optinsn_slot). We are not optimizing probes
+* at module_addr now.
+*/
+   if (is_kernel_addr((unsigned long)op->kp.addr))
+   return get_ppc_optinsn_slot();
+   return NULL;
+}
+
+static void ppc_free_optinsn_slot(struct optimized_kprobe *op)
+{
+   if (!op->optinsn.insn)
+   return;
+   if (is_kernel_addr((unsigned long)op->kp.addr))
+   free_ppc_optinsn_slot(op->optinsn.insn, 0);
+}
+
+static unsigned long can_optimize(struct kprobe *p)
+{
+   struct pt_regs *regs;
+   unsigned int instr;
+
+   /*
+* Not optimizing the kprobe placed by
+* kretprobe during boot time
+*/
+   if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+   return 0;
+
+   regs = kmalloc(sizeof(*regs), GFP_KERNEL);
+   if (!regs)
+   return -ENOMEM;
+   memset(regs, 0, sizeof(struct pt_regs));
+   memcpy(regs, current_pt_regs(), sizeof(struct pt_regs));
+   regs->nip = (unsigned long)p->addr;
+   instr = *p->ainsn.insn;
+
+   /* Ensure the instruction can be emulated */
+   if (emulate_step(regs, instr) != 1)
+   return 0;
+   /* Conditional branches are not optimized */
+   if (optprobe_conditional_branch_check(instr) != 1)
+   return 0;
+   return regs->nip;

Could you free regs here? Or allocate it on stack.


yes. 'regs' can be freed here.



+}
+
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+   struct kprobe_ctlblk *kcb = get_kprob

[PATCH 2/3] arch/powerpc : optprobes for powerpc core

2016-09-07 Thread Anju T Sudhakar
Instructions which can be emulated are candidates for optimization.
Before optimization ensure that the address range between the detour
buffer allocated and the instruction being probed is within ?? 32MB.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/sstep.h |   1 +
 arch/powerpc/kernel/optprobes.c  | 329 +++
 arch/powerpc/lib/sstep.c |  21 +++
 3 files changed, 351 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index d3a42cc..cd5f6ab 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -25,6 +25,7 @@ struct pt_regs;
 
 /* Emulate instructions that cause a transfer of control. */
 extern int emulate_step(struct pt_regs *regs, unsigned int instr);
+extern int optprobe_conditional_branch_check(unsigned int instr);
 
 enum instruction_type {
COMPUTE,/* arith/logical/CR op, etc. */
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000..7983d07
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,329 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+DEFINE_INSN_CACHE_OPS(ppc_optinsn)
+
+#define TMPL_CALL_HDLR_IDX \
+   (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX   \
+   (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_IDX   \
+   (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_KP_IDX\
+   (optprobe_template_kp_addr - optprobe_template_entry)
+#define TMPL_OP1_IDX   \
+   (optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_INSN_IDX  \
+   (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX   \
+   (optprobe_template_end - optprobe_template_entry)
+
+static bool insn_page_in_use;
+
+static void *__ppc_alloc_insn_page(void)
+{
+   if (insn_page_in_use)
+   return NULL;
+   insn_page_in_use = true;
+   return &optinsn_slot;
+}
+
+static void __ppc_free_insn_page(void *page __maybe_unused)
+{
+   insn_page_in_use = false;
+}
+
+struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+   .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+   .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+   /* insn_size initialized later */
+   .alloc = __ppc_alloc_insn_page,
+   .free = __ppc_free_insn_page,
+   .nr_garbage = 0,
+};
+
+kprobe_opcode_t *ppc_get_optinsn_slot(struct optimized_kprobe *op)
+{
+   /*
+* The insn slot is allocated from the reserved
+* area (ie &optinsn_slot). We are not optimizing probes
+* at module_addr now.
+*/
+   if (is_kernel_addr((unsigned long)op->kp.addr))
+   return get_ppc_optinsn_slot();
+   return NULL;
+}
+
+static void ppc_free_optinsn_slot(struct optimized_kprobe *op)
+{
+   if (!op->optinsn.insn)
+   return;
+   if (is_kernel_addr((unsigned long)op->kp.addr))
+   free_ppc_optinsn_slot(op->optinsn.insn, 0);
+}
+
+static unsigned long can_optimize(struct kprobe *p)
+{
+   struct pt_regs *regs;
+   unsigned int instr;
+
+   /*
+* Not optimizing the kprobe placed by
+* kretprobe during boot time
+*/
+   if (p->addr == (kprobe_opcode_t *)&kretprobe_trampoline)
+   return 0;
+
+   regs = kmalloc(sizeof(*regs), GFP_KERNEL);
+   if (!regs)
+   return -ENOMEM;
+   memset(regs, 0, sizeof(struct pt_regs));
+   memcpy(regs, current_pt_regs(), sizeof(struct pt_regs));
+   regs->nip = (unsigned long)p->addr;
+   instr = *p->ainsn.insn;
+
+   /* Ensure the instruction can be emulated */
+   if (emulate_step(regs, instr) != 1)
+   return 0;
+   /* Conditional branches are not optimized */
+   if (optprobe_conditional_branch_check(instr) != 1)
+   return 0;
+   return regs->nip;
+}
+
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+   struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+   unsigned long flags;
+
+   local_irq_save(flags);
+
+   if (kprobe_running()) {
+   kprobes_inc_nmissed_count(&op->kp);
+   } else {
+   __this_cpu_write(current_kprobe, &op->kp);
+   kcb->kprobe_status = KPROBE_HIT_ACTIVE;

[PATCH 3/3] arch/powerpc : Enable optprobes support in powerpc

2016-09-07 Thread Anju T Sudhakar
Mark optprobe 'ok' for powerpc

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 Documentation/features/debug/optprobes/arch-support.txt | 2 +-
 arch/powerpc/Kconfig| 1 +
 arch/powerpc/kernel/Makefile| 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/features/debug/optprobes/arch-support.txt 
b/Documentation/features/debug/optprobes/arch-support.txt
index b8999d8..45bc99d 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -27,7 +27,7 @@
 |   nios2: | TODO |
 |openrisc: | TODO |
 |  parisc: | TODO |
-| powerpc: | TODO |
+| powerpc: |  ok  |
 |s390: | TODO |
 |   score: | TODO |
 |  sh: | TODO |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a5e0b47..136ca35 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -104,6 +104,7 @@ config PPC
select HAVE_IOREMAP_PROT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
select HAVE_KPROBES
+   select HAVE_OPTPROBES
select HAVE_ARCH_KGDB
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index fe4c075..33667d3 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -98,6 +98,7 @@ endif
 obj-$(CONFIG_BOOTX_TEXT)   += btext.o
 obj-$(CONFIG_SMP)  += smp.o
 obj-$(CONFIG_KPROBES)  += kprobes.o
+obj-$(CONFIG_OPTPROBES)+= optprobes.o optprobes_head.o
 obj-$(CONFIG_UPROBES)  += uprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)   += legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
-- 
2.7.4



[PATCH 1/3] arch/powerpc : Add detour buffer support for optprobes

2016-09-07 Thread Anju T Sudhakar
Detour buffer contains instructions to create an in memory pt_regs.
After the execution of prehandler a call is made for instruction emulation.
The NIP is decided after the probed instruction is executed. Hence a branch
instruction is created to the NIP returned by emulate_step().

Instruction slot for detour buffer is allocated from the reserved area.
For the time being 64KB is reserved in memory for this purpose.

Signed-off-by: Anju T Sudhakar <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kprobes.h   |  24 +++
 arch/powerpc/kernel/optprobes_head.S | 119 +++
 2 files changed, 143 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes_head.S

diff --git a/arch/powerpc/include/asm/kprobes.h 
b/arch/powerpc/include/asm/kprobes.h
index 2c9759bd..2109ce03 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -38,7 +38,25 @@ struct pt_regs;
 struct kprobe;
 
 typedef ppc_opcode_t kprobe_opcode_t;
+
+extern kprobe_opcode_t optinsn_slot;
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_kp_addr[];
+extern kprobe_opcode_t optprobe_template_op_address1[];
+extern kprobe_opcode_t optprobe_template_end[];
+
 #define MAX_INSN_SIZE 1
+#define MAX_OPTIMIZED_LENGTH   4
+#define MAX_OPTINSN_SIZE   \
+   (((unsigned long)_template_end -   \
+   (unsigned long)_template_entry) /  \
+   sizeof(kprobe_opcode_t))
+#define RELATIVEJUMP_SIZE  4
 
 #ifdef PPC64_ELF_ABI_v2
 /* PPC64 ABIv2 needs local entry point */
@@ -124,6 +142,12 @@ struct kprobe_ctlblk {
struct prev_kprobe prev_kprobe;
 };
 
+struct arch_optimized_insn {
+   kprobe_opcode_t copied_insn[1];
+   /* detour buffer */
+   kprobe_opcode_t *insn;
+};
+
 extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
diff --git a/arch/powerpc/kernel/optprobes_head.S 
b/arch/powerpc/kernel/optprobes_head.S
new file mode 100644
index 000..73db1df
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -0,0 +1,119 @@
+/*
+ * Code to prepare detour buffer for optprobes in Kernel.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+
+#define OPT_SLOT_SIZE   65536
+
+.balign 2
+.global optinsn_slot
+optinsn_slot:
+   /* Reserve an area to allocate slots for detour buffer */
+   .space  OPT_SLOT_SIZE
+
+/* Create an in-memory pt_regs */
+.global optprobe_template_entry
+optprobe_template_entry:
+   stdu    r1,-INT_FRAME_SIZE(r1)
+   SAVE_GPR(0,r1)
+   /* Save the previous SP into stack */
+   addi    r0,r1,INT_FRAME_SIZE
+   std r0,GPR1(r1)
+   SAVE_10GPRS(2,r1)
+   SAVE_10GPRS(12,r1)
+   SAVE_10GPRS(22,r1)
+   /* Save SPRS */
+   mfmsr   r5
+   std r5,_MSR(r1)
+   li  r5,0
+   std r5,ORIG_GPR3(r1)
+   std r5,_TRAP(r1)
+   std r5,RESULT(r1)
+   mfctr   r5
+   std r5,_CTR(r1)
+   mflr    r5
+   std r5,_LINK(r1)
+   mfspr   r5,SPRN_XER
+   std r5,_XER(r1)
+   mfcr    r5
+   std r5,_CCR(r1)
+   lbz r5,PACASOFTIRQEN(r13)
+   std r5,SOFTE(r1)
+   mfdar   r5
+   std r5,_DAR(r1)
+   mfdsisr r5
+   std r5,_DSISR(r1)
+
+/* Save p->addr into stack */
+.global optprobe_template_kp_addr
+optprobe_template_kp_addr:
+   nop
+   nop
+   nop
+   nop
+   nop
+   std r3,_NIP(r1)
+
+/* Pass parameters for optimized_callback */
+.global optprobe_template_op_address1
+optprobe_template_op_address1:
+   nop
+   nop
+   nop
+   nop
+   nop
+   addi    r4,r1,STACK_FRAME_OVERHEAD
+
+/* Branch to optimized_callback() */
+.global optprobe_template_call_handler
+optprobe_template_call_handler:
+   nop
+   /* Pass parameters for instruction emulation */
+   addi    r3,r1,STACK_FRAME_OVERHEAD
+.global optprobe_template_insn
+optprobe_template_insn:
+   nop
+   nop
+
+/* Branch to instruction emulation  */
+.global optprobe_template_call_emulate
+optprobe_template_call_emulate:
+   nop
+   /* Restore the registers */
+   ld  r5,_MSR(r1)
+   mtmsr   r5
+   ld  r5,_CTR(r1)
+   mtctr   r5
+   ld  r5,_LINK(r1)
+   m

[PATCH 0/3] OPTPROBES for powerpc

2016-09-07 Thread Anju T Sudhakar
This is the patchset of the kprobes jump optimization
(a.k.a. OPTPROBES) for powerpc. Kprobe being an inevitable tool
for kernel developers, enhancing the performance of kprobe has
got much importance.

Currently kprobes inserts a trap instruction to probe a running kernel.
Jump optimization allows kprobes to replace the trap with a branch,
reducing the probe overhead drastically.

In this series, conditional branch instructions are not considered for
optimization as they have to be assessed carefully in SMP systems.


Performance:
=
An optimized kprobe in powerpc is 1.05 to 4.7 times faster than a kprobe.

Example:

Placed a probe at an offset 0x50 in _do_fork().
*Time Diff here is, difference in time before hitting the probe and
after the probed instruction. mftb() is employed in kernel/fork.c for
this purpose.

# echo 0 > /proc/sys/debug/kprobes-optimization
Kprobes globally unoptimized
[  233.607120] Time Diff = 0x1f0
[  233.608273] Time Diff = 0x1ee
[  233.609228] Time Diff = 0x203
[  233.610400] Time Diff = 0x1ec
[  233.611335] Time Diff = 0x200
[  233.612552] Time Diff = 0x1f0
[  233.613386] Time Diff = 0x1ee
[  233.614547] Time Diff = 0x212
[  233.615570] Time Diff = 0x206
[  233.616819] Time Diff = 0x1f3
[  233.617773] Time Diff = 0x1ec
[  233.618944] Time Diff = 0x1fb
[  233.619879] Time Diff = 0x1f0
[  233.621066] Time Diff = 0x1f9
[  233.621999] Time Diff = 0x283
[  233.623281] Time Diff = 0x24d
[  233.624172] Time Diff = 0x1ea
[  233.625381] Time Diff = 0x1f0
[  233.626358] Time Diff = 0x200
[  233.627572] Time Diff = 0x1ed

# echo 1 > /proc/sys/debug/kprobes-optimization
Kprobes globally optimized
[   70.797075] Time Diff = 0x103
[   70.799102] Time Diff = 0x181
[   70.801861] Time Diff = 0x15e
[   70.803466] Time Diff = 0xf0
[   70.804348] Time Diff = 0xd0
[   70.805653] Time Diff = 0xad
[   70.806477] Time Diff = 0xe0
[   70.807725] Time Diff = 0xbe
[   70.808541] Time Diff = 0xc3
[   70.810191] Time Diff = 0xc7
[   70.811007] Time Diff = 0xc0
[   70.812629] Time Diff = 0xc0
[   70.813640] Time Diff = 0xda
[   70.814915] Time Diff = 0xbb
[   70.815726] Time Diff = 0xc4
[   70.816955] Time Diff = 0xc0
[   70.817778] Time Diff = 0xcd
[   70.818999] Time Diff = 0xcd
[   70.820099] Time Diff = 0xcb
[   70.821333] Time Diff = 0xf0

Implementation:
===

The trap instruction is replaced by a branch to a detour buffer. To address
the limitation of branch instruction in power architecture detour buffer
slot is allocated from a reserved area . This will ensure that the branch
is within ± 32 MB range. Patch 2/3 furnishes this. The current kprobes
insn caches allocate memory area for insn slots with module_alloc(). This
will always be beyond ± 32MB range.

The detour buffer contains a call to optimized_callback() which in turn
call the pre_handler(). Once the pre-handler is run, the original
instruction is emulated from the detour buffer itself. Also the detour
buffer is equipped with a branch back to the normal work flow after the
probed instruction is emulated. Before preparing optimization, Kprobes
inserts original(breakpoint instruction)kprobe on the specified address.
So, even if the kprobe is not possible to be optimized, it just uses a
normal kprobe.

Limitations:
==
- Number of probes which can be optimized is limited by the size of the
  area reserved.
- Currently instructions which can be emulated are the only candidates for
  optimization.
- Conditional branch instructions are not optimized.
- Probes on kernel module region are not considered for optimization now.

RFC patchset for optprobes: https://lkml.org/lkml/2016/5/31/375
https://lkml.org/lkml/2016/5/31/376
https://lkml.org/lkml/2016/5/31/377
https://lkml.org/lkml/2016/5/31/378 

Changes from RFC-v3 :

- Optimization for kporbe(in case of branch instructions) is limited to
  unconditional branch instructions only, since the conditional
  branches are to be assessed carefully in SMP systems.
- create_return_branch() is omitted.
- Comments by Masami are addressed.
 

Anju T Sudhakar (3):
  arch/powerpc : Add detour buffer support for optprobes
  arch/powerpc : optprobes for powerpc core
  arch/powerpc : Enable optprobes support in powerpc

 .../features/debug/optprobes/arch-support.txt  |   2 +-
 arch/powerpc/Kconfig   |   1 +
 arch/powerpc/include/asm/kprobes.h |  24 ++
 arch/powerpc/include/asm/sstep.h   |   1 +
 arch/powerpc/kernel/Makefile   |   1 +
 arch/powerpc/kernel/optprobes.c| 329 +
 arch/powerpc/kernel/optprobes_head.S   | 119 
 arch/powerpc/lib/sstep.c   |  21 ++
 8 files changed, 497 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/kernel/optprobes.c
 create mode 100644 arch/powerpc/kernel/optprobes_head.S

-- 
2.7.4



[RFC PATCH v4 3/3] arch/powerpc : Enable optprobes support in powerpc

2016-05-31 Thread Anju T
Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 Documentation/features/debug/optprobes/arch-support.txt | 2 +-
 arch/powerpc/Kconfig| 1 +
 arch/powerpc/kernel/Makefile| 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/features/debug/optprobes/arch-support.txt 
b/Documentation/features/debug/optprobes/arch-support.txt
index b8999d8..45bc99d 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -27,7 +27,7 @@
 |   nios2: | TODO |
 |openrisc: | TODO |
 |  parisc: | TODO |
-| powerpc: | TODO |
+| powerpc: |  ok  |
 |s390: | TODO |
 |   score: | TODO |
 |  sh: | TODO |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7cd32c0..a87c9b1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -104,6 +104,7 @@ config PPC
select HAVE_IOREMAP_PROT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
select HAVE_KPROBES
+   select HAVE_OPTPROBES
select HAVE_ARCH_KGDB
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2da380f..7994e22 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -99,6 +99,7 @@ endif
 obj-$(CONFIG_BOOTX_TEXT)   += btext.o
 obj-$(CONFIG_SMP)  += smp.o
 obj-$(CONFIG_KPROBES)  += kprobes.o
+obj-$(CONFIG_OPTPROBES)+= optprobes.o optprobes_head.o
 obj-$(CONFIG_UPROBES)  += uprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)   += legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC PATCH v3 2/3] arch/powerpc : optprobes for powerpc core

2016-05-31 Thread Anju T
Instructions which can be emulated are candidates for
optimization. Before optimization, ensure that the address range
between the detour buffer allocated and the instruction being probed
is within +/- 32MB.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/optprobes.c | 351 
 1 file changed, 351 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000..c4253b6
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,351 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+DEFINE_INSN_CACHE_OPS(ppc_optinsn)
+
+#define TMPL_CALL_HDLR_IDX \
+   (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX   \
+   (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_BRANCH_IDX\
+   (optprobe_template_ret_branch - optprobe_template_entry)
+#define TMPL_RET_IDX   \
+   (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_KP_IDX\
+   (optprobe_template_kp_addr - optprobe_template_entry)
+#define TMPL_OP1_IDX   \
+   (optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_OP2_IDX   \
+   (optprobe_template_op_address2 - optprobe_template_entry)
+#define TMPL_INSN_IDX  \
+   (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX   \
+   (optprobe_template_end - optprobe_template_entry)
+
+static unsigned long val_nip;
+
+static void *__ppc_alloc_insn_page(void)
+{
+   return _slot;
+}
+
+static void *__ppc_free_insn_page(void *page __maybe_unused)
+{
+   return;
+}
+
+struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+   .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+   .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+   /* insn_size initialized later */
+   .alloc = __ppc_alloc_insn_page,
+   .free = __ppc_free_insn_page,
+   .nr_garbage = 0,
+};
+
+kprobe_opcode_t *ppc_get_optinsn_slot(struct optimized_kprobe *op)
+{
+   /*
+* The insn slot is allocated from the reserved
+* area(ie _slot).We are not optimizing probes
+* at module_addr now.
+*/
+   kprobe_opcode_t *slot = NULL;
+
+   if (is_kernel_addr(op->kp.addr))
+   slot = get_ppc_optinsn_slot();
+   return slot;
+}
+
+static void ppc_free_optinsn_slot(struct optimized_kprobe *op)
+{
+   if (!op->optinsn.insn)
+   return;
+   if (is_kernel_addr((unsigned long)op->kp.addr))
+   free_ppc_optinsn_slot(op->optinsn.insn, 0);
+}
+
+static void
+__arch_remove_optimized_kprobe(struct optimized_kprobe *op, int dirty)
+{
+   ppc_free_optinsn_slot(op);
+   op->optinsn.insn = NULL;
+}
+
+static int can_optimize(struct kprobe *p)
+{
+   struct pt_regs *regs;
+   unsigned int instr;
+   int r;
+
+   /*
+* Not optimizing the kprobe placed by
+* kretprobe during boot time
+*/
+   if ((kprobe_opcode_t)p->addr == (kprobe_opcode_t)_trampoline)
+   return 0;
+
+   regs = kmalloc(sizeof(*regs), GFP_KERNEL);
+   if (!regs)
+   return -ENOMEM;
+   memset(regs, 0, sizeof(struct pt_regs));
+   memcpy(regs, current_pt_regs(), sizeof(struct pt_regs));
+   regs->nip = p->addr;
+   instr = *(p->ainsn.insn);
+
+   /* Ensure the instruction can be emulated*/
+   r = emulate_step(regs, instr);
+   val_nip = regs->nip;
+   if (r != 1)
+   return 0;
+
+   return 1;
+}
+
+static void
+create_return_branch(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+   /*
+* Create a branch back to the return address
+* after the probed instruction is emulated
+*/
+
+   kprobe_opcode_t branch, *buff;
+   unsigned long ret;
+
+   ret = regs->nip;
+   buff = op->optinsn.insn;
+   /*
+* TODO: For conditional branch instructions, the return
+* address may differ in SMP systems.This has to be addressed.
+*/
+
+   branch = create_branch((unsigned int *)buff + TMPL_RET_IDX,
+  (unsigned long)ret, 0);
+   buff[TMPL_RET_IDX] = branch;
+   isync();
+}
+
+static void
+optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
+{
+   struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+   unsigned 

[RFC PATCH v3 1/3] arch/powerpc : Add detour buffer support for optprobes

2016-05-31 Thread Anju T
Detour buffer contains instructions to create an in memory pt_regs.
After the execution of prehandler a call is made for instruction emulation.
The NIP is decided after the probed instruction is executed. Hence a branch
instruction is created to the NIP returned by emulate_step().

Instruction slot for detour buffer is allocated from
the reserved area. For the time being 64KB is reserved
in memory for this purpose.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kprobes.h   |  27 +++
 arch/powerpc/kernel/optprobes_head.S | 136 +++
 2 files changed, 163 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes_head.S

diff --git a/arch/powerpc/include/asm/kprobes.h 
b/arch/powerpc/include/asm/kprobes.h
index 039b583..1cb2527 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -38,7 +38,27 @@ struct pt_regs;
 struct kprobe;
 
 typedef ppc_opcode_t kprobe_opcode_t;
+
+extern kprobe_opcode_t optinsn_slot;
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret_branch[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_kp_addr[];
+extern kprobe_opcode_t optprobe_template_op_address1[];
+extern kprobe_opcode_t optprobe_template_op_address2[];
+extern kprobe_opcode_t optprobe_template_end[];
+
 #define MAX_INSN_SIZE 1
+#define MAX_OPTIMIZED_LENGTH4
+#defineMAX_OPTINSN_SIZE\
+   (((unsigned long)_template_end -   \
+   (unsigned long)_template_entry) /  \
+   sizeof(kprobe_opcode_t))
+#define RELATIVEJUMP_SIZE   4
 
 #ifdef CONFIG_PPC64
 #if defined(_CALL_ELF) && _CALL_ELF == 2
@@ -129,5 +149,12 @@ struct kprobe_ctlblk {
 extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+
+struct arch_optimized_insn {
+   kprobe_opcode_t copied_insn[1];
+   /* detour buffer */
+   kprobe_opcode_t *insn;
+};
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_KPROBES_H */
diff --git a/arch/powerpc/kernel/optprobes_head.S 
b/arch/powerpc/kernel/optprobes_head.S
new file mode 100644
index 000..b2536bc
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -0,0 +1,136 @@
+/*
+ * Code to prepare detour buffer for optprobes in Kernel.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+
+#defineOPT_SLOT_SIZE   65536
+
+.align 2
+.global optinsn_slot
+optinsn_slot:
+   /* Reserve an area to allocate slots for detour buffer */
+   .space  OPT_SLOT_SIZE
+
+/* Create an in-memory pt_regs */
+.global optprobe_template_entry
+optprobe_template_entry:
+   stdur1,-INT_FRAME_SIZE(r1)
+   SAVE_GPR(0,r1)
+   /* Save the previous SP into stack */
+   addir0,r1,INT_FRAME_SIZE
+   std 0,GPR1(r1)
+   SAVE_2GPRS(2,r1)
+   SAVE_8GPRS(4,r1)
+   SAVE_10GPRS(12,r1)
+   SAVE_10GPRS(22,r1)
+   /* Save SPRS */
+   mfmsr   r5
+   std r5,_MSR(r1)
+   li  r5,0
+   std r5,ORIG_GPR3(r1)
+   mfctr   r5
+   std r5,_CTR(r1)
+   mflrr5
+   std r5,_LINK(r1)
+   mfspr   r5,SPRN_XER
+   std r5,_XER(r1)
+   mfcrr5
+   std r5,_CCR(r1)
+   lbz r5,PACASOFTIRQEN(r13)
+   std r5,SOFTE(r1)
+   li  r5,0
+   std r5,_TRAP(r1)
+   mfdar   r5
+   std r5,_DAR(r1)
+   mfdsisr r5
+   std r5,_DSISR(r1)
+   li  r5,0
+   std r5,RESULT(r1)
+
+/* Save p->addr into stack */
+.global optprobe_template_kp_addr
+optprobe_template_kp_addr:
+   nop
+   nop
+   nop
+   nop
+   nop
+   std r3,_NIP(r1)
+
+/* Pass parameters for optimized_callback */
+.global optprobe_template_op_address1
+optprobe_template_op_address1:
+   nop
+   nop
+   nop
+   nop
+   nop
+   addir4,r1,STACK_FRAME_OVERHEAD
+
+/* Branch to the prehandler */
+.global optprobe_template_call_handler
+optprobe_template_call_handler:
+   nop
+   /* Pass parameters for instruction emulation */
+   addir3,r1,STACK_FRAME_OVERHEAD
+.global optprobe_template_insn
+optprobe_template_insn:
+   nop
+   nop
+
+/* Branch to instruction emulation  */
+.global optprobe_template_call_emulate
+optprobe_template_call_emu

[RFC PATCH v3 0/3] OPTPROBES for powerpc

2016-05-31 Thread Anju T
Here is the RFC V3 patchset of the kprobes jump optimization
(a.k.a. OPTPROBES) for powerpc. Kprobe being an inevitable tool
for kernel developers, enhancing the performance of kprobe has
got much importance.

Currently kprobes inserts a trap instruction to probe a running kernel.
Jump optimization allows kprobes to replace the trap with a branch, reducing
the probe overhead drastically.

Performance:
=
An optimized kprobe in powerpc is 1.05 to 4.7 times faster than a kprobe.

Example:

Placed a probe at an offset 0x50 in _do_fork().
*Time Diff here is the difference in time before hitting the probe and after the
probed instruction.
mftb() is employed in kernel/fork.c for this purpose.

# echo 0 > /proc/sys/debug/kprobes-optimization 
Kprobes globally unoptimized

[  233.607120] Time Diff = 0x1f0
[  233.608273] Time Diff = 0x1ee
[  233.609228] Time Diff = 0x203
[  233.610400] Time Diff = 0x1ec
[  233.611335] Time Diff = 0x200
[  233.612552] Time Diff = 0x1f0
[  233.613386] Time Diff = 0x1ee
[  233.614547] Time Diff = 0x212
[  233.615570] Time Diff = 0x206
[  233.616819] Time Diff = 0x1f3
[  233.617773] Time Diff = 0x1ec
[  233.618944] Time Diff = 0x1fb
[  233.619879] Time Diff = 0x1f0
[  233.621066] Time Diff = 0x1f9
[  233.621999] Time Diff = 0x283
[  233.623281] Time Diff = 0x24d
[  233.624172] Time Diff = 0x1ea
[  233.625381] Time Diff = 0x1f0
[  233.626358] Time Diff = 0x200
[  233.627572] Time Diff = 0x1ed

# echo 1 > /proc/sys/debug/kprobes-optimization 
Kprobes globally optimized

[   70.797075] Time Diff = 0x103
[   70.799102] Time Diff = 0x181
[   70.801861] Time Diff = 0x15e
[   70.803466] Time Diff = 0xf0
[   70.804348] Time Diff = 0xd0
[   70.805653] Time Diff = 0xad
[   70.806477] Time Diff = 0xe0
[   70.807725] Time Diff = 0xbe
[   70.808541] Time Diff = 0xc3
[   70.810191] Time Diff = 0xc7
[   70.811007] Time Diff = 0xc0
[   70.812629] Time Diff = 0xc0
[   70.813640] Time Diff = 0xda
[   70.814915] Time Diff = 0xbb
[   70.815726] Time Diff = 0xc4
[   70.816955] Time Diff = 0xc0
[   70.817778] Time Diff = 0xcd
[   70.818999] Time Diff = 0xcd
[   70.820099] Time Diff = 0xcb
[   70.821333] Time Diff = 0xf0


Implementation:
===

The trap instruction is replaced by a branch to a detour buffer.
To address the limitation of branch instruction in power architecture
detour buffer slot is allocated from a reserved area. This will ensure
that the branch is within the +/- 32 MB range. Patch 2/3 furnishes this.
The current kprobes insn caches allocate memory area for insn slots
with module_alloc(). This will always be beyond the +/- 32 MB range.

The detour buffer contains a call to optimized_callback() which in turn
call the pre_handler(). Once the pre-handler is run, the original instruction
is emulated from the detour buffer itself. Also the detour buffer is equipped
with a branch back to the normal work flow after the probed instruction is 
emulated.
Before preparing optimization, kprobes inserts the original (breakpoint
instruction) kprobe at the specified address. So, even if the kprobe cannot
be optimized, it just uses a normal kprobe.

Limitations:
==
- Number of probes which can be optimized is limited by the size of the area 
reserved.
- Currently instructions which can be emulated are the only candidates for 
optimization.
- Probes on kernel module region are not considered for optimization now.

Changes from RFC-v1:

- Detour buffer memory reservation code moved to optprobes.c
- optimized_callback() is marked as NOKPROBE_SYMBOL.
- Return NULL when there is no more slots to allocate from detour buffer.
- Other comments by Masami are addressed.


Changes from RFC-v2:

- The Come-From Address Register (CFAR) is a 64-bit
  register. When an rfebb, rfid, or rfscv instruction is
  executed, the register is set to the effective address of
  the instruction. Hence the CFAR register cannot be used to
  store the probed instruction address into the in memory pt_regs.
  The NIP value is stored into the stack using load instructions
  as suggested by Naveen.
- For allocating and freeing memory from the reserved area the existing 
  _get_insn_slot() and _free_insn_slot() are used with the approach suggested
  by Masami.
- CR register is also stored in the stack as suggested by Maddy.
- create_load_address_insn() in patch 2/3 is modified.
- SOFTE,ORIG_GPR3 and RESULT are also stored in stack.
- Other comments regarding the coding style are addressed.



Kindly let me know your suggestions and comments.

Thanks
-Anju


Anju T (3):
  arch/powerpc : Add detour buffer support for optprobes
  arch/powerpc : optprobes for powerpc core
  arch/powerpc : Enable optprobes support in powerpc

 .../features/debug/optprobes/arch-support.txt  |   2 +-
 arch/powerpc/Kconfig   |   1 +
 arch/powerpc/include/asm/kprobes.h |  27 ++
 arch/powerpc/kernel/Makefile   |   1 +
 arch/powerpc/kernel/optpr

Re: [RFC PATCH v2 2/3] arch/powerpc : optprobes for powerpc core

2016-05-24 Thread Anju T

Hi,
On Friday 20 May 2016 06:07 PM, Masami Hiramatsu wrote:

Hi Anju,

Please see my comments below,

On Thu, 19 May 2016 20:40:39 +0530
Anju T <a...@linux.vnet.ibm.com> wrote:


ppc_get_optinsn_slot() and ppc_free_optinsn_slot() are
geared towards the allocation and freeing of memory from
the area reserved for detour buffer.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
  arch/powerpc/kernel/optprobes.c | 480 
  1 file changed, 480 insertions(+)
  create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000..bb61e18
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,480 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define SLOT_SIZE 65536
+#define TMPL_CALL_HDLR_IDX \
+   (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX   \
+   (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_BRANCH_IDX\
+   (optprobe_template_ret_branch - optprobe_template_entry)
+#define TMPL_RET_IDX   \
+   (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_OP1_IDX   \
+   (optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_OP2_IDX   \
+   (optprobe_template_op_address2 - optprobe_template_entry)
+#define TMPL_INSN_IDX  \
+   (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX   \
+   (optprobe_template_end - optprobe_template_entry)
+
+struct kprobe_ppc_insn_page {
+   struct list_head list;
+   kprobe_opcode_t *insns; /* Page of instruction slots */
+   struct kprobe_insn_cache *cache;
+   int nused;
+   int ngarbage;
+   char slot_used[];
+};
+
+#define PPC_KPROBE_INSN_PAGE_SIZE(slots)   \
+   (offsetof(struct kprobe_ppc_insn_page, slot_used) + \
+   (sizeof(char) * (slots)))
+
+enum ppc_kprobe_slot_state {
+   SLOT_CLEAN = 0,
+   SLOT_DIRTY = 1,
+   SLOT_USED = 2,
+};
+
+static struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+   .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+   .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+   /* .insn_size is initialized later */
+   .nr_garbage = 0,
+};
+
+static int ppc_slots_per_page(struct kprobe_insn_cache *c)
+{
+   /*
+* Here the #slots per page differs from x86 as we have
+* only 64KB reserved.
+*/
+   return SLOT_SIZE / (c->insn_size * sizeof(kprobe_opcode_t));
+}
+
+/* Return 1 if all garbages are collected, otherwise 0. */
+static int collect_one_slot(struct kprobe_ppc_insn_page *kip, int idx)
+{
+   kip->slot_used[idx] = SLOT_CLEAN;
+   kip->nused--;
+   return 0;
+}
+
+static int collect_garbage_slots(struct kprobe_insn_cache *c)
+{
+   struct kprobe_ppc_insn_page *kip, *next;
+
+   /* Ensure no-one is interrupted on the garbages */
+   synchronize_sched();
+
+   list_for_each_entry_safe(kip, next, >pages, list) {
+   int i;
+
+   if (kip->ngarbage == 0)
+   continue;
+   kip->ngarbage = 0;   /* we will collect all garbages */
+   for (i = 0; i < ppc_slots_per_page(c); i++) {
+   if (kip->slot_used[i] == SLOT_DIRTY &&
+   collect_one_slot(kip, i))
+   break;
+   }
+   }
+   c->nr_garbage = 0;
+   return 0;
+}
+
+kprobe_opcode_t  *__ppc_get_optinsn_slot(struct kprobe_insn_cache *c)
+{
+   struct kprobe_ppc_insn_page *kip;
+   kprobe_opcode_t *slot = NULL;
+
+   mutex_lock(>mutex);
+   list_for_each_entry(kip, >pages, list) {
+   if (kip->nused < ppc_slots_per_page(c)) {
+   int i;
+
+   for (i = 0; i < ppc_slots_per_page(c); i++) {
+   if (kip->slot_used[i] == SLOT_CLEAN) {
+   kip->slot_used[i] = SLOT_USED;
+   kip->nused++;
+   slot = kip->insns + (i * c->insn_size);
+   goto out;
+   }
+   }
+   /* kip->nused reached max value. */
+   kip->nused = ppc_slots_per_page(c);
+   WARN_ON(1);
+

[RFC PATCH v2 3/3] arch/powerpc : Enable optprobes support in powerpc

2016-05-19 Thread Anju T
Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 Documentation/features/debug/optprobes/arch-support.txt | 2 +-
 arch/powerpc/Kconfig| 1 +
 arch/powerpc/kernel/Makefile| 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/features/debug/optprobes/arch-support.txt 
b/Documentation/features/debug/optprobes/arch-support.txt
index b8999d8..45bc99d 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -27,7 +27,7 @@
 |   nios2: | TODO |
 |openrisc: | TODO |
 |  parisc: | TODO |
-| powerpc: | TODO |
+| powerpc: |  ok  |
 |s390: | TODO |
 |   score: | TODO |
 |  sh: | TODO |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7cd32c0..a87c9b1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -104,6 +104,7 @@ config PPC
select HAVE_IOREMAP_PROT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
select HAVE_KPROBES
+   select HAVE_OPTPROBES
select HAVE_ARCH_KGDB
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2da380f..7994e22 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -99,6 +99,7 @@ endif
 obj-$(CONFIG_BOOTX_TEXT)   += btext.o
 obj-$(CONFIG_SMP)  += smp.o
 obj-$(CONFIG_KPROBES)  += kprobes.o
+obj-$(CONFIG_OPTPROBES)+= optprobes.o optprobes_head.o
 obj-$(CONFIG_UPROBES)  += uprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)   += legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC PATCH v2 2/3] arch/powerpc : optprobes for powerpc core

2016-05-19 Thread Anju T
ppc_get_optinsn_slot() and ppc_free_optinsn_slot() are
geared towards the allocation and freeing of memory from 
the area reserved for detour buffer.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/optprobes.c | 480 
 1 file changed, 480 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000..bb61e18
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,480 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define SLOT_SIZE 65536
+#define TMPL_CALL_HDLR_IDX \
+   (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX   \
+   (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_BRANCH_IDX\
+   (optprobe_template_ret_branch - optprobe_template_entry)
+#define TMPL_RET_IDX   \
+   (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_OP1_IDX   \
+   (optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_OP2_IDX   \
+   (optprobe_template_op_address2 - optprobe_template_entry)
+#define TMPL_INSN_IDX  \
+   (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX   \
+   (optprobe_template_end - optprobe_template_entry)
+
+struct kprobe_ppc_insn_page {
+   struct list_head list;
+   kprobe_opcode_t *insns; /* Page of instruction slots */
+   struct kprobe_insn_cache *cache;
+   int nused;
+   int ngarbage;
+   char slot_used[];
+};
+
+#define PPC_KPROBE_INSN_PAGE_SIZE(slots)   \
+   (offsetof(struct kprobe_ppc_insn_page, slot_used) + \
+   (sizeof(char) * (slots)))
+
+enum ppc_kprobe_slot_state {
+   SLOT_CLEAN = 0,
+   SLOT_DIRTY = 1,
+   SLOT_USED = 2,
+};
+
+static struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+   .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+   .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+   /* .insn_size is initialized later */
+   .nr_garbage = 0,
+};
+
+static int ppc_slots_per_page(struct kprobe_insn_cache *c)
+{
+   /*
+* Here the #slots per page differs from x86 as we have
+* only 64KB reserved.
+*/
+   return SLOT_SIZE / (c->insn_size * sizeof(kprobe_opcode_t));
+}
+
+/* Return 1 if all garbages are collected, otherwise 0. */
+static int collect_one_slot(struct kprobe_ppc_insn_page *kip, int idx)
+{
+   kip->slot_used[idx] = SLOT_CLEAN;
+   kip->nused--;
+   return 0;
+}
+
+static int collect_garbage_slots(struct kprobe_insn_cache *c)
+{
+   struct kprobe_ppc_insn_page *kip, *next;
+
+   /* Ensure no-one is interrupted on the garbages */
+   synchronize_sched();
+
+   list_for_each_entry_safe(kip, next, >pages, list) {
+   int i;
+
+   if (kip->ngarbage == 0)
+   continue;
+   kip->ngarbage = 0;  /* we will collect all garbages */
+   for (i = 0; i < ppc_slots_per_page(c); i++) {
+   if (kip->slot_used[i] == SLOT_DIRTY &&
+   collect_one_slot(kip, i))
+   break;
+   }
+   }
+   c->nr_garbage = 0;
+   return 0;
+}
+
+kprobe_opcode_t  *__ppc_get_optinsn_slot(struct kprobe_insn_cache *c)
+{
+   struct kprobe_ppc_insn_page *kip;
+   kprobe_opcode_t *slot = NULL;
+
+   mutex_lock(>mutex);
+   list_for_each_entry(kip, >pages, list) {
+   if (kip->nused < ppc_slots_per_page(c)) {
+   int i;
+
+   for (i = 0; i < ppc_slots_per_page(c); i++) {
+   if (kip->slot_used[i] == SLOT_CLEAN) {
+   kip->slot_used[i] = SLOT_USED;
+   kip->nused++;
+   slot = kip->insns + (i * c->insn_size);
+   goto out;
+   }
+   }
+   /* kip->nused reached max value. */
+   kip->nused = ppc_slots_per_page(c);
+   WARN_ON(1);
+   }
+   if (!list_empty(>pages)) {
+   pr_info("No more slots to allocate\n");
+   return NULL;
+ 

[RFC PATCH v2 1/3] arch/powerpc : Add detour buffer support for optprobes

2016-05-19 Thread Anju T
Detour buffer contains instructions to create an in memory pt_regs.
After the execution of prehandler a call is made for instruction emulation.
The NIP is decided after the probed instruction is executed. Hence a branch
instruction is created to the NIP returned by emulate_step().

Instruction slot for detour buffer is allocated from
the reserved area. For the time being 64KB is reserved
in memory for this purpose.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kprobes.h   |  25 
 arch/powerpc/kernel/optprobes_head.S | 108 +++
 2 files changed, 133 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes_head.S

diff --git a/arch/powerpc/include/asm/kprobes.h 
b/arch/powerpc/include/asm/kprobes.h
index 039b583..3e4c998 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -38,7 +38,25 @@ struct pt_regs;
 struct kprobe;
 
 typedef ppc_opcode_t kprobe_opcode_t;
+
+extern kprobe_opcode_t optinsn_slot;
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret_branch[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_op_address1[];
+extern kprobe_opcode_t optprobe_template_op_address2[];
+extern kprobe_opcode_t optprobe_template_end[];
+
 #define MAX_INSN_SIZE 1
+#define MAX_OPTIMIZED_LENGTH4
+#define MAX_OPTINSN_SIZE   \
+   ((unsigned long)_template_end -\
+   (unsigned long)_template_entry)
+#define RELATIVEJUMP_SIZE   4
 
 #ifdef CONFIG_PPC64
 #if defined(_CALL_ELF) && _CALL_ELF == 2
@@ -129,5 +147,12 @@ struct kprobe_ctlblk {
 extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+
+struct arch_optimized_insn {
+   kprobe_opcode_t copied_insn[1];
+   /* detour buffer */
+   kprobe_opcode_t *insn;
+};
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_KPROBES_H */
diff --git a/arch/powerpc/kernel/optprobes_head.S 
b/arch/powerpc/kernel/optprobes_head.S
new file mode 100644
index 000..ce32aec
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -0,0 +1,108 @@
+/*
+ * Code to prepare detour buffer for optprobes in kernel.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+
+.global optinsn_slot
+optinsn_slot:
+   /* Reserve an area to allocate slots for detour buffer */
+   .space  65536
+.global optprobe_template_entry
+optprobe_template_entry:
+   stdur1,-INT_FRAME_SIZE(r1)
+   SAVE_GPR(0,r1)
+   /* Save the previous SP into stack */
+   addir0,r1,INT_FRAME_SIZE
+   std 0,GPR1(r1)
+   SAVE_2GPRS(2,r1)
+   SAVE_8GPRS(4,r1)
+   SAVE_10GPRS(12,r1)
+   SAVE_10GPRS(22,r1)
+   /* Save SPRS */
+   mfcfar  r5
+   std r5,_NIP(r1)
+   mfmsr   r5
+   std r5,_MSR(r1)
+   mfctr   r5
+   std r5,_CTR(r1)
+   mflrr5
+   std r5,_LINK(r1)
+   mfspr   r5,SPRN_XER
+   std r5,_XER(r1)
+   li  r5,0
+   std r5,_TRAP(r1)
+   mfdar   r5
+   std r5,_DAR(r1)
+   mfdsisr r5
+   std r5,_DSISR(r1)
+   /* Pass parameters for optimized_callback */
+.global optprobe_template_op_address1
+optprobe_template_op_address1:
+   nop
+   nop
+   nop
+   nop
+   nop
+   addir4,r1,STACK_FRAME_OVERHEAD
+   /* Branch to the prehandler */
+.global optprobe_template_call_handler
+optprobe_template_call_handler:
+   nop
+   /* Pass parameters for instruction emulation */
+   addir3,r1,STACK_FRAME_OVERHEAD
+.global optprobe_template_insn
+optprobe_template_insn:
+   nop
+   nop
+   /* Branch to instruction emulation  */
+.global optprobe_template_call_emulate
+optprobe_template_call_emulate:
+   nop
+.global optprobe_template_op_address2
+optprobe_template_op_address2:
+   nop
+   nop
+   nop
+   nop
+   nop
+   addir4,r1,STACK_FRAME_OVERHEAD
+   /* Branch to create_return_branch() function */
+.global optprobe_template_ret_branch
+optprobe_template_ret_branch:
+   nop
+   /* Restore the registers */
+   ld  r5,_MSR(r1)
+   mtmsr   r5
+   ld  r5,_CTR(r1)
+   mtctr   r5
+   ld  r5,_LINK(r1)
+   mtlrr5
+   ld  r5,_XER(r1)
+   

[RFC PATCH v2 0/3] OPTPROBES for powerpc

2016-05-19 Thread Anju T
Here is the RFC patchset of the kprobes jump optimization
(a.k.a. OPTPROBES) for powerpc. Kprobe being an inevitable tool
for kernel developers, enhancing the performance of kprobe has
got much importance.

Currently kprobes inserts a trap instruction to probe a running kernel.
Jump optimization allows kprobes to replace the trap with a branch, reducing
the probe overhead drastically.

Performance:
=
An optimized kprobe in powerpc is 1.05 to 4.7 times faster than a kprobe.

Example:

Placed a probe at an offset 0x50 in _do_fork().
*Time Diff here is the difference in time before hitting the probe and after the
probed instruction.
mftb() is employed in kernel/fork.c for this purpose.


# echo 0 > /proc/sys/debug/kprobes-optimization 
Kprobes globally unoptimized

[  233.607120] Time Diff = 0x1f0
[  233.608273] Time Diff = 0x1ee
[  233.609228] Time Diff = 0x203
[  233.610400] Time Diff = 0x1ec
[  233.611335] Time Diff = 0x200
[  233.612552] Time Diff = 0x1f0
[  233.613386] Time Diff = 0x1ee
[  233.614547] Time Diff = 0x212
[  233.615570] Time Diff = 0x206
[  233.616819] Time Diff = 0x1f3
[  233.617773] Time Diff = 0x1ec
[  233.618944] Time Diff = 0x1fb
[  233.619879] Time Diff = 0x1f0
[  233.621066] Time Diff = 0x1f9
[  233.621999] Time Diff = 0x283
[  233.623281] Time Diff = 0x24d
[  233.624172] Time Diff = 0x1ea
[  233.625381] Time Diff = 0x1f0
[  233.626358] Time Diff = 0x200
[  233.627572] Time Diff = 0x1ed

# echo 1 > /proc/sys/debug/kprobes-optimization 
Kprobes globally optimized

[   70.797075] Time Diff = 0x103
[   70.799102] Time Diff = 0x181
[   70.801861] Time Diff = 0x15e
[   70.803466] Time Diff = 0xf0
[   70.804348] Time Diff = 0xd0
[   70.805653] Time Diff = 0xad
[   70.806477] Time Diff = 0xe0
[   70.807725] Time Diff = 0xbe
[   70.808541] Time Diff = 0xc3
[   70.810191] Time Diff = 0xc7
[   70.811007] Time Diff = 0xc0
[   70.812629] Time Diff = 0xc0
[   70.813640] Time Diff = 0xda
[   70.814915] Time Diff = 0xbb
[   70.815726] Time Diff = 0xc4
[   70.816955] Time Diff = 0xc0
[   70.817778] Time Diff = 0xcd
[   70.818999] Time Diff = 0xcd
[   70.820099] Time Diff = 0xcb
[   70.821333] Time Diff = 0xf0

Implementation:
===

The trap instruction is replaced by a branch to a detour buffer.
To address the limitation of branch instruction in power architecture
detour buffer slot is allocated from a reserved area. This will ensure
that the branch is within the +/- 32 MB range. Patch 2/3 furnishes this.
The current kprobes insn caches allocate memory area for insn slots
with module_alloc(). This will always be beyond the +/- 32 MB range.
Hence for allocating and freeing  slots from this reserved area
ppc_get_optinsn_slot() and ppc_free_optinsns_slot() are introduced.

The detour buffer contains a call to optimized_callback() which in turn
call the pre_handler(). Once the pre-handler is run, the original instruction
is emulated from the detour buffer itself. Also the detour buffer is equipped
with a branch back to the normal work flow after the probed instruction is 
emulated.
Before preparing optimization, kprobes inserts the original (user-defined)
kprobe at the specified address. So, even if the kprobe cannot be optimized,
it just uses a normal kprobe.

Limitations:
==

- Number of probes which can be optimized is limited by the size of the area 
reserved.

* TODO: Have a template based implementation that will alleviate the 
probe count by
  using a lesser space from the reserved area for optimization.

- Currently instructions which can be emulated are the only candidates for 
optimization.



Changes from RFC-v1:
---
- Detour buffer memory reservation code moved to optprobes.c
- optimized_callback() is marked as NOKPROBE_SYMBOL.
- Return NULL when there is no more slots to allocate from detour buffer.
- Other comments by Masami are addressed.


Kindly let me know your suggestions and comments.

Thanks
-Anju


Anju T (3):
  arch/powerpc : Add detour buffer support for optprobes
  arch/powerpc : optprobes for powerpc core
  arch/powerpc : Enable optprobes support in powerpc

 .../features/debug/optprobes/arch-support.txt  |   2 +-
 arch/powerpc/Kconfig   |   1 +
 arch/powerpc/include/asm/kprobes.h |  25 ++
 arch/powerpc/kernel/Makefile   |   1 +
 arch/powerpc/kernel/optprobes.c| 474 +
 arch/powerpc/kernel/optprobes_head.S   | 108 +
 6 files changed, 610 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/kernel/optprobes.c
 create mode 100644 arch/powerpc/kernel/optprobes_head.S

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [RFC PATCH 2/3] arch/powerpc : optprobes for powerpc core

2016-05-19 Thread Anju T

Hi Masami,

 Thank you for reviewing the patch.

On Wednesday 18 May 2016 08:43 PM, Masami Hiramatsu wrote:

On Wed, 18 May 2016 02:09:37 +0530
Anju T <a...@linux.vnet.ibm.com> wrote:


Instruction slot for detour buffer is allocated from
the reserved area. For the time being 64KB is reserved
in memory for this purpose. ppc_get_optinsn_slot() and
ppc_free_optinsn_slot() are geared towards the allocation and freeing
of memory from this area.

Thank you for porting optprobe on ppc!!

I have some comments on this patch.


Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
  arch/powerpc/kernel/optprobes.c | 463 
  1 file changed, 463 insertions(+)
  create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000..50a60c1
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,463 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Reserve an area to allocate slots for detour buffer */
+extern void  optprobe_trampoline_holder(void)
+{
+   asm volatile(".global optinsn_slot\n"
+   "optinsn_slot:\n"
+   ".space 65536");
+}

Would we better move this into optprobes_head.S?


Yes. Will do.

+
+#define SLOT_SIZE 65536
+#define TMPL_CALL_HDLR_IDX \
+   (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX   \
+   (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_BRANCH_IDX\
+   (optprobe_template_ret_branch - optprobe_template_entry)
+#define TMPL_RET_IDX   \
+   (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_OP1_IDX   \
+   (optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_OP2_IDX   \
+   (optprobe_template_op_address2 - optprobe_template_entry)
+#define TMPL_INSN_IDX  \
+   (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX   \
+   (optprobe_template_end - optprobe_template_entry)
+
+struct kprobe_ppc_insn_page {
+   struct list_head list;
+   kprobe_opcode_t *insns; /* Page of instruction slots */
+   struct kprobe_insn_cache *cache;
+   int nused;
+   int ngarbage;
+   char slot_used[];
+};
+
+#define PPC_KPROBE_INSN_PAGE_SIZE(slots)   \
+   (offsetof(struct kprobe_ppc_insn_page, slot_used) + \
+   (sizeof(char) * (slots)))
+
+enum ppc_kprobe_slot_state {
+   SLOT_CLEAN = 0,
+   SLOT_DIRTY = 1,
+   SLOT_USED = 2,
+};
+
+static struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+   .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+   .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+   /* .insn_size is initialized later */
+   .nr_garbage = 0,
+};
+
+static int ppc_slots_per_page(struct kprobe_insn_cache *c)
+{
+   /*
+* Here the #slots per page differs from x86 as we have
+* only 64KB reserved.
+*/
+   return SLOT_SIZE / (c->insn_size * sizeof(kprobe_opcode_t));
+}
+
+/* Return 1 if all garbages are collected, otherwise 0. */
+static int collect_one_slot(struct kprobe_ppc_insn_page *kip, int idx)
+{
+   kip->slot_used[idx] = SLOT_CLEAN;
+   kip->nused--;
+   return 0;
+}
+
+static int collect_garbage_slots(struct kprobe_insn_cache *c)
+{
+   struct kprobe_ppc_insn_page *kip, *next;
+
+   /* Ensure no-one is interrupted on the garbages */
+   synchronize_sched();
+
+   list_for_each_entry_safe(kip, next, &c->pages, list) {
+   int i;
+
+   if (kip->ngarbage == 0)
+   continue;
+   kip->ngarbage = 0;   /* we will collect all garbages */
+   for (i = 0; i < ppc_slots_per_page(c); i++) {
+   if (kip->slot_used[i] == SLOT_DIRTY &&
+   collect_one_slot(kip, i))
+   break;
+   }
+   }
+   c->nr_garbage = 0;
+   return 0;
+}
+
+kprobe_opcode_t  *__ppc_get_optinsn_slot(struct kprobe_insn_cache *c)
+{
+   struct kprobe_ppc_insn_page *kip;
+   kprobe_opcode_t *slot = NULL;
+
+   mutex_lock(&c->mutex);
+   list_for_each_entry(kip, &c->pages, list) {
+   if (kip->nused < ppc_slots_per_page(c)) {
+   int i;
+
+   for (i = 0; i < ppc_slots_per_page(c); i++) {
+   if 

[RFC PATCH 3/3] arch/powerpc : Enable optprobes support in powerpc

2016-05-17 Thread Anju T
Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 Documentation/features/debug/optprobes/arch-support.txt | 2 +-
 arch/powerpc/Kconfig| 1 +
 arch/powerpc/kernel/Makefile| 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/features/debug/optprobes/arch-support.txt 
b/Documentation/features/debug/optprobes/arch-support.txt
index b8999d8..45bc99d 100644
--- a/Documentation/features/debug/optprobes/arch-support.txt
+++ b/Documentation/features/debug/optprobes/arch-support.txt
@@ -27,7 +27,7 @@
 |   nios2: | TODO |
 |openrisc: | TODO |
 |  parisc: | TODO |
-| powerpc: | TODO |
+| powerpc: |  ok  |
 |s390: | TODO |
 |   score: | TODO |
 |  sh: | TODO |
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 7cd32c0..a87c9b1 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -104,6 +104,7 @@ config PPC
select HAVE_IOREMAP_PROT
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
select HAVE_KPROBES
+   select HAVE_OPTPROBES
select HAVE_ARCH_KGDB
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 2da380f..7994e22 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -99,6 +99,7 @@ endif
 obj-$(CONFIG_BOOTX_TEXT)   += btext.o
 obj-$(CONFIG_SMP)  += smp.o
 obj-$(CONFIG_KPROBES)  += kprobes.o
+obj-$(CONFIG_OPTPROBES)+= optprobes.o optprobes_head.o
 obj-$(CONFIG_UPROBES)  += uprobes.o
 obj-$(CONFIG_PPC_UDBG_16550)   += legacy_serial.o udbg_16550.o
 obj-$(CONFIG_STACKTRACE)   += stacktrace.o
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC PATCH 2/3] arch/powerpc : optprobes for powerpc core

2016-05-17 Thread Anju T
Instruction slot for detour buffer is allocated from
the reserved area. For the time being 64KB is reserved
in memory for this purpose. ppc_get_optinsn_slot() and
ppc_free_optinsn_slot() are geared towards the allocation and freeing
of memory from this area.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/optprobes.c | 463 
 1 file changed, 463 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes.c

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
new file mode 100644
index 000..50a60c1
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes.c
@@ -0,0 +1,463 @@
+/*
+ * Code for Kernel probes Jump optimization.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+/* Reserve an area to allocate slots for detour buffer */
+extern void  optprobe_trampoline_holder(void)
+{
+   asm volatile(".global optinsn_slot\n"
+   "optinsn_slot:\n"
+   ".space 65536");
+}
+
+#define SLOT_SIZE 65536
+#define TMPL_CALL_HDLR_IDX \
+   (optprobe_template_call_handler - optprobe_template_entry)
+#define TMPL_EMULATE_IDX   \
+   (optprobe_template_call_emulate - optprobe_template_entry)
+#define TMPL_RET_BRANCH_IDX\
+   (optprobe_template_ret_branch - optprobe_template_entry)
+#define TMPL_RET_IDX   \
+   (optprobe_template_ret - optprobe_template_entry)
+#define TMPL_OP1_IDX   \
+   (optprobe_template_op_address1 - optprobe_template_entry)
+#define TMPL_OP2_IDX   \
+   (optprobe_template_op_address2 - optprobe_template_entry)
+#define TMPL_INSN_IDX  \
+   (optprobe_template_insn - optprobe_template_entry)
+#define TMPL_END_IDX   \
+   (optprobe_template_end - optprobe_template_entry)
+
+struct kprobe_ppc_insn_page {
+   struct list_head list;
+   kprobe_opcode_t *insns; /* Page of instruction slots */
+   struct kprobe_insn_cache *cache;
+   int nused;
+   int ngarbage;
+   char slot_used[];
+};
+
+#define PPC_KPROBE_INSN_PAGE_SIZE(slots)   \
+   (offsetof(struct kprobe_ppc_insn_page, slot_used) + \
+   (sizeof(char) * (slots)))
+
+enum ppc_kprobe_slot_state {
+   SLOT_CLEAN = 0,
+   SLOT_DIRTY = 1,
+   SLOT_USED = 2,
+};
+
+static struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
+   .mutex = __MUTEX_INITIALIZER(kprobe_ppc_optinsn_slots.mutex),
+   .pages = LIST_HEAD_INIT(kprobe_ppc_optinsn_slots.pages),
+   /* .insn_size is initialized later */
+   .nr_garbage = 0,
+};
+
+static int ppc_slots_per_page(struct kprobe_insn_cache *c)
+{
+   /*
+* Here the #slots per page differs from x86 as we have
+* only 64KB reserved.
+*/
+   return SLOT_SIZE / (c->insn_size * sizeof(kprobe_opcode_t));
+}
+
+/* Return 1 if all garbages are collected, otherwise 0. */
+static int collect_one_slot(struct kprobe_ppc_insn_page *kip, int idx)
+{
+   kip->slot_used[idx] = SLOT_CLEAN;
+   kip->nused--;
+   return 0;
+}
+
+static int collect_garbage_slots(struct kprobe_insn_cache *c)
+{
+   struct kprobe_ppc_insn_page *kip, *next;
+
+   /* Ensure no-one is interrupted on the garbages */
+   synchronize_sched();
+
+   list_for_each_entry_safe(kip, next, &c->pages, list) {
+   int i;
+
+   if (kip->ngarbage == 0)
+   continue;
+   kip->ngarbage = 0;  /* we will collect all garbages */
+   for (i = 0; i < ppc_slots_per_page(c); i++) {
+   if (kip->slot_used[i] == SLOT_DIRTY &&
+   collect_one_slot(kip, i))
+   break;
+   }
+   }
+   c->nr_garbage = 0;
+   return 0;
+}
+
+kprobe_opcode_t  *__ppc_get_optinsn_slot(struct kprobe_insn_cache *c)
+{
+   struct kprobe_ppc_insn_page *kip;
+   kprobe_opcode_t *slot = NULL;
+
+   mutex_lock(&c->mutex);
+   list_for_each_entry(kip, &c->pages, list) {
+   if (kip->nused < ppc_slots_per_page(c)) {
+   int i;
+
+   for (i = 0; i < ppc_slots_per_page(c); i++) {
+   if (kip->slot_used[i] == SLOT_CLEAN) {
+   kip->slot_used[i] = SLOT_USED;
+   kip->nused++;
+   slot = kip->insns + (i * c->insn_size);
+   goto out;
+ 

[RFC PATCH 1/3] arch/powerpc : Add detour buffer support for optprobes

2016-05-17 Thread Anju T
Detour buffer contains instructions to create an in memory pt_regs.
After the execution of prehandler a call is made for instruction emulation.
The NIP is decided after the probed instruction is executed. Hence a branch
instruction is created to the NIP returned by emulate_step().

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/kprobes.h   |  25 +
 arch/powerpc/kernel/optprobes_head.S | 104 +++
 2 files changed, 129 insertions(+)
 create mode 100644 arch/powerpc/kernel/optprobes_head.S

diff --git a/arch/powerpc/include/asm/kprobes.h 
b/arch/powerpc/include/asm/kprobes.h
index 039b583..3e4c998 100644
--- a/arch/powerpc/include/asm/kprobes.h
+++ b/arch/powerpc/include/asm/kprobes.h
@@ -38,7 +38,25 @@ struct pt_regs;
 struct kprobe;
 
 typedef ppc_opcode_t kprobe_opcode_t;
+
+extern kprobe_opcode_t optinsn_slot;
+/* Optinsn template address */
+extern kprobe_opcode_t optprobe_template_entry[];
+extern kprobe_opcode_t optprobe_template_call_handler[];
+extern kprobe_opcode_t optprobe_template_call_emulate[];
+extern kprobe_opcode_t optprobe_template_ret_branch[];
+extern kprobe_opcode_t optprobe_template_ret[];
+extern kprobe_opcode_t optprobe_template_insn[];
+extern kprobe_opcode_t optprobe_template_op_address1[];
+extern kprobe_opcode_t optprobe_template_op_address2[];
+extern kprobe_opcode_t optprobe_template_end[];
+
 #define MAX_INSN_SIZE 1
+#define MAX_OPTIMIZED_LENGTH4
+#define MAX_OPTINSN_SIZE   \
+   ((unsigned long)&optprobe_template_end -\
+   (unsigned long)&optprobe_template_entry)
+#define RELATIVEJUMP_SIZE   4
 
 #ifdef CONFIG_PPC64
 #if defined(_CALL_ELF) && _CALL_ELF == 2
@@ -129,5 +147,12 @@ struct kprobe_ctlblk {
 extern int kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data);
 extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
+
+struct arch_optimized_insn {
+   kprobe_opcode_t copied_insn[1];
+   /* detour buffer */
+   kprobe_opcode_t *insn;
+};
+
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_KPROBES_H */
diff --git a/arch/powerpc/kernel/optprobes_head.S 
b/arch/powerpc/kernel/optprobes_head.S
new file mode 100644
index 000..025bab7
--- /dev/null
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -0,0 +1,104 @@
+/*
+ * Code to prepare detour buffer for optprobes in kernel.
+ *
+ * Copyright 2016, Anju T, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include 
+#include 
+#include 
+
+.global optprobe_template_entry
+optprobe_template_entry:
+   stdur1,-INT_FRAME_SIZE(r1)
+   SAVE_GPR(0,r1)
+   /* Save the previous SP into stack */
+   addir0,r1,INT_FRAME_SIZE
+   std 0,GPR1(r1)
+   SAVE_2GPRS(2,r1)
+   SAVE_8GPRS(4,r1)
+   SAVE_10GPRS(12,r1)
+   SAVE_10GPRS(22,r1)
+   /* Save SPRS */
+   mfcfar  r5
+   std r5,_NIP(r1)
+   mfmsr   r5
+   std r5,_MSR(r1)
+   mfctr   r5
+   std r5,_CTR(r1)
+   mflrr5
+   std r5,_LINK(r1)
+   mfspr   r5,SPRN_XER
+   std r5,_XER(r1)
+   li  r5,0
+   std r5,_TRAP(r1)
+   mfdar   r5
+   std r5,_DAR(r1)
+   mfdsisr r5
+   std r5,_DSISR(r1)
+   /* Pass parameters for optimized_callback */
+.global optprobe_template_op_address1
+optprobe_template_op_address1:
+   nop
+   nop
+   nop
+   nop
+   nop
+   addir4,r1,STACK_FRAME_OVERHEAD
+   /* Branch to the prehandler */
+.global optprobe_template_call_handler
+optprobe_template_call_handler:
+   nop
+   /* Pass parameters for instruction emulation */
+   addir3,r1,STACK_FRAME_OVERHEAD
+.global optprobe_template_insn
+optprobe_template_insn:
+   nop
+   nop
+   /* Branch to instruction emulation  */
+.global optprobe_template_call_emulate
+optprobe_template_call_emulate:
+   nop
+.global optprobe_template_op_address2
+optprobe_template_op_address2:
+   nop
+   nop
+   nop
+   nop
+   nop
+   addir4,r1,STACK_FRAME_OVERHEAD
+   /* Branch to create_return_branch() function */
+.global optprobe_template_ret_branch
+optprobe_template_ret_branch:
+   nop
+   /* Restore the registers */
+   ld  r5,_MSR(r1)
+   mtmsr   r5
+   ld  r5,_CTR(r1)
+   mtctr   r5
+   ld  r5,_LINK(r1)
+   mtlrr5
+   ld  r5,_XER(r1)
+   mtxer   r5
+   ld  r5,_DAR(r1)
+   mtdar   r5
+   ld  r5,_DSISR(r1)
+   mtdsisr r5
+   REST_GPR(0,r1)
+   REST_2GPRS(2,r1)
+   REST_8GPRS(4,r1)
+   REST_10GPRS(12,r1)
+   REST_10GPRS(22,r1)
+   /* Restore t

Re: [PATCH V11 0/4]perf/powerpc: Add ability to sample intr machine state in powerpc

2016-04-18 Thread Anju T

On Saturday 20 February 2016 10:32 AM, Anju T wrote:

This short patch series adds the ability to sample the interrupted
machine state for each hardware sample.

To test this patchset,
Eg:

$ perf record -I?   # list supported registers

output:
available registers: r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15 r16 
r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31 nip msr orig_r3 ctr 
link xer ccr softe trap dar dsisr

  usage: perf record [] []
 or: perf record [] --  []

 -I, --intr-regs[=]
   sample selected machine registers on interrupt, use 
-I ? to list register names


$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patchset/ output looks like as follows:

496768515470 0x1988 [0x188]: PERF_RECORD_SAMPLE(IP, 0x1): 4522/4522: 
0xc01e538c period: 1 addr: 0
... intr regs: mask 0x7ff ABI 64-bit
 r00xc01e5e34
 r10xc00fe733f9a0
 r20xc1523100
 r30xc00ffaadeb60
 r40xc3456800
 r50x73a9b5e000
 r60x1e00
 r70x0
 r80x0
 r90x0
 r10   0x1
 r11   0x0
 r12   0x24022822
 r13   0xcfeec180
 r14   0x0
 r15   0xc01e4be18800
 r16   0x0
 r17   0xc00ffaac5000
 r18   0xc00fe733f8a0
 r19   0xc1523100
 r20   0xc009fd1c
 r21   0xc00fcaa69000
 r22   0xc01e4968
 r23   0xc1523100
 r24   0xc00fe733f850
 r25   0xc00fcaa69000
 r26   0xc3b8fcf0
 r27   0xfead
 r28   0x0
 r29   0xc00fcaa69000
 r30   0x1
 r31   0x0
 nip   0xc01dd320
 msr   0x90009032
 orig_r3 0xc01e538c
 ctr   0xc009d550
 link  0xc01e5e34
 xer   0x0
 ccr   0x84022882
 softe 0x0
 trap  0xf01
 dar   0x0
 dsisr 0xf0004006004
  ... thread: :4522:4522
  .. dso: /root/.debug/.build-id/b0/ef11b1a1629e62ac9de75199117ee5ef9469e9
:4522  4522   496.768515:  1 cycles:  c01e538c 
.perf_event_context_sched_in (/boot/vmlinux)



Changes from v10:

- Included SOFTE as suggested by mpe
- The name of registers displayed is  changed from
   gpr* to r* also the macro names changed from
   PERF_REG_POWERPC_GPR* to PERF_REG_POWERPC_R*.
- The conflict in returning the ABI is resolved.
- #define PERF_REG_SP  is again changed to  PERF_REG_POWERPC_R1
- Comment in tools/perf/config/Makefile is updated.
- removed the "Reviewed-By" tag as the patch has logic changes.


Changes from V9:

- Changed the name displayed for link register from "lnk" to "link" in
   tools/perf/arch/powerpc/include/perf_regs.h

changes from V8:

- Corrected the indentation issue in the Makefile mentioned in 3rd patch

Changes from V7:

- Addressed the new line issue in 3rd patch.

Changes from V6:

- Corrected the typo in patch  tools/perf: Map the ID values with register 
names.
   ie #define PERF_REG_SP  PERF_REG_POWERPC_R1 should be #define PERF_REG_SP   
PERF_REG_POWERPC_GPR1


Changes from V5:

- Enabled perf_sample_regs_user also in this patch set.Functions added in
arch/powerpc/perf/perf_regs.c
- Added Maddy's patch to this patchset for enabling -I? option which will
   list the supported register names.


Changes from V4:

- Removed the softe and MQ from all patches
- Switch case is replaced with an array in the 3rd patch

Changes from V3:

- Addressed the comments by Sukadev regarding the nits in the descriptions.
- Modified the subject of first patch.
- Included the sample output in the 3rd patch also.

Changes from V2:

- tools/perf/config/Makefile is moved to the patch tools/perf.
- The patchset is reordered.
- perf_regs_load() function is used for the dwarf unwind test.Since it is not 
required here,
   it is removed from tools/perf/arch/powerpc/include/perf_regs.h
- PERF_REGS_POWERPC_RESULT is removed.

Changes from V1:

- Solved the name mismatch issue in the from and signed-off field of the patch 
series.
- Added necessary comments in the 3rd patch ie perf/powerpc ,as suggested by 
Maddy.



Anju T (3):
   perf/powerpc: assign an id to each powerpc register
   perf/powerpc: add support for sampling intr machine state
   tools/perf: Map the ID values with register names

Madhavan Srinivasan (1):
   tool/perf: Add sample_reg_mask to include all perf_regs regs


  arch/powerpc/Kconfig|  1 +
  arch/powerpc/include/uapi/asm/perf_regs.h   | 50 
  arch/powerpc/perf/Makefile  |  1 +
  arch/powerpc/perf/perf_regs.c   | 91 +
  tools/perf/arch/powerpc/include/perf_regs.h | 69 ++
  tools/perf/arch/powerpc/util/Build  |  1 +
  tools/perf/arch/powerpc/util/perf_regs.c| 49 +++

Re: [PATCH V11 0/4]perf/powerpc: Add ability to sample intr machine state in powerpc

2016-03-07 Thread Anju T

Hi,

Any updates on this?

On Saturday 20 February 2016 10:32 AM, Anju T wrote:


This short patch series adds the ability to sample the interrupted
machine state for each hardware sample.

To test this patchset,
Eg:

$ perf record -I?   # list supported registers

output:
available registers: r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15 r16 
r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31 nip msr orig_r3 ctr 
link xer ccr softe trap dar dsisr

  usage: perf record [] []
 or: perf record [] --  []

 -I, --intr-regs[=]
   sample selected machine registers on interrupt, use 
-I ? to list register names


$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patchset/ output looks like as follows:

496768515470 0x1988 [0x188]: PERF_RECORD_SAMPLE(IP, 0x1): 4522/4522: 
0xc01e538c period: 1 addr: 0
... intr regs: mask 0x7ff ABI 64-bit
 r00xc01e5e34
 r10xc00fe733f9a0
 r20xc1523100
 r30xc00ffaadeb60
 r40xc3456800
 r50x73a9b5e000
 r60x1e00
 r70x0
 r80x0
 r90x0
 r10   0x1
 r11   0x0
 r12   0x24022822
 r13   0xcfeec180
 r14   0x0
 r15   0xc01e4be18800
 r16   0x0
 r17   0xc00ffaac5000
 r18   0xc00fe733f8a0
 r19   0xc1523100
 r20   0xc009fd1c
 r21   0xc00fcaa69000
 r22   0xc01e4968
 r23   0xc1523100
 r24   0xc00fe733f850
 r25   0xc00fcaa69000
 r26   0xc3b8fcf0
 r27   0xfead
 r28   0x0
 r29   0xc00fcaa69000
 r30   0x1
 r31   0x0
 nip   0xc01dd320
 msr   0x90009032
 orig_r3 0xc01e538c
 ctr   0xc009d550
 link  0xc01e5e34
 xer   0x0
 ccr   0x84022882
 softe 0x0
 trap  0xf01
 dar   0x0
 dsisr 0xf0004006004
  ... thread: :4522:4522
  .. dso: /root/.debug/.build-id/b0/ef11b1a1629e62ac9de75199117ee5ef9469e9
:4522  4522   496.768515:  1 cycles:  c01e538c 
.perf_event_context_sched_in (/boot/vmlinux)



Changes from v10:

- Included SOFTE as suggested by mpe
- The name of registers displayed is  changed from
   gpr* to r* also the macro names changed from
   PERF_REG_POWERPC_GPR* to PERF_REG_POWERPC_R*.
- The conflict in returning the ABI is resolved.
- #define PERF_REG_SP  is again changed to  PERF_REG_POWERPC_R1
- Comment in tools/perf/config/Makefile is updated.
- removed the "Reviewed-By" tag as the patch has logic changes.


Changes from V9:

- Changed the name displayed for link register from "lnk" to "link" in
   tools/perf/arch/powerpc/include/perf_regs.h

changes from V8:

- Corrected the indentation issue in the Makefile mentioned in 3rd patch

Changes from V7:

- Addressed the new line issue in 3rd patch.

Changes from V6:

- Corrected the typo in patch  tools/perf: Map the ID values with register 
names.
   ie #define PERF_REG_SP  PERF_REG_POWERPC_R1 should be #define PERF_REG_SP   
PERF_REG_POWERPC_GPR1


Changes from V5:

- Enabled perf_sample_regs_user also in this patch set.Functions added in
arch/powerpc/perf/perf_regs.c
- Added Maddy's patch to this patchset for enabling -I? option which will
   list the supported register names.


Changes from V4:

- Removed the softe and MQ from all patches
- Switch case is replaced with an array in the 3rd patch

Changes from V3:

- Addressed the comments by Sukadev regarding the nits in the descriptions.
- Modified the subject of first patch.
- Included the sample output in the 3rd patch also.

Changes from V2:

- tools/perf/config/Makefile is moved to the patch tools/perf.
- The patchset is reordered.
- perf_regs_load() function is used for the dwarf unwind test.Since it is not 
required here,
   it is removed from tools/perf/arch/powerpc/include/perf_regs.h
- PERF_REGS_POWERPC_RESULT is removed.

Changes from V1:

- Solved the name mismatch issue in the from and signed-off field of the patch 
series.
- Added necessary comments in the 3rd patch ie perf/powerpc ,as suggested by 
Maddy.



Anju T (3):
   perf/powerpc: assign an id to each powerpc register
   perf/powerpc: add support for sampling intr machine state
   tools/perf: Map the ID values with register names

Madhavan Srinivasan (1):
   tool/perf: Add sample_reg_mask to include all perf_regs regs


  arch/powerpc/Kconfig|  1 +
  arch/powerpc/include/uapi/asm/perf_regs.h   | 50 
  arch/powerpc/perf/Makefile  |  1 +
  arch/powerpc/perf/perf_regs.c   | 91 +
  tools/perf/arch/powerpc/include/perf_regs.h | 69 ++
  tools/perf/arch/powerpc/util/Build  |  1 +
  tools/perf/arch/powerpc/util/perf_regs.c   

[PATCH V2 4/4] tool/perf: Add sample_reg_mask to include all perf_regs

2016-02-19 Thread Anju T
From: Madhavan Srinivasan 

Add sample_reg_mask array with pt_regs registers.
This is needed for printing supported regs ( -I? option).

Signed-off-by: Madhavan Srinivasan 
---
 tools/perf/arch/powerpc/util/Build   |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c | 49 
 2 files changed, 50 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..3deb1bc 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += perf_regs.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c 
b/tools/perf/arch/powerpc/util/perf_regs.c
new file mode 100644
index 000..df9db75
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -0,0 +1,49 @@
+#include "../../perf.h"
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+   SMPL_REG(r0, PERF_REG_POWERPC_R0),
+   SMPL_REG(r1, PERF_REG_POWERPC_R1),
+   SMPL_REG(r2, PERF_REG_POWERPC_R2),
+   SMPL_REG(r3, PERF_REG_POWERPC_R3),
+   SMPL_REG(r4, PERF_REG_POWERPC_R4),
+   SMPL_REG(r5, PERF_REG_POWERPC_R5),
+   SMPL_REG(r6, PERF_REG_POWERPC_R6),
+   SMPL_REG(r7, PERF_REG_POWERPC_R7),
+   SMPL_REG(r8, PERF_REG_POWERPC_R8),
+   SMPL_REG(r9, PERF_REG_POWERPC_R9),
+   SMPL_REG(r10, PERF_REG_POWERPC_R10),
+   SMPL_REG(r11, PERF_REG_POWERPC_R11),
+   SMPL_REG(r12, PERF_REG_POWERPC_R12),
+   SMPL_REG(r13, PERF_REG_POWERPC_R13),
+   SMPL_REG(r14, PERF_REG_POWERPC_R14),
+   SMPL_REG(r15, PERF_REG_POWERPC_R15),
+   SMPL_REG(r16, PERF_REG_POWERPC_R16),
+   SMPL_REG(r17, PERF_REG_POWERPC_R17),
+   SMPL_REG(r18, PERF_REG_POWERPC_R18),
+   SMPL_REG(r19, PERF_REG_POWERPC_R19),
+   SMPL_REG(r20, PERF_REG_POWERPC_R20),
+   SMPL_REG(r21, PERF_REG_POWERPC_R21),
+   SMPL_REG(r22, PERF_REG_POWERPC_R22),
+   SMPL_REG(r23, PERF_REG_POWERPC_R23),
+   SMPL_REG(r24, PERF_REG_POWERPC_R24),
+   SMPL_REG(r25, PERF_REG_POWERPC_R25),
+   SMPL_REG(r26, PERF_REG_POWERPC_R26),
+   SMPL_REG(r27, PERF_REG_POWERPC_R27),
+   SMPL_REG(r28, PERF_REG_POWERPC_R28),
+   SMPL_REG(r29, PERF_REG_POWERPC_R29),
+   SMPL_REG(r30, PERF_REG_POWERPC_R30),
+   SMPL_REG(r31, PERF_REG_POWERPC_R31),
+   SMPL_REG(nip, PERF_REG_POWERPC_NIP),
+   SMPL_REG(msr, PERF_REG_POWERPC_MSR),
+   SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3),
+   SMPL_REG(ctr, PERF_REG_POWERPC_CTR),
+   SMPL_REG(link, PERF_REG_POWERPC_LNK),
+   SMPL_REG(xer, PERF_REG_POWERPC_XER),
+   SMPL_REG(ccr, PERF_REG_POWERPC_CCR),
+   SMPL_REG(softe, PERF_REG_POWERPC_SOFTE),
+   SMPL_REG(trap, PERF_REG_POWERPC_TRAP),
+   SMPL_REG(dar, PERF_REG_POWERPC_DAR),
+   SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
+   SMPL_REG_END
+};
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V11 3/4] tools/perf: Map the ID values with register names

2016-02-19 Thread Anju T
Map ID values with corresponding register names. These names are then
displayed when user issues perf record with the -I option
followed by perf report/script with -D option.

To test this patchset,
Eg:
$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patch / output looks like as follows:

496768515470 0x1988 [0x188]: PERF_RECORD_SAMPLE(IP, 0x1): 4522/4522: 
0xc01e538c period: 1 addr: 0
... intr regs: mask 0x7ff ABI 64-bit
 r00xc01e5e34
 r10xc00fe733f9a0
 r20xc1523100
 r30xc00ffaadeb60
 r40xc3456800
 r50x73a9b5e000
 r60x1e00
 r70x0
 r80x0
 r90x0
 r10   0x1
 r11   0x0
 r12   0x24022822
 r13   0xcfeec180
 r14   0x0
 r15   0xc01e4be18800
 r16   0x0
 r17   0xc00ffaac5000
 r18   0xc00fe733f8a0
 r19   0xc1523100
 r20   0xc009fd1c
 r21   0xc00fcaa69000
 r22   0xc01e4968
 r23   0xc1523100
 r24   0xc00fe733f850
 r25   0xc00fcaa69000
 r26   0xc3b8fcf0
 r27   0xfead
 r28   0x0
 r29   0xc00fcaa69000
 r30   0x1
 r31   0x0
 nip   0xc01dd320
 msr   0x90009032
 orig_r3 0xc01e538c
 ctr   0xc009d550
 link  0xc01e5e34
 xer   0x0
 ccr   0x84022882
 softe 0x0
 trap  0xf01
 dar   0x0
 dsisr 0xf0004006004
 ... thread: :4522:4522
 .. dso: /root/.debug/.build-id/b0/ef11b1a1629e62ac9de75199117ee5ef9469e9
   :4522  4522   496.768515:  1 cycles:  c01e538c 
.perf_event_context_sched_in (/boot/vmlinux)

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/include/perf_regs.h | 69 +
 tools/perf/config/Makefile  |  5 +++
 2 files changed, 74 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h

diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
new file mode 100644
index 000..0b77a93
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -0,0 +1,69 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include 
+#include 
+#include 
+
+#define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
+#ifdef __powerpc64__
+   #define PERF_SAMPLE_REGS_ABIPERF_SAMPLE_REGS_ABI_64
+#else
+   #define PERF_SAMPLE_REGS_ABIPERF_SAMPLE_REGS_ABI_32
+#endif
+
+#define PERF_REG_IP PERF_REG_POWERPC_NIP
+#define PERF_REG_SP PERF_REG_POWERPC_R1
+
+static const char *reg_names[] = {
+   [PERF_REG_POWERPC_R0] = "r0",
+   [PERF_REG_POWERPC_R1] = "r1",
+   [PERF_REG_POWERPC_R2] = "r2",
+   [PERF_REG_POWERPC_R3] = "r3",
+   [PERF_REG_POWERPC_R4] = "r4",
+   [PERF_REG_POWERPC_R5] = "r5",
+   [PERF_REG_POWERPC_R6] = "r6",
+   [PERF_REG_POWERPC_R7] = "r7",
+   [PERF_REG_POWERPC_R8] = "r8",
+   [PERF_REG_POWERPC_R9] = "r9",
+   [PERF_REG_POWERPC_R10] = "r10",
+   [PERF_REG_POWERPC_R11] = "r11",
+   [PERF_REG_POWERPC_R12] = "r12",
+   [PERF_REG_POWERPC_R13] = "r13",
+   [PERF_REG_POWERPC_R14] = "r14",
+   [PERF_REG_POWERPC_R15] = "r15",
+   [PERF_REG_POWERPC_R16] = "r16",
+   [PERF_REG_POWERPC_R17] = "r17",
+   [PERF_REG_POWERPC_R18] = "r18",
+   [PERF_REG_POWERPC_R19] = "r19",
+   [PERF_REG_POWERPC_R20] = "r20",
+   [PERF_REG_POWERPC_R21] = "r21",
+   [PERF_REG_POWERPC_R22] = "r22",
+   [PERF_REG_POWERPC_R23] = "r23",
+   [PERF_REG_POWERPC_R24] = "r24",
+   [PERF_REG_POWERPC_R25] = "r25",
+   [PERF_REG_POWERPC_R26] = "r26",
+   [PERF_REG_POWERPC_R27] = "r27",
+   [PERF_REG_POWERPC_R28] = "r28",
+   [PERF_REG_POWERPC_R29] = "r29",
+   [PERF_REG_POWERPC_R30] = "r30",
+   [PERF_REG_POWERPC_R31] = "r31",
+   [PERF_REG_POWERPC_NIP] = "nip",
+   [PERF_REG_POWERPC_MSR] = "msr",
+   [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
+   [PERF_REG_POWERPC_CTR] = "ctr",
+   [PERF_REG_POWERPC_LNK] = "link",
+   [PERF_REG_POWERPC_XER] = "xer",
+   [PERF_REG_POWERPC_CCR] = "ccr",
+   [PERF_REG_POWERPC_SOFTE] = "softe",
+   [PERF_REG_POWERPC_TRAP] = "trap",
+   [PERF_REG_POWERPC_DAR] = "dar",
+   [PERF_REG_POWERPC_DSISR] = "dsisr"
+};
+
+static inline 

[PATCH V11 2/4] perf/powerpc: add support for sampling intr machine state

2016-02-19 Thread Anju T
The perf infrastructure uses a bit mask to find out valid
registers to display. Define a register mask for supported
registers defined in asm/perf_regs.h. The bit positions also
correspond to register IDs which is used by perf infrastructure
to fetch the register values. CONFIG_HAVE_PERF_REGS enables
sampling of the interrupted machine state.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/perf/Makefile|  1 +
 arch/powerpc/perf/perf_regs.c | 91 +++
 3 files changed, 93 insertions(+)
 create mode 100644 arch/powerpc/perf/perf_regs.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9a7057e..c4ce60d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -119,6 +119,7 @@ config PPC
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
+   select HAVE_PERF_REGS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index f9c083a..2f2d3d2 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -8,6 +8,7 @@ obj64-$(CONFIG_PPC_PERF_CTRS)   += power4-pmu.o ppc970-pmu.o 
power5-pmu.o \
   power8-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 
+obj-$(CONFIG_PERF_EVENTS)  += perf_regs.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000..ae0759c
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,91 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R0, gpr[0]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R1, gpr[1]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R2, gpr[2]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R3, gpr[3]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R4, gpr[4]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R5, gpr[5]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R6, gpr[6]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R7, gpr[7]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R8, gpr[8]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R9, gpr[9]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R10, gpr[10]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R11, gpr[11]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R12, gpr[12]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R13, gpr[13]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R14, gpr[14]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R15, gpr[15]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R16, gpr[16]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R17, gpr[17]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R18, gpr[18]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R19, gpr[19]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R20, gpr[20]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R21, gpr[21]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R22, gpr[22]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R23, gpr[23]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R24, gpr[24]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R25, gpr[25]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R26, gpr[26]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R27, gpr[27]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R28, gpr[28]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R29, gpr[29]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R30, gpr[30]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_R31, gpr[31]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_LNK, link),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_SOFTE, softe),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+   if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
+   return 0;
+
+   return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+int perf_reg_validate(u64 mask)
+{
+   if (!mask || mask & REG_RESERVED)
+   return -EINVAL;
+   return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+#ifdef __powerpc64__
+   if (!test_tsk_thread_flag(task, TIF_32BIT))
+   return PERF_SAMPLE_REGS_ABI_64;
+   

[PATCH V11 1/4] perf/powerpc: assign an id to each powerpc register

2016-02-19 Thread Anju T
The enum definition assigns an 'id' to each register in "struct pt_regs"
of arch/powerpc. The order of these values in the enum definition are
based on the corresponding macros in arch/powerpc/include/uapi/asm/ptrace.h.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/uapi/asm/perf_regs.h | 50 +++
 1 file changed, 50 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h

diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000..62b8a5e
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,50 @@
+#ifndef _ASM_POWERPC_PERF_REGS_H
+#define _ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+   PERF_REG_POWERPC_R0,
+   PERF_REG_POWERPC_R1,
+   PERF_REG_POWERPC_R2,
+   PERF_REG_POWERPC_R3,
+   PERF_REG_POWERPC_R4,
+   PERF_REG_POWERPC_R5,
+   PERF_REG_POWERPC_R6,
+   PERF_REG_POWERPC_R7,
+   PERF_REG_POWERPC_R8,
+   PERF_REG_POWERPC_R9,
+   PERF_REG_POWERPC_R10,
+   PERF_REG_POWERPC_R11,
+   PERF_REG_POWERPC_R12,
+   PERF_REG_POWERPC_R13,
+   PERF_REG_POWERPC_R14,
+   PERF_REG_POWERPC_R15,
+   PERF_REG_POWERPC_R16,
+   PERF_REG_POWERPC_R17,
+   PERF_REG_POWERPC_R18,
+   PERF_REG_POWERPC_R19,
+   PERF_REG_POWERPC_R20,
+   PERF_REG_POWERPC_R21,
+   PERF_REG_POWERPC_R22,
+   PERF_REG_POWERPC_R23,
+   PERF_REG_POWERPC_R24,
+   PERF_REG_POWERPC_R25,
+   PERF_REG_POWERPC_R26,
+   PERF_REG_POWERPC_R27,
+   PERF_REG_POWERPC_R28,
+   PERF_REG_POWERPC_R29,
+   PERF_REG_POWERPC_R30,
+   PERF_REG_POWERPC_R31,
+   PERF_REG_POWERPC_NIP,
+   PERF_REG_POWERPC_MSR,
+   PERF_REG_POWERPC_ORIG_R3,
+   PERF_REG_POWERPC_CTR,
+   PERF_REG_POWERPC_LNK,
+   PERF_REG_POWERPC_XER,
+   PERF_REG_POWERPC_CCR,
+   PERF_REG_POWERPC_SOFTE,
+   PERF_REG_POWERPC_TRAP,
+   PERF_REG_POWERPC_DAR,
+   PERF_REG_POWERPC_DSISR,
+   PERF_REG_POWERPC_MAX,
+};
+#endif /* _ASM_POWERPC_PERF_REGS_H */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V11 0/4]perf/powerpc: Add ability to sample intr machine state in powerpc

2016-02-19 Thread Anju T
This short patch series adds the ability to sample the interrupted
machine state for each hardware sample.

To test this patchset,
Eg:

$ perf record -I?   # list supported registers

output:
available registers: r0 r1 r2 r3 r4 r5 r6 r7 r8 r9 r10 r11 r12 r13 r14 r15 r16 
r17 r18 r19 r20 r21 r22 r23 r24 r25 r26 r27 r28 r29 r30 r31 nip msr orig_r3 ctr 
link xer ccr softe trap dar dsisr 

 usage: perf record [] []
or: perf record [] --  []

-I, --intr-regs[=]
  sample selected machine registers on interrupt, use 
-I ? to list register names


$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patchset/ output looks like as follows:

496768515470 0x1988 [0x188]: PERF_RECORD_SAMPLE(IP, 0x1): 4522/4522: 
0xc01e538c period: 1 addr: 0
... intr regs: mask 0x7ff ABI 64-bit
 r00xc01e5e34
 r10xc00fe733f9a0
 r20xc1523100
 r30xc00ffaadeb60
 r40xc3456800
 r50x73a9b5e000
 r60x1e00
 r70x0
 r80x0
 r90x0
 r10   0x1
 r11   0x0
 r12   0x24022822
 r13   0xcfeec180
 r14   0x0
 r15   0xc01e4be18800
 r16   0x0
 r17   0xc00ffaac5000
 r18   0xc00fe733f8a0
 r19   0xc1523100
 r20   0xc009fd1c
 r21   0xc00fcaa69000
 r22   0xc01e4968
 r23   0xc1523100
 r24   0xc00fe733f850
 r25   0xc00fcaa69000
 r26   0xc3b8fcf0
 r27   0xfead
 r28   0x0
 r29   0xc00fcaa69000
 r30   0x1
 r31   0x0
 nip   0xc01dd320
 msr   0x90009032
 orig_r3 0xc01e538c
 ctr   0xc009d550
 link  0xc01e5e34
 xer   0x0
 ccr   0x84022882
 softe 0x0
 trap  0xf01
 dar   0x0
 dsisr 0xf0004006004
 ... thread: :4522:4522
 .. dso: /root/.debug/.build-id/b0/ef11b1a1629e62ac9de75199117ee5ef9469e9
   :4522  4522   496.768515:  1 cycles:  c01e538c 
.perf_event_context_sched_in (/boot/vmlinux)



Changes from v10:

- Included SOFTE as suggested by mpe
- The name of registers displayed is  changed from
  gpr* to r* also the macro names changed from 
  PERF_REG_POWERPC_GPR* to PERF_REG_POWERPC_R*.
- The conflict in returning the ABI is resolved.
- #define PERF_REG_SP  is again changed to  PERF_REG_POWERPC_R1
- Comment in tools/perf/config/Makefile is updated.
- removed the "Reviewed-By" tag as the patch has logic changes.


Changes from V9:

- Changed the name displayed for link register from "lnk" to "link" in 
  tools/perf/arch/powerpc/include/perf_regs.h

changes from V8:

- Corrected the indentation issue in the Makefile mentioned in 3rd patch

Changes from V7:

- Addressed the new line issue in 3rd patch.

Changes from V6:

- Corrected the typo in patch  tools/perf: Map the ID values with register 
names.
  ie #define PERF_REG_SP  PERF_REG_POWERPC_R1 should be #define PERF_REG_SP   
PERF_REG_POWERPC_GPR1


Changes from V5:

- Enabled perf_sample_regs_user also in this patch set. Functions added in 
   arch/powerpc/perf/perf_regs.c
- Added Maddy's patch to this patchset for enabling -I? option which will
  list the supported register names.


Changes from V4:

- Removed the softe and MQ from all patches
- Switch case is replaced with an array in the 3rd patch

Changes from V3:

- Addressed the comments by Sukadev regarding the nits in the descriptions.
- Modified the subject of first patch.
- Included the sample output in the 3rd patch also.

Changes from V2:

- tools/perf/config/Makefile is moved to the patch tools/perf.
- The patchset is reordered.
- perf_regs_load() function is used for the dwarf unwind test. Since it is not 
required here,
  it is removed from tools/perf/arch/powerpc/include/perf_regs.h
- PERF_REGS_POWERPC_RESULT is removed.

Changes from V1:

- Solved the name mismatch issue in the from and signed-off field of the patch 
series.
- Added necessary comments in the 3rd patch ie perf/powerpc ,as suggested by 
Maddy.



Anju T (3):
  perf/powerpc: assign an id to each powerpc register
  perf/powerpc: add support for sampling intr machine state
  tools/perf: Map the ID values with register names

Madhavan Srinivasan (1):
  tool/perf: Add sample_reg_mask to include all perf_regs regs


 arch/powerpc/Kconfig|  1 +
 arch/powerpc/include/uapi/asm/perf_regs.h   | 50 
 arch/powerpc/perf/Makefile  |  1 +
 arch/powerpc/perf/perf_regs.c   | 91 +
 tools/perf/arch/powerpc/include/perf_regs.h | 69 ++
 tools/perf/arch/powerpc/util/Build  |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c| 49 
 tools/perf/config/Makefile  |  5 ++
 8 files changed, 267 insertions(+)

Re: [PATCH v10 3/4] tools/perf: Map the ID values with register names

2016-01-21 Thread Anju T

Hi mpe,
On Wednesday 20 January 2016 04:16 PM, Michael Ellerman wrote:

On Mon, 2016-01-11 at 15:58 +0530, Anju T wrote:

diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
new file mode 100644
index 000..93080f5
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -0,0 +1,64 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include 
+#include 
+#include 
+
+#define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
+#define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_64

That looks wrong if perf is built 32-bit ?



Yes. You are right. The ABI differs for 32 bit.



+#define PERF_REG_IP PERF_REG_POWERPC_NIP
+#define PERF_REG_SP PERF_REG_POWERPC_GPR1
+
+static const char *reg_names[] = {
+   [PERF_REG_POWERPC_GPR0] = "gpr0",

Can you instead call them "r0" etc.

That is much more common on powerpc than "gpr0".


+   [PERF_REG_POWERPC_GPR1] = "gpr1",
+   [PERF_REG_POWERPC_GPR2] = "gpr2",
+   [PERF_REG_POWERPC_GPR3] = "gpr3",
+   [PERF_REG_POWERPC_GPR4] = "gpr4",
+   [PERF_REG_POWERPC_GPR5] = "gpr5",
+   [PERF_REG_POWERPC_GPR6] = "gpr6",
+   [PERF_REG_POWERPC_GPR7] = "gpr7",
+   [PERF_REG_POWERPC_GPR8] = "gpr8",
+   [PERF_REG_POWERPC_GPR9] = "gpr9",
+   [PERF_REG_POWERPC_GPR10] = "gpr10",
+   [PERF_REG_POWERPC_GPR11] = "gpr11",
+   [PERF_REG_POWERPC_GPR12] = "gpr12",
+   [PERF_REG_POWERPC_GPR13] = "gpr13",
+   [PERF_REG_POWERPC_GPR14] = "gpr14",
+   [PERF_REG_POWERPC_GPR15] = "gpr15",
+   [PERF_REG_POWERPC_GPR16] = "gpr16",
+   [PERF_REG_POWERPC_GPR17] = "gpr17",
+   [PERF_REG_POWERPC_GPR18] = "gpr18",
+   [PERF_REG_POWERPC_GPR19] = "gpr19",
+   [PERF_REG_POWERPC_GPR20] = "gpr20",
+   [PERF_REG_POWERPC_GPR21] = "gpr21",
+   [PERF_REG_POWERPC_GPR22] = "gpr22",
+   [PERF_REG_POWERPC_GPR23] = "gpr23",
+   [PERF_REG_POWERPC_GPR24] = "gpr24",
+   [PERF_REG_POWERPC_GPR25] = "gpr25",
+   [PERF_REG_POWERPC_GPR26] = "gpr26",
+   [PERF_REG_POWERPC_GPR27] = "gpr27",
+   [PERF_REG_POWERPC_GPR28] = "gpr28",
+   [PERF_REG_POWERPC_GPR29] = "gpr29",
+   [PERF_REG_POWERPC_GPR30] = "gpr30",
+   [PERF_REG_POWERPC_GPR31] = "gpr31",
+   [PERF_REG_POWERPC_NIP] = "nip",
+   [PERF_REG_POWERPC_MSR] = "msr",
+   [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
+   [PERF_REG_POWERPC_CTR] = "ctr",
+   [PERF_REG_POWERPC_LNK] = "link",
+   [PERF_REG_POWERPC_XER] = "xer",
+   [PERF_REG_POWERPC_CCR] = "ccr",
+   [PERF_REG_POWERPC_TRAP] = "trap",
+   [PERF_REG_POWERPC_DAR] = "dar",
+   [PERF_REG_POWERPC_DSISR] = "dsisr"
+};
+
+static inline const char *perf_reg_name(int id)
+{
+   return reg_names[id];
+}
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 38a0853..62a2f2d 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -23,6 +23,11 @@ $(call detected_var,ARCH)
  
  NO_PERF_REGS := 1
  
+# Additional ARCH settings for ppc64

+ifeq ($(ARCH),powerpc)

powerpc also includes ppc, ie. 32-bit, so the comment is wrong.



I will update the comment here  in the next patch. :)



+  NO_PERF_REGS := 0
+endif
+
  # Additional ARCH settings for x86
  ifeq ($(ARCH),x86)
$(call detected,CONFIG_X86)



Thanks and Regards

Anju

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V10 1/4] perf/powerpc: assign an id to each powerpc register

2016-01-21 Thread Anju T

On Wednesday 20 January 2016 04:08 PM, Michael Ellerman wrote:

Hi Anju,

On Mon, 2016-01-11 at 15:58 +0530, Anju T wrote:


The enum definition assigns an 'id' to each register in "struct pt_regs"
of arch/powerpc. The order of these values in the enum definition are
based on the corresponding macros in arch/powerpc/include/uapi/asm/ptrace.h.

Sorry one thing ...


diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000..cfbd068
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,49 @@
+#ifndef _ASM_POWERPC_PERF_REGS_H
+#define _ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+   PERF_REG_POWERPC_GPR0,
+   PERF_REG_POWERPC_GPR1,
+   PERF_REG_POWERPC_GPR2,
+   PERF_REG_POWERPC_GPR3,
+   PERF_REG_POWERPC_GPR4,
+   PERF_REG_POWERPC_GPR5,
+   PERF_REG_POWERPC_GPR6,
+   PERF_REG_POWERPC_GPR7,
+   PERF_REG_POWERPC_GPR8,
+   PERF_REG_POWERPC_GPR9,
+   PERF_REG_POWERPC_GPR10,
+   PERF_REG_POWERPC_GPR11,
+   PERF_REG_POWERPC_GPR12,
+   PERF_REG_POWERPC_GPR13,
+   PERF_REG_POWERPC_GPR14,
+   PERF_REG_POWERPC_GPR15,
+   PERF_REG_POWERPC_GPR16,
+   PERF_REG_POWERPC_GPR17,
+   PERF_REG_POWERPC_GPR18,
+   PERF_REG_POWERPC_GPR19,
+   PERF_REG_POWERPC_GPR20,
+   PERF_REG_POWERPC_GPR21,
+   PERF_REG_POWERPC_GPR22,
+   PERF_REG_POWERPC_GPR23,
+   PERF_REG_POWERPC_GPR24,
+   PERF_REG_POWERPC_GPR25,
+   PERF_REG_POWERPC_GPR26,
+   PERF_REG_POWERPC_GPR27,
+   PERF_REG_POWERPC_GPR28,
+   PERF_REG_POWERPC_GPR29,
+   PERF_REG_POWERPC_GPR30,
+   PERF_REG_POWERPC_GPR31,
+   PERF_REG_POWERPC_NIP,
+   PERF_REG_POWERPC_MSR,
+   PERF_REG_POWERPC_ORIG_R3,
+   PERF_REG_POWERPC_CTR,
+   PERF_REG_POWERPC_LNK,
+   PERF_REG_POWERPC_XER,
+   PERF_REG_POWERPC_CCR,

You skipped SOFTE here at my suggestion, because it's called MQ on 32-bit.

But I've changed my mind, I think we *should* define SOFTE, and ignore MQ,
because MQ is unused. So just add:

   +PERF_REG_POWERPC_SOFTE,



Thank you for reviewing the patch.

Yes here we can add SOFTE.


Thanks

Anju




+   PERF_REG_POWERPC_TRAP,
+   PERF_REG_POWERPC_DAR,
+   PERF_REG_POWERPC_DSISR,
+   PERF_REG_POWERPC_MAX,
+};
+#endif /* _ASM_POWERPC_PERF_REGS_H */

cheers



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V10 2/4] perf/powerpc: add support for sampling intr machine state

2016-01-21 Thread Anju T

Hi mpe,
On Wednesday 20 January 2016 04:10 PM, Michael Ellerman wrote:

On Mon, 2016-01-11 at 15:58 +0530, Anju T wrote:

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9a7057e..c4ce60d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -119,6 +119,7 @@ config PPC
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
+   select HAVE_PERF_REGS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000..d32581763
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c

...

+
+u64 perf_reg_abi(struct task_struct *task)
+{
+   return PERF_SAMPLE_REGS_ABI_64;

What is this value used for exactly?

It seems like on 32-bit kernels we should be returning PERF_SAMPLE_REGS_ABI_32.



Values to determine ABI of the registers dump.

enum perf_sample_regs_abi {

PERF_SAMPLE_REGS_ABI_NONE = 0,

PERF_SAMPLE_REGS_ABI_32 = 1,

PERF_SAMPLE_REGS_ABI_64 = 2,

};


Initially the ABI is set as NONE. So when we enable 
PERF_SAMPLE_REGS_INTR we need to get the correspodning ABI. This in turn 
required for ..


void perf_output_sample(struct perf_output_handle *handle,
if (sample_type & PERF_SAMPLE_TRANSACTION)
perf_output_put(handle, data->txn);

   if (sample_type & PERF_SAMPLE_REGS_INTR) {
   u64 abi = data->regs_intr.abi;
   /*
* If there are no regs to dump, notice it through
* first u64 being zero (PERF_SAMPLE_REGS_ABI_NONE).
*/
   perf_output_put(handle, abi);

   if (abi) {
   u64 mask = event->attr.sample_regs_intr;

   perf_output_sample_regs(handle,
data->regs_intr.regs,
   mask);
   }
   }



Here as you suggested we may need to pass the right ABI for 64 and 32 bit.


Thanks and Regards

Anju







+}
+
+void perf_get_regs_user(struct perf_regs *regs_user,
+   struct pt_regs *regs,
+   struct pt_regs *regs_user_copy)
+{
+   regs_user->regs = task_pt_regs(current);
+   regs_user->abi  = perf_reg_abi(current);
+}

cheers



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v10 3/4] tools/perf: Map the ID values with register names

2016-01-11 Thread Anju T
Map ID values with corresponding register names. These names are then
displayed when user issues perf record with the -I option
followed by perf report/script with -D option.

To test this patchset,
Eg:

$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patch / output looks like as follows:

178329381464 0x138 [0x180]: PERF_RECORD_SAMPLE(IP, 0x1): 7803/7803: 
0xc000fd9c period: 1 addr: 0
... intr regs: mask 0x3ff ABI 64-bit
 gpr0  0xc01a6420
 gpr1  0xc01e4df039b0
 gpr2  0xc0cdd100
 gpr3  0x1
 gpr4  0xc01e4a96d000
 gpr5  0x29854255ba
 gpr6  0xc00ffa3050b8
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x0
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfe03000
 gpr14 0x0
 gpr15 0xc0d763f8
 gpr16 0x0
 gpr17 0xc01e4ddcf000
 gpr18 0x0
 gpr19 0xc00ffa305000
 gpr20 0xc01e4df038c0
 gpr21 0xc01e40ed7a00
 gpr22 0xc00aa28c
 gpr23 0xc0cdd100
 gpr24 0x0
 gpr25 0xc0cdd100
 gpr26 0xc01e4df038b0
 gpr27 0xfeae
 gpr28 0xc01e4df03880
 gpr29 0xc0dce900
 gpr30 0xc01e4df03890
 gpr31 0xc01e355c7a30
 nip   0xc01a62d8
 msr   0x90009032
 orig_r3 0xc01a6320
 ctr   0xc00a7be0
 link   0xc01a6428
 xer   0x0
 ccr   0x24022888
 trap  0xf01
 dar   0xc01e40ed7a00
 dsisr 0x3000c006004
 ... thread: :7803:7803
 .. dso: /root/.debug/.build-id/d0/eb47b06c0d294143af13c50616f638c2d88658
   :7803  7803   178.329381:  1 cycles:  c000fd9c 
.arch_local_irq_restore (/boot/vmlinux)


Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/include/perf_regs.h | 64 +
 tools/perf/config/Makefile  |  5 +++
 2 files changed, 69 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h

diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
new file mode 100644
index 000..93080f5
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -0,0 +1,64 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include 
+#include 
+#include 
+
+#define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
+#define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP PERF_REG_POWERPC_NIP
+#define PERF_REG_SP PERF_REG_POWERPC_GPR1
+
+static const char *reg_names[] = {
+   [PERF_REG_POWERPC_GPR0] = "gpr0",
+   [PERF_REG_POWERPC_GPR1] = "gpr1",
+   [PERF_REG_POWERPC_GPR2] = "gpr2",
+   [PERF_REG_POWERPC_GPR3] = "gpr3",
+   [PERF_REG_POWERPC_GPR4] = "gpr4",
+   [PERF_REG_POWERPC_GPR5] = "gpr5",
+   [PERF_REG_POWERPC_GPR6] = "gpr6",
+   [PERF_REG_POWERPC_GPR7] = "gpr7",
+   [PERF_REG_POWERPC_GPR8] = "gpr8",
+   [PERF_REG_POWERPC_GPR9] = "gpr9",
+   [PERF_REG_POWERPC_GPR10] = "gpr10",
+   [PERF_REG_POWERPC_GPR11] = "gpr11",
+   [PERF_REG_POWERPC_GPR12] = "gpr12",
+   [PERF_REG_POWERPC_GPR13] = "gpr13",
+   [PERF_REG_POWERPC_GPR14] = "gpr14",
+   [PERF_REG_POWERPC_GPR15] = "gpr15",
+   [PERF_REG_POWERPC_GPR16] = "gpr16",
+   [PERF_REG_POWERPC_GPR17] = "gpr17",
+   [PERF_REG_POWERPC_GPR18] = "gpr18",
+   [PERF_REG_POWERPC_GPR19] = "gpr19",
+   [PERF_REG_POWERPC_GPR20] = "gpr20",
+   [PERF_REG_POWERPC_GPR21] = "gpr21",
+   [PERF_REG_POWERPC_GPR22] = "gpr22",
+   [PERF_REG_POWERPC_GPR23] = "gpr23",
+   [PERF_REG_POWERPC_GPR24] = "gpr24",
+   [PERF_REG_POWERPC_GPR25] = "gpr25",
+   [PERF_REG_POWERPC_GPR26] = "gpr26",
+   [PERF_REG_POWERPC_GPR27] = "gpr27",
+   [PERF_REG_POWERPC_GPR28] = "gpr28",
+   [PERF_REG_POWERPC_GPR29] = "gpr29",
+   [PERF_REG_POWERPC_GPR30] = "gpr30",
+   [PERF_REG_POWERPC_GPR31] = "gpr31",
+   [PERF_REG_POWERPC_NIP] = "nip",
+   [PERF_REG_POWERPC_MSR] = "msr",
+   [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
+   [PERF_REG_POWERPC_CTR] = "ctr",
+   [PERF_REG_POWERPC_LNK] = "link",
+   [PERF_REG_POWERPC_XER] = "xer",
+   [PERF_REG_POWERPC_CCR] = "ccr",
+   [PERF_REG_POWERPC_TRAP] = "trap",
+   [PERF_REG_POWERPC_DAR] = "dar",
+   [PERF_REG_POWERPC_DSIS

[PATCH V10 0/4] perf/powerpc: Add ability to sample intr machine state in powerpc

2016-01-11 Thread Anju T
This short patch series adds the ability to sample the interrupted
machine state for each hardware sample.

To test this patchset,
Eg:

$ perf record -I?   # list supported registers

output:

available registers: gpr0 gpr1 gpr2 gpr3 gpr4 gpr5 gpr6 gpr7 gpr8 gpr9 gpr10 
gpr11 gpr12 gpr13 gpr14 gpr15 gpr16 gpr17 gpr18 gpr19 gpr20 gpr21 gpr22 gpr23 
gpr24 gpr25 gpr26 gpr27 gpr28 gpr29 gpr30 gpr31 nip msr orig_r3 ctr link xer 
ccr trap dar dsisr
usage: perf record [] []
or: perf record [] --  []
 -I, --intr-regs[=]
sample selected machine registers on interrupt, use -I ? to list register names


$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patchset/ output looks like as follows:

178329381464 0x138 [0x180]: PERF_RECORD_SAMPLE(IP, 0x1): 7803/7803: 
0xc000fd9c period: 1 addr: 0
... intr regs: mask 0x3ff ABI 64-bit
 gpr0  0xc01a6420
 gpr1  0xc01e4df039b0
 gpr2  0xc0cdd100
 gpr3  0x1
 gpr4  0xc01e4a96d000
 gpr5  0x29854255ba
 gpr6  0xc00ffa3050b8
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x0
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfe03000
 gpr14 0x0
 gpr15 0xc0d763f8
 gpr16 0x0
 gpr17 0xc01e4ddcf000
 gpr18 0x0
 gpr19 0xc00ffa305000
 gpr20 0xc01e4df038c0
 gpr21 0xc01e40ed7a00
 gpr22 0xc00aa28c
 gpr23 0xc0cdd100
 gpr24 0x0
 gpr25 0xc0cdd100
 gpr26 0xc01e4df038b0
 gpr27 0xfeae
 gpr28 0xc01e4df03880
 gpr29 0xc0dce900
 gpr30 0xc01e4df03890
 gpr31 0xc01e355c7a30
 nip   0xc01a62d8
 msr   0x90009032
 orig_r3 0xc01a6320
 ctr   0xc00a7be0
 link   0xc01a6428
 xer   0x0
 ccr   0x24022888
 trap  0xf01
 dar   0xc01e40ed7a00
 dsisr 0x3000c006004
 ... thread: :7803:7803
 .. dso: /root/.debug/.build-id/d0/eb47b06c0d294143af13c50616f638c2d88658
   :7803  7803   178.329381:  1 cycles:  c000fd9c 
.arch_local_irq_restore (/boot/vmlinux)

Changes from V9:

- Changed the name displayed for link register from "lnk" to "link" in 
  tools/perf/arch/powerpc/include/perf_regs.h

changes from V8:

- Corrected the indentation issue in the Makefile mentioned in 3rd patch

Changes from V7:

- Addressed the new line issue in 3rd patch.

Changes from V6:

- Corrected the typo in patch  tools/perf: Map the ID values with register 
names.
  ie #define PERF_REG_SP  PERF_REG_POWERPC_R1 should be #define PERF_REG_SP   
PERF_REG_POWERPC_GPR1


Changes from V5:

- Enabled perf_sample_regs_user also in this patch set. Functions added in 
   arch/powerpc/perf/perf_regs.c
- Added Maddy's patch to this patchset for enabling -I? option which will
  list the supported register names.


Changes from V4:

- Removed the softe and MQ from all patches
- Switch case is replaced with an array in the 3rd patch

Changes from V3:

- Addressed the comments by Sukadev regarding the nits in the descriptions.
- Modified the subject of first patch.
- Included the sample output in the 3rd patch also.

Changes from V2:

- tools/perf/config/Makefile is moved to the patch tools/perf.
- The patchset is reordered.
- perf_regs_load() function is used for the dwarf unwind test. Since it is not 
required here,
  it is removed from tools/perf/arch/powerpc/include/perf_regs.h
- PERF_REGS_POWERPC_RESULT is removed.

Changes from V1:

- Solved the name mismatch issue in the from and signed-off field of the patch 
series.
- Added necessary comments in the 3rd patch ie perf/powerpc ,as suggested by 
Maddy.



Anju T (3):
  perf/powerpc: assign an id to each powerpc register
  perf/powerpc: add support for sampling intr machine state
  tools/perf: Map the ID values with register names

Madhavan Srinivasan (1):
  tool/perf: Add sample_reg_mask to include all perf_regs regs

 arch/powerpc/Kconfig|  1 +
 arch/powerpc/include/uapi/asm/perf_regs.h   | 49 +
 arch/powerpc/perf/Makefile  |  1 +
 arch/powerpc/perf/perf_regs.c   | 85 +
 tools/perf/arch/powerpc/include/perf_regs.h | 64 ++
 tools/perf/arch/powerpc/util/Build  |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c| 48 
 tools/perf/config/Makefile  |  5 ++
 8 files changed, 254 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h
 create mode 100644 arch/powerpc/perf/perf_regs.c
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V10 1/4] perf/powerpc: assign an id to each powerpc register

2016-01-11 Thread Anju T
The enum definition assigns an 'id' to each register in "struct pt_regs"
of arch/powerpc. The order of these values in the enum definition are
based on the corresponding macros in arch/powerpc/include/uapi/asm/ptrace.h.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/include/uapi/asm/perf_regs.h | 49 +++
 1 file changed, 49 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h

diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000..cfbd068
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,49 @@
+#ifndef _ASM_POWERPC_PERF_REGS_H
+#define _ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+   PERF_REG_POWERPC_GPR0,
+   PERF_REG_POWERPC_GPR1,
+   PERF_REG_POWERPC_GPR2,
+   PERF_REG_POWERPC_GPR3,
+   PERF_REG_POWERPC_GPR4,
+   PERF_REG_POWERPC_GPR5,
+   PERF_REG_POWERPC_GPR6,
+   PERF_REG_POWERPC_GPR7,
+   PERF_REG_POWERPC_GPR8,
+   PERF_REG_POWERPC_GPR9,
+   PERF_REG_POWERPC_GPR10,
+   PERF_REG_POWERPC_GPR11,
+   PERF_REG_POWERPC_GPR12,
+   PERF_REG_POWERPC_GPR13,
+   PERF_REG_POWERPC_GPR14,
+   PERF_REG_POWERPC_GPR15,
+   PERF_REG_POWERPC_GPR16,
+   PERF_REG_POWERPC_GPR17,
+   PERF_REG_POWERPC_GPR18,
+   PERF_REG_POWERPC_GPR19,
+   PERF_REG_POWERPC_GPR20,
+   PERF_REG_POWERPC_GPR21,
+   PERF_REG_POWERPC_GPR22,
+   PERF_REG_POWERPC_GPR23,
+   PERF_REG_POWERPC_GPR24,
+   PERF_REG_POWERPC_GPR25,
+   PERF_REG_POWERPC_GPR26,
+   PERF_REG_POWERPC_GPR27,
+   PERF_REG_POWERPC_GPR28,
+   PERF_REG_POWERPC_GPR29,
+   PERF_REG_POWERPC_GPR30,
+   PERF_REG_POWERPC_GPR31,
+   PERF_REG_POWERPC_NIP,
+   PERF_REG_POWERPC_MSR,
+   PERF_REG_POWERPC_ORIG_R3,
+   PERF_REG_POWERPC_CTR,
+   PERF_REG_POWERPC_LNK,
+   PERF_REG_POWERPC_XER,
+   PERF_REG_POWERPC_CCR,
+   PERF_REG_POWERPC_TRAP,
+   PERF_REG_POWERPC_DAR,
+   PERF_REG_POWERPC_DSISR,
+   PERF_REG_POWERPC_MAX,
+};
+#endif /* _ASM_POWERPC_PERF_REGS_H */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V1 4/4] tool/perf: Add sample_reg_mask to include all perf_regs regs

2016-01-11 Thread Anju T
From: Madhavan Srinivasan 

Add sample_reg_mask array with pt_regs registers.
This is needed for printing supported regs ( -I? option).

Signed-off-by: Madhavan Srinivasan 
---
 tools/perf/arch/powerpc/util/Build   |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c | 48 
 2 files changed, 49 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..3deb1bc 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += perf_regs.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c 
b/tools/perf/arch/powerpc/util/perf_regs.c
new file mode 100644
index 000..0b0ec65
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -0,0 +1,48 @@
+#include "../../perf.h"
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+   SMPL_REG(gpr0, PERF_REG_POWERPC_GPR0),
+   SMPL_REG(gpr1, PERF_REG_POWERPC_GPR1),
+   SMPL_REG(gpr2, PERF_REG_POWERPC_GPR2),
+   SMPL_REG(gpr3, PERF_REG_POWERPC_GPR3),
+   SMPL_REG(gpr4, PERF_REG_POWERPC_GPR4),
+   SMPL_REG(gpr5, PERF_REG_POWERPC_GPR5),
+   SMPL_REG(gpr6, PERF_REG_POWERPC_GPR6),
+   SMPL_REG(gpr7, PERF_REG_POWERPC_GPR7),
+   SMPL_REG(gpr8, PERF_REG_POWERPC_GPR8),
+   SMPL_REG(gpr9, PERF_REG_POWERPC_GPR9),
+   SMPL_REG(gpr10, PERF_REG_POWERPC_GPR10),
+   SMPL_REG(gpr11, PERF_REG_POWERPC_GPR11),
+   SMPL_REG(gpr12, PERF_REG_POWERPC_GPR12),
+   SMPL_REG(gpr13, PERF_REG_POWERPC_GPR13),
+   SMPL_REG(gpr14, PERF_REG_POWERPC_GPR14),
+   SMPL_REG(gpr15, PERF_REG_POWERPC_GPR15),
+   SMPL_REG(gpr16, PERF_REG_POWERPC_GPR16),
+   SMPL_REG(gpr17, PERF_REG_POWERPC_GPR17),
+   SMPL_REG(gpr18, PERF_REG_POWERPC_GPR18),
+   SMPL_REG(gpr19, PERF_REG_POWERPC_GPR19),
+   SMPL_REG(gpr20, PERF_REG_POWERPC_GPR20),
+   SMPL_REG(gpr21, PERF_REG_POWERPC_GPR21),
+   SMPL_REG(gpr22, PERF_REG_POWERPC_GPR22),
+   SMPL_REG(gpr23, PERF_REG_POWERPC_GPR23),
+   SMPL_REG(gpr24, PERF_REG_POWERPC_GPR24),
+   SMPL_REG(gpr25, PERF_REG_POWERPC_GPR25),
+   SMPL_REG(gpr26, PERF_REG_POWERPC_GPR26),
+   SMPL_REG(gpr27, PERF_REG_POWERPC_GPR27),
+   SMPL_REG(gpr28, PERF_REG_POWERPC_GPR28),
+   SMPL_REG(gpr29, PERF_REG_POWERPC_GPR29),
+   SMPL_REG(gpr30, PERF_REG_POWERPC_GPR30),
+   SMPL_REG(gpr31, PERF_REG_POWERPC_GPR31),
+   SMPL_REG(nip, PERF_REG_POWERPC_NIP),
+   SMPL_REG(msr, PERF_REG_POWERPC_MSR),
+   SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3),
+   SMPL_REG(ctr, PERF_REG_POWERPC_CTR),
+   SMPL_REG(link, PERF_REG_POWERPC_LNK),
+   SMPL_REG(xer, PERF_REG_POWERPC_XER),
+   SMPL_REG(ccr, PERF_REG_POWERPC_CCR),
+   SMPL_REG(trap, PERF_REG_POWERPC_TRAP),
+   SMPL_REG(dar, PERF_REG_POWERPC_DAR),
+   SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
+   SMPL_REG_END
+};
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V10 2/4] perf/powerpc: add support for sampling intr machine state

2016-01-11 Thread Anju T
The perf infrastructure uses a bit mask to find out valid
registers to display. Define a register mask for supported
registers defined in asm/perf_regs.h. The bit positions also
correspond to register IDs which is used by perf infrastructure
to fetch the register values. CONFIG_HAVE_PERF_REGS enables
sampling of the interrupted machine state.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/perf/Makefile|  1 +
 arch/powerpc/perf/perf_regs.c | 85 +++
 3 files changed, 87 insertions(+)
 create mode 100644 arch/powerpc/perf/perf_regs.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9a7057e..c4ce60d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -119,6 +119,7 @@ config PPC
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
+   select HAVE_PERF_REGS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index f9c083a..2f2d3d2 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -8,6 +8,7 @@ obj64-$(CONFIG_PPC_PERF_CTRS)   += power4-pmu.o ppc970-pmu.o 
power5-pmu.o \
   power8-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 
+obj-$(CONFIG_PERF_EVENTS)  += perf_regs.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000..d32581763
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,85 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR0, gpr[0]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR1, gpr[1]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR2, gpr[2]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR3, gpr[3]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR4, gpr[4]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR5, gpr[5]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR6, gpr[6]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR7, gpr[7]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR8, gpr[8]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR9, gpr[9]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR10, gpr[10]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR11, gpr[11]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR12, gpr[12]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR13, gpr[13]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR14, gpr[14]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR15, gpr[15]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR16, gpr[16]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR17, gpr[17]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR18, gpr[18]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR19, gpr[19]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR20, gpr[20]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR21, gpr[21]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR22, gpr[22]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR23, gpr[23]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR24, gpr[24]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR25, gpr[25]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR26, gpr[26]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR27, gpr[27]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR28, gpr[28]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR29, gpr[29]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR30, gpr[30]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR31, gpr[31]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_LNK, link),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+   if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
+   return 0;
+
+   return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+int perf_reg_validate(u64 mask)
+{
+   if (!mask || mask & REG_RESERVED)
+   return -EINVAL;
+   return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+   return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs

[PATCH V1 4/4] tool/perf: Add sample_reg_mask to include all perf_regs regs

2016-01-10 Thread Anju T
From: Madhavan Srinivasan 

Add sample_reg_mask array with pt_regs registers.
This is needed for printing supported regs ( -I? option).

Signed-off-by: Madhavan Srinivasan 
---
 tools/perf/arch/powerpc/util/Build   |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c | 48 
 2 files changed, 49 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..3deb1bc 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += perf_regs.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c 
b/tools/perf/arch/powerpc/util/perf_regs.c
new file mode 100644
index 000..0b0ec65
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -0,0 +1,48 @@
+#include "../../perf.h"
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+   SMPL_REG(gpr0, PERF_REG_POWERPC_GPR0),
+   SMPL_REG(gpr1, PERF_REG_POWERPC_GPR1),
+   SMPL_REG(gpr2, PERF_REG_POWERPC_GPR2),
+   SMPL_REG(gpr3, PERF_REG_POWERPC_GPR3),
+   SMPL_REG(gpr4, PERF_REG_POWERPC_GPR4),
+   SMPL_REG(gpr5, PERF_REG_POWERPC_GPR5),
+   SMPL_REG(gpr6, PERF_REG_POWERPC_GPR6),
+   SMPL_REG(gpr7, PERF_REG_POWERPC_GPR7),
+   SMPL_REG(gpr8, PERF_REG_POWERPC_GPR8),
+   SMPL_REG(gpr9, PERF_REG_POWERPC_GPR9),
+   SMPL_REG(gpr10, PERF_REG_POWERPC_GPR10),
+   SMPL_REG(gpr11, PERF_REG_POWERPC_GPR11),
+   SMPL_REG(gpr12, PERF_REG_POWERPC_GPR12),
+   SMPL_REG(gpr13, PERF_REG_POWERPC_GPR13),
+   SMPL_REG(gpr14, PERF_REG_POWERPC_GPR14),
+   SMPL_REG(gpr15, PERF_REG_POWERPC_GPR15),
+   SMPL_REG(gpr16, PERF_REG_POWERPC_GPR16),
+   SMPL_REG(gpr17, PERF_REG_POWERPC_GPR17),
+   SMPL_REG(gpr18, PERF_REG_POWERPC_GPR18),
+   SMPL_REG(gpr19, PERF_REG_POWERPC_GPR19),
+   SMPL_REG(gpr20, PERF_REG_POWERPC_GPR20),
+   SMPL_REG(gpr21, PERF_REG_POWERPC_GPR21),
+   SMPL_REG(gpr22, PERF_REG_POWERPC_GPR22),
+   SMPL_REG(gpr23, PERF_REG_POWERPC_GPR23),
+   SMPL_REG(gpr24, PERF_REG_POWERPC_GPR24),
+   SMPL_REG(gpr25, PERF_REG_POWERPC_GPR25),
+   SMPL_REG(gpr26, PERF_REG_POWERPC_GPR26),
+   SMPL_REG(gpr27, PERF_REG_POWERPC_GPR27),
+   SMPL_REG(gpr28, PERF_REG_POWERPC_GPR28),
+   SMPL_REG(gpr29, PERF_REG_POWERPC_GPR29),
+   SMPL_REG(gpr30, PERF_REG_POWERPC_GPR30),
+   SMPL_REG(gpr31, PERF_REG_POWERPC_GPR31),
+   SMPL_REG(nip, PERF_REG_POWERPC_NIP),
+   SMPL_REG(msr, PERF_REG_POWERPC_MSR),
+   SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3),
+   SMPL_REG(ctr, PERF_REG_POWERPC_CTR),
+   SMPL_REG(link, PERF_REG_POWERPC_LNK),
+   SMPL_REG(xer, PERF_REG_POWERPC_XER),
+   SMPL_REG(ccr, PERF_REG_POWERPC_CCR),
+   SMPL_REG(trap, PERF_REG_POWERPC_TRAP),
+   SMPL_REG(dar, PERF_REG_POWERPC_DAR),
+   SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
+   SMPL_REG_END
+};
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v10 3/4] tools/perf: Map the ID values with register names

2016-01-10 Thread Anju T
Map ID values with corresponding register names. These names are then
displayed when user issues perf record with the -I option
followed by perf report/script with -D option.

To test this patchset,
Eg:

$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patch / output looks like as follows:

178329381464 0x138 [0x180]: PERF_RECORD_SAMPLE(IP, 0x1): 7803/7803: 
0xc000fd9c period: 1 addr: 0
... intr regs: mask 0x3ff ABI 64-bit
 gpr0  0xc01a6420
 gpr1  0xc01e4df039b0
 gpr2  0xc0cdd100
 gpr3  0x1
 gpr4  0xc01e4a96d000
 gpr5  0x29854255ba
 gpr6  0xc00ffa3050b8
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x0
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfe03000
 gpr14 0x0
 gpr15 0xc0d763f8
 gpr16 0x0
 gpr17 0xc01e4ddcf000
 gpr18 0x0
 gpr19 0xc00ffa305000
 gpr20 0xc01e4df038c0
 gpr21 0xc01e40ed7a00
 gpr22 0xc00aa28c
 gpr23 0xc0cdd100
 gpr24 0x0
 gpr25 0xc0cdd100
 gpr26 0xc01e4df038b0
 gpr27 0xfeae
 gpr28 0xc01e4df03880
 gpr29 0xc0dce900
 gpr30 0xc01e4df03890
 gpr31 0xc01e355c7a30
 nip   0xc01a62d8
 msr   0x90009032
 orig_r3 0xc01a6320
 ctr   0xc00a7be0
 link   0xc01a6428
 xer   0x0
 ccr   0x24022888
 trap  0xf01
 dar   0xc01e40ed7a00
 dsisr 0x3000c006004
 ... thread: :7803:7803
 .. dso: /root/.debug/.build-id/d0/eb47b06c0d294143af13c50616f638c2d88658
   :7803  7803   178.329381:  1 cycles:  c000fd9c 
.arch_local_irq_restore (/boot/vmlinux)


Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/include/perf_regs.h | 64 +
 tools/perf/config/Makefile  |  5 +++
 2 files changed, 69 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h

diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
new file mode 100644
index 000..93080f5
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -0,0 +1,64 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include 
+#include 
+#include 
+
+#define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
+#define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP PERF_REG_POWERPC_NIP
+#define PERF_REG_SP PERF_REG_POWERPC_GPR1
+
+static const char *reg_names[] = {
+   [PERF_REG_POWERPC_GPR0] = "gpr0",
+   [PERF_REG_POWERPC_GPR1] = "gpr1",
+   [PERF_REG_POWERPC_GPR2] = "gpr2",
+   [PERF_REG_POWERPC_GPR3] = "gpr3",
+   [PERF_REG_POWERPC_GPR4] = "gpr4",
+   [PERF_REG_POWERPC_GPR5] = "gpr5",
+   [PERF_REG_POWERPC_GPR6] = "gpr6",
+   [PERF_REG_POWERPC_GPR7] = "gpr7",
+   [PERF_REG_POWERPC_GPR8] = "gpr8",
+   [PERF_REG_POWERPC_GPR9] = "gpr9",
+   [PERF_REG_POWERPC_GPR10] = "gpr10",
+   [PERF_REG_POWERPC_GPR11] = "gpr11",
+   [PERF_REG_POWERPC_GPR12] = "gpr12",
+   [PERF_REG_POWERPC_GPR13] = "gpr13",
+   [PERF_REG_POWERPC_GPR14] = "gpr14",
+   [PERF_REG_POWERPC_GPR15] = "gpr15",
+   [PERF_REG_POWERPC_GPR16] = "gpr16",
+   [PERF_REG_POWERPC_GPR17] = "gpr17",
+   [PERF_REG_POWERPC_GPR18] = "gpr18",
+   [PERF_REG_POWERPC_GPR19] = "gpr19",
+   [PERF_REG_POWERPC_GPR20] = "gpr20",
+   [PERF_REG_POWERPC_GPR21] = "gpr21",
+   [PERF_REG_POWERPC_GPR22] = "gpr22",
+   [PERF_REG_POWERPC_GPR23] = "gpr23",
+   [PERF_REG_POWERPC_GPR24] = "gpr24",
+   [PERF_REG_POWERPC_GPR25] = "gpr25",
+   [PERF_REG_POWERPC_GPR26] = "gpr26",
+   [PERF_REG_POWERPC_GPR27] = "gpr27",
+   [PERF_REG_POWERPC_GPR28] = "gpr28",
+   [PERF_REG_POWERPC_GPR29] = "gpr29",
+   [PERF_REG_POWERPC_GPR30] = "gpr30",
+   [PERF_REG_POWERPC_GPR31] = "gpr31",
+   [PERF_REG_POWERPC_NIP] = "nip",
+   [PERF_REG_POWERPC_MSR] = "msr",
+   [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
+   [PERF_REG_POWERPC_CTR] = "ctr",
+   [PERF_REG_POWERPC_LNK] = "link",
+   [PERF_REG_POWERPC_XER] = "xer",
+   [PERF_REG_POWERPC_CCR] = "ccr",
+   [PERF_REG_POWERPC_TRAP] = "trap",
+   [PERF_REG_POWERPC_DAR] = "dar",
+   [PERF_REG_POWERPC_DSIS

[PATCH V10 0/4] perf/powerpc: Add ability to sample intr machine state in powerpc

2016-01-10 Thread Anju T
This short patch series adds the ability to sample the interrupted
machine state for each hardware sample.

To test this patchset,
Eg:

$ perf record -I?   # list supported registers

output:

available registers: gpr0 gpr1 gpr2 gpr3 gpr4 gpr5 gpr6 gpr7 gpr8 gpr9 gpr10 
gpr11 gpr12 gpr13 gpr14 gpr15 gpr16 gpr17 gpr18 gpr19 gpr20 gpr21 gpr22 gpr23 
gpr24 gpr25 gpr26 gpr27 gpr28 gpr29 gpr30 gpr31 nip msr orig_r3 ctr link xer 
ccr trap dar dsisr
usage: perf record [] []
or: perf record [] --  []
 -I, --intr-regs[=]
sample selected machine registers on interrupt, use -I ? to list register names


$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patchset / output looks like as follows:

178329381464 0x138 [0x180]: PERF_RECORD_SAMPLE(IP, 0x1): 7803/7803: 
0xc000fd9c period: 1 addr: 0
... intr regs: mask 0x3ff ABI 64-bit
 gpr0  0xc01a6420
 gpr1  0xc01e4df039b0
 gpr2  0xc0cdd100
 gpr3  0x1
 gpr4  0xc01e4a96d000
 gpr5  0x29854255ba
 gpr6  0xc00ffa3050b8
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x0
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfe03000
 gpr14 0x0
 gpr15 0xc0d763f8
 gpr16 0x0
 gpr17 0xc01e4ddcf000
 gpr18 0x0
 gpr19 0xc00ffa305000
 gpr20 0xc01e4df038c0
 gpr21 0xc01e40ed7a00
 gpr22 0xc00aa28c
 gpr23 0xc0cdd100
 gpr24 0x0
 gpr25 0xc0cdd100
 gpr26 0xc01e4df038b0
 gpr27 0xfeae
 gpr28 0xc01e4df03880
 gpr29 0xc0dce900
 gpr30 0xc01e4df03890
 gpr31 0xc01e355c7a30
 nip   0xc01a62d8
 msr   0x90009032
 orig_r3 0xc01a6320
 ctr   0xc00a7be0
 link   0xc01a6428
 xer   0x0
 ccr   0x24022888
 trap  0xf01
 dar   0xc01e40ed7a00
 dsisr 0x3000c006004
 ... thread: :7803:7803
 .. dso: /root/.debug/.build-id/d0/eb47b06c0d294143af13c50616f638c2d88658
   :7803  7803   178.329381:  1 cycles:  c000fd9c 
.arch_local_irq_restore (/boot/vmlinux)

Changes from V9:

- Changed the name displayed for link register from "lnk" to "link" in 
  tools/perf/arch/powerpc/include/perf_regs.h

changes from V8:

- Corrected the indentation issue in the Makefile mentioned in 3rd patch

Changes from V7:

- Addressed the new line issue in 3rd patch.

Changes from V6:

- Corrected the typo in patch  tools/perf: Map the ID values with register 
names.
  ie #define PERF_REG_SP  PERF_REG_POWERPC_R1 should be #define PERF_REG_SP   
PERF_REG_POWERPC_GPR1


Changes from V5:

- Enabled perf_sample_regs_user also in this patch set. Functions added in 
   arch/powerpc/perf/perf_regs.c
- Added Maddy's patch to this patchset for enabling -I? option which will
  list the supported register names.


Changes from V4:

- Removed the softe and MQ from all patches
- Switch case is replaced with an array in the 3rd patch

Changes from V3:

- Addressed the comments by Sukadev regarding the nits in the descriptions.
- Modified the subject of first patch.
- Included the sample output in the 3rd patch also.

Changes from V2:

- tools/perf/config/Makefile is moved to the patch tools/perf.
- The patchset is reordered.
- perf_regs_load() function is used for the dwarf unwind test. Since it is not 
required here,
  it is removed from tools/perf/arch/powerpc/include/perf_regs.h
- PERF_REGS_POWERPC_RESULT is removed.

Changes from V1:

- Solved the name mismatch issue in the from and signed-off field of the patch 
series.
- Added necessary comments in the 3rd patch ie perf/powerpc ,as suggested by 
Maddy.



Anju T (3):
  perf/powerpc: assign an id to each powerpc register
  perf/powerpc: add support for sampling intr machine state
  tools/perf: Map the ID values with register names

Madhavan Srinivasan (1):
  tool/perf: Add sample_reg_mask to include all perf_regs regs

 arch/powerpc/Kconfig|  1 +
 arch/powerpc/include/uapi/asm/perf_regs.h   | 49 +
 arch/powerpc/perf/Makefile  |  1 +
 arch/powerpc/perf/perf_regs.c   | 85 +
 tools/perf/arch/powerpc/include/perf_regs.h | 64 ++
 tools/perf/arch/powerpc/util/Build  |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c| 48 
 tools/perf/config/Makefile  |  5 ++
 8 files changed, 254 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h
 create mode 100644 arch/powerpc/perf/perf_regs.c
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 10 1/4] perf/powerpc: assign an id to each powerpc register

2016-01-10 Thread Anju T
The enum definition assigns an 'id' to each register in "struct pt_regs"
of arch/powerpc. The order of these values in the enum definition are
based on the corresponding macros in arch/powerpc/include/uapi/asm/ptrace.h.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/include/uapi/asm/perf_regs.h | 49 +++
 1 file changed, 49 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h

diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000..cfbd068
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,49 @@
+#ifndef _ASM_POWERPC_PERF_REGS_H
+#define _ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+   PERF_REG_POWERPC_GPR0,
+   PERF_REG_POWERPC_GPR1,
+   PERF_REG_POWERPC_GPR2,
+   PERF_REG_POWERPC_GPR3,
+   PERF_REG_POWERPC_GPR4,
+   PERF_REG_POWERPC_GPR5,
+   PERF_REG_POWERPC_GPR6,
+   PERF_REG_POWERPC_GPR7,
+   PERF_REG_POWERPC_GPR8,
+   PERF_REG_POWERPC_GPR9,
+   PERF_REG_POWERPC_GPR10,
+   PERF_REG_POWERPC_GPR11,
+   PERF_REG_POWERPC_GPR12,
+   PERF_REG_POWERPC_GPR13,
+   PERF_REG_POWERPC_GPR14,
+   PERF_REG_POWERPC_GPR15,
+   PERF_REG_POWERPC_GPR16,
+   PERF_REG_POWERPC_GPR17,
+   PERF_REG_POWERPC_GPR18,
+   PERF_REG_POWERPC_GPR19,
+   PERF_REG_POWERPC_GPR20,
+   PERF_REG_POWERPC_GPR21,
+   PERF_REG_POWERPC_GPR22,
+   PERF_REG_POWERPC_GPR23,
+   PERF_REG_POWERPC_GPR24,
+   PERF_REG_POWERPC_GPR25,
+   PERF_REG_POWERPC_GPR26,
+   PERF_REG_POWERPC_GPR27,
+   PERF_REG_POWERPC_GPR28,
+   PERF_REG_POWERPC_GPR29,
+   PERF_REG_POWERPC_GPR30,
+   PERF_REG_POWERPC_GPR31,
+   PERF_REG_POWERPC_NIP,
+   PERF_REG_POWERPC_MSR,
+   PERF_REG_POWERPC_ORIG_R3,
+   PERF_REG_POWERPC_CTR,
+   PERF_REG_POWERPC_LNK,
+   PERF_REG_POWERPC_XER,
+   PERF_REG_POWERPC_CCR,
+   PERF_REG_POWERPC_TRAP,
+   PERF_REG_POWERPC_DAR,
+   PERF_REG_POWERPC_DSISR,
+   PERF_REG_POWERPC_MAX,
+};
+#endif /* _ASM_POWERPC_PERF_REGS_H */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V10 2/4] perf/powerpc: add support for sampling intr machine state

2016-01-10 Thread Anju T
The perf infrastructure uses a bit mask to find out valid
registers to display. Define a register mask for supported
registers defined in asm/perf_regs.h. The bit positions also
correspond to register IDs which is used by perf infrastructure
to fetch the register values. CONFIG_HAVE_PERF_REGS enables
sampling of the interrupted machine state.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/perf/Makefile|  1 +
 arch/powerpc/perf/perf_regs.c | 85 +++
 3 files changed, 87 insertions(+)
 create mode 100644 arch/powerpc/perf/perf_regs.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9a7057e..c4ce60d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -119,6 +119,7 @@ config PPC
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
+   select HAVE_PERF_REGS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index f9c083a..2f2d3d2 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -8,6 +8,7 @@ obj64-$(CONFIG_PPC_PERF_CTRS)   += power4-pmu.o ppc970-pmu.o 
power5-pmu.o \
   power8-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 
+obj-$(CONFIG_PERF_EVENTS)  += perf_regs.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000..d32581763
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,85 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR0, gpr[0]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR1, gpr[1]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR2, gpr[2]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR3, gpr[3]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR4, gpr[4]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR5, gpr[5]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR6, gpr[6]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR7, gpr[7]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR8, gpr[8]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR9, gpr[9]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR10, gpr[10]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR11, gpr[11]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR12, gpr[12]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR13, gpr[13]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR14, gpr[14]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR15, gpr[15]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR16, gpr[16]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR17, gpr[17]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR18, gpr[18]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR19, gpr[19]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR20, gpr[20]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR21, gpr[21]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR22, gpr[22]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR23, gpr[23]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR24, gpr[24]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR25, gpr[25]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR26, gpr[26]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR27, gpr[27]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR28, gpr[28]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR29, gpr[29]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR30, gpr[30]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR31, gpr[31]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_LNK, link),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+   if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
+   return 0;
+
+   return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+int perf_reg_validate(u64 mask)
+{
+   if (!mask || mask & REG_RESERVED)
+   return -EINVAL;
+   return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+   return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs

[PATCH V8 1/4] perf/powerpc: assign an id to each powerpc register

2016-01-08 Thread Anju T
The enum definition assigns an 'id' to each register in "struct pt_regs"
of arch/powerpc. The order of these values in the enum definition are
based on the corresponding macros in arch/powerpc/include/uapi/asm/ptrace.h.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/include/uapi/asm/perf_regs.h | 49 +++
 1 file changed, 49 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h

diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000..cfbd068
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,49 @@
+#ifndef _ASM_POWERPC_PERF_REGS_H
+#define _ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+   PERF_REG_POWERPC_GPR0,
+   PERF_REG_POWERPC_GPR1,
+   PERF_REG_POWERPC_GPR2,
+   PERF_REG_POWERPC_GPR3,
+   PERF_REG_POWERPC_GPR4,
+   PERF_REG_POWERPC_GPR5,
+   PERF_REG_POWERPC_GPR6,
+   PERF_REG_POWERPC_GPR7,
+   PERF_REG_POWERPC_GPR8,
+   PERF_REG_POWERPC_GPR9,
+   PERF_REG_POWERPC_GPR10,
+   PERF_REG_POWERPC_GPR11,
+   PERF_REG_POWERPC_GPR12,
+   PERF_REG_POWERPC_GPR13,
+   PERF_REG_POWERPC_GPR14,
+   PERF_REG_POWERPC_GPR15,
+   PERF_REG_POWERPC_GPR16,
+   PERF_REG_POWERPC_GPR17,
+   PERF_REG_POWERPC_GPR18,
+   PERF_REG_POWERPC_GPR19,
+   PERF_REG_POWERPC_GPR20,
+   PERF_REG_POWERPC_GPR21,
+   PERF_REG_POWERPC_GPR22,
+   PERF_REG_POWERPC_GPR23,
+   PERF_REG_POWERPC_GPR24,
+   PERF_REG_POWERPC_GPR25,
+   PERF_REG_POWERPC_GPR26,
+   PERF_REG_POWERPC_GPR27,
+   PERF_REG_POWERPC_GPR28,
+   PERF_REG_POWERPC_GPR29,
+   PERF_REG_POWERPC_GPR30,
+   PERF_REG_POWERPC_GPR31,
+   PERF_REG_POWERPC_NIP,
+   PERF_REG_POWERPC_MSR,
+   PERF_REG_POWERPC_ORIG_R3,
+   PERF_REG_POWERPC_CTR,
+   PERF_REG_POWERPC_LNK,
+   PERF_REG_POWERPC_XER,
+   PERF_REG_POWERPC_CCR,
+   PERF_REG_POWERPC_TRAP,
+   PERF_REG_POWERPC_DAR,
+   PERF_REG_POWERPC_DSISR,
+   PERF_REG_POWERPC_MAX,
+};
+#endif /* _ASM_POWERPC_PERF_REGS_H */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V8 2/4] perf/powerpc: add support for sampling intr machine state

2016-01-08 Thread Anju T
The perf infrastructure uses a bit mask to find out valid
registers to display. Define a register mask for supported
registers defined in asm/perf_regs.h. The bit positions also
correspond to register IDs which is used by perf infrastructure
to fetch the register values. CONFIG_HAVE_PERF_REGS enables
sampling of the interrupted machine state.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/perf/Makefile|  1 +
 arch/powerpc/perf/perf_regs.c | 85 +++
 3 files changed, 87 insertions(+)
 create mode 100644 arch/powerpc/perf/perf_regs.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9a7057e..c4ce60d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -119,6 +119,7 @@ config PPC
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
+   select HAVE_PERF_REGS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index f9c083a..2f2d3d2 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -8,6 +8,7 @@ obj64-$(CONFIG_PPC_PERF_CTRS)   += power4-pmu.o ppc970-pmu.o 
power5-pmu.o \
   power8-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 
+obj-$(CONFIG_PERF_EVENTS)  += perf_regs.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000..d32581763
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,85 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR0, gpr[0]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR1, gpr[1]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR2, gpr[2]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR3, gpr[3]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR4, gpr[4]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR5, gpr[5]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR6, gpr[6]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR7, gpr[7]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR8, gpr[8]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR9, gpr[9]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR10, gpr[10]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR11, gpr[11]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR12, gpr[12]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR13, gpr[13]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR14, gpr[14]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR15, gpr[15]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR16, gpr[16]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR17, gpr[17]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR18, gpr[18]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR19, gpr[19]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR20, gpr[20]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR21, gpr[21]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR22, gpr[22]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR23, gpr[23]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR24, gpr[24]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR25, gpr[25]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR26, gpr[26]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR27, gpr[27]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR28, gpr[28]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR29, gpr[29]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR30, gpr[30]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR31, gpr[31]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_LNK, link),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+   if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
+   return 0;
+
+   return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+int perf_reg_validate(u64 mask)
+{
+   if (!mask || mask & REG_RESERVED)
+   return -EINVAL;
+   return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+   return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs

[PATCH V8 3/4] tools/perf: Map the ID values with register names

2016-01-08 Thread Anju T
Map ID values with corresponding register names. These names are then
displayed when user issues perf record with the -I option
followed by perf report/script with -D option.

To test this patchset,
Eg:

$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patch / output looks like as follows:

178329381464 0x138 [0x180]: PERF_RECORD_SAMPLE(IP, 0x1): 7803/7803: 
0xc000fd9c period: 1 addr: 0
... intr regs: mask 0x3ff ABI 64-bit
 gpr0  0xc01a6420
 gpr1  0xc01e4df039b0
 gpr2  0xc0cdd100
 gpr3  0x1
 gpr4  0xc01e4a96d000
 gpr5  0x29854255ba
 gpr6  0xc00ffa3050b8
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x0
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfe03000
 gpr14 0x0
 gpr15 0xc0d763f8
 gpr16 0x0
 gpr17 0xc01e4ddcf000
 gpr18 0x0
 gpr19 0xc00ffa305000
 gpr20 0xc01e4df038c0
 gpr21 0xc01e40ed7a00
 gpr22 0xc00aa28c
 gpr23 0xc0cdd100
 gpr24 0x0
 gpr25 0xc0cdd100
 gpr26 0xc01e4df038b0
 gpr27 0xfeae
 gpr28 0xc01e4df03880
 gpr29 0xc0dce900
 gpr30 0xc01e4df03890
 gpr31 0xc01e355c7a30
 nip   0xc01a62d8
 msr   0x90009032
 orig_r3 0xc01a6320
 ctr   0xc00a7be0
 lnk   0xc01a6428
 xer   0x0
 ccr   0x24022888
 trap  0xf01
 dar   0xc01e40ed7a00
 dsisr 0x3000c006004
 ... thread: :7803:7803
 .. dso: /root/.debug/.build-id/d0/eb47b06c0d294143af13c50616f638c2d88658
   :7803  7803   178.329381:  1 cycles:  c000fd9c 
.arch_local_irq_restore (/boot/vmlinux)


Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/include/perf_regs.h | 64 +
 tools/perf/config/Makefile  |  5 +++
 2 files changed, 69 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h

diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
new file mode 100644
index 000..c725834
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -0,0 +1,64 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include 
+#include 
+#include 
+
+#define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
+#define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP PERF_REG_POWERPC_NIP
+#define PERF_REG_SP PERF_REG_POWERPC_GPR1
+
+static const char *reg_names[] = {
+   [PERF_REG_POWERPC_GPR0] = "gpr0",
+   [PERF_REG_POWERPC_GPR1] = "gpr1",
+   [PERF_REG_POWERPC_GPR2] = "gpr2",
+   [PERF_REG_POWERPC_GPR3] = "gpr3",
+   [PERF_REG_POWERPC_GPR4] = "gpr4",
+   [PERF_REG_POWERPC_GPR5] = "gpr5",
+   [PERF_REG_POWERPC_GPR6] = "gpr6",
+   [PERF_REG_POWERPC_GPR7] = "gpr7",
+   [PERF_REG_POWERPC_GPR8] = "gpr8",
+   [PERF_REG_POWERPC_GPR9] = "gpr9",
+   [PERF_REG_POWERPC_GPR10] = "gpr10",
+   [PERF_REG_POWERPC_GPR11] = "gpr11",
+   [PERF_REG_POWERPC_GPR12] = "gpr12",
+   [PERF_REG_POWERPC_GPR13] = "gpr13",
+   [PERF_REG_POWERPC_GPR14] = "gpr14",
+   [PERF_REG_POWERPC_GPR15] = "gpr15",
+   [PERF_REG_POWERPC_GPR16] = "gpr16",
+   [PERF_REG_POWERPC_GPR17] = "gpr17",
+   [PERF_REG_POWERPC_GPR18] = "gpr18",
+   [PERF_REG_POWERPC_GPR19] = "gpr19",
+   [PERF_REG_POWERPC_GPR20] = "gpr20",
+   [PERF_REG_POWERPC_GPR21] = "gpr21",
+   [PERF_REG_POWERPC_GPR22] = "gpr22",
+   [PERF_REG_POWERPC_GPR23] = "gpr23",
+   [PERF_REG_POWERPC_GPR24] = "gpr24",
+   [PERF_REG_POWERPC_GPR25] = "gpr25",
+   [PERF_REG_POWERPC_GPR26] = "gpr26",
+   [PERF_REG_POWERPC_GPR27] = "gpr27",
+   [PERF_REG_POWERPC_GPR28] = "gpr28",
+   [PERF_REG_POWERPC_GPR29] = "gpr29",
+   [PERF_REG_POWERPC_GPR30] = "gpr30",
+   [PERF_REG_POWERPC_GPR31] = "gpr31",
+   [PERF_REG_POWERPC_NIP] = "nip",
+   [PERF_REG_POWERPC_MSR] = "msr",
+   [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
+   [PERF_REG_POWERPC_CTR] = "ctr",
+   [PERF_REG_POWERPC_LNK] = "lnk",
+   [PERF_REG_POWERPC_XER] = "xer",
+   [PERF_REG_POWERPC_CCR] = "ccr",
+   [PERF_REG_POWERPC_TRAP] = "trap",
+   [PERF_REG_POWERPC_DAR] = "dar",
+   [PERF_REG_POWERPC_DSIS

[PATCH V1 4/4] tool/perf: Add sample_reg_mask to include all perf_regs regs

2016-01-08 Thread Anju T
From: Madhavan Srinivasan 

Add sample_reg_mask array with pt_regs registers.
This is needed for printing supported regs ( -I? option).

Signed-off-by: Madhavan Srinivasan 
---
 tools/perf/arch/powerpc/util/Build   |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c | 48 
 2 files changed, 49 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..3deb1bc 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += perf_regs.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c 
b/tools/perf/arch/powerpc/util/perf_regs.c
new file mode 100644
index 000..0b0ec65
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -0,0 +1,48 @@
+#include "../../perf.h"
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+   SMPL_REG(gpr0, PERF_REG_POWERPC_GPR0),
+   SMPL_REG(gpr1, PERF_REG_POWERPC_GPR1),
+   SMPL_REG(gpr2, PERF_REG_POWERPC_GPR2),
+   SMPL_REG(gpr3, PERF_REG_POWERPC_GPR3),
+   SMPL_REG(gpr4, PERF_REG_POWERPC_GPR4),
+   SMPL_REG(gpr5, PERF_REG_POWERPC_GPR5),
+   SMPL_REG(gpr6, PERF_REG_POWERPC_GPR6),
+   SMPL_REG(gpr7, PERF_REG_POWERPC_GPR7),
+   SMPL_REG(gpr8, PERF_REG_POWERPC_GPR8),
+   SMPL_REG(gpr9, PERF_REG_POWERPC_GPR9),
+   SMPL_REG(gpr10, PERF_REG_POWERPC_GPR10),
+   SMPL_REG(gpr11, PERF_REG_POWERPC_GPR11),
+   SMPL_REG(gpr12, PERF_REG_POWERPC_GPR12),
+   SMPL_REG(gpr13, PERF_REG_POWERPC_GPR13),
+   SMPL_REG(gpr14, PERF_REG_POWERPC_GPR14),
+   SMPL_REG(gpr15, PERF_REG_POWERPC_GPR15),
+   SMPL_REG(gpr16, PERF_REG_POWERPC_GPR16),
+   SMPL_REG(gpr17, PERF_REG_POWERPC_GPR17),
+   SMPL_REG(gpr18, PERF_REG_POWERPC_GPR18),
+   SMPL_REG(gpr19, PERF_REG_POWERPC_GPR19),
+   SMPL_REG(gpr20, PERF_REG_POWERPC_GPR20),
+   SMPL_REG(gpr21, PERF_REG_POWERPC_GPR21),
+   SMPL_REG(gpr22, PERF_REG_POWERPC_GPR22),
+   SMPL_REG(gpr23, PERF_REG_POWERPC_GPR23),
+   SMPL_REG(gpr24, PERF_REG_POWERPC_GPR24),
+   SMPL_REG(gpr25, PERF_REG_POWERPC_GPR25),
+   SMPL_REG(gpr26, PERF_REG_POWERPC_GPR26),
+   SMPL_REG(gpr27, PERF_REG_POWERPC_GPR27),
+   SMPL_REG(gpr28, PERF_REG_POWERPC_GPR28),
+   SMPL_REG(gpr29, PERF_REG_POWERPC_GPR29),
+   SMPL_REG(gpr30, PERF_REG_POWERPC_GPR30),
+   SMPL_REG(gpr31, PERF_REG_POWERPC_GPR31),
+   SMPL_REG(nip, PERF_REG_POWERPC_NIP),
+   SMPL_REG(msr, PERF_REG_POWERPC_MSR),
+   SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3),
+   SMPL_REG(ctr, PERF_REG_POWERPC_CTR),
+   SMPL_REG(link, PERF_REG_POWERPC_LNK),
+   SMPL_REG(xer, PERF_REG_POWERPC_XER),
+   SMPL_REG(ccr, PERF_REG_POWERPC_CCR),
+   SMPL_REG(trap, PERF_REG_POWERPC_TRAP),
+   SMPL_REG(dar, PERF_REG_POWERPC_DAR),
+   SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
+   SMPL_REG_END
+};
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V8 0/4] perf/powerpc: Add ability to sample intr machine state in powerpc

2016-01-08 Thread Anju T
This short patch series adds the ability to sample the interrupted
machine state for each hardware sample.

To test this patchset,
Eg:

$ perf record -I?   # list supported registers

output:

available registers: gpr0 gpr1 gpr2 gpr3 gpr4 gpr5 gpr6 gpr7 gpr8 gpr9 gpr10 
gpr11 gpr12 gpr13 gpr14 gpr15 gpr16 gpr17 gpr18 gpr19 gpr20 gpr21 gpr22 gpr23 
gpr24 gpr25 gpr26 gpr27 gpr28 gpr29 gpr30 gpr31 nip msr orig_r3 ctr link xer 
ccr trap dar dsisr
usage: perf record [] []
or: perf record [] --  []
 -I, --intr-regs[=]
sample selected machine registers on interrupt, use -I ? to list register names


$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patchset looks as follows:

178329381464 0x138 [0x180]: PERF_RECORD_SAMPLE(IP, 0x1): 7803/7803: 
0xc000fd9c period: 1 addr: 0
... intr regs: mask 0x3ff ABI 64-bit
 gpr0  0xc01a6420
 gpr1  0xc01e4df039b0
 gpr2  0xc0cdd100
 gpr3  0x1
 gpr4  0xc01e4a96d000
 gpr5  0x29854255ba
 gpr6  0xc00ffa3050b8
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x0
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfe03000
 gpr14 0x0
 gpr15 0xc0d763f8
 gpr16 0x0
 gpr17 0xc01e4ddcf000
 gpr18 0x0
 gpr19 0xc00ffa305000
 gpr20 0xc01e4df038c0
 gpr21 0xc01e40ed7a00
 gpr22 0xc00aa28c
 gpr23 0xc0cdd100
 gpr24 0x0
 gpr25 0xc0cdd100
 gpr26 0xc01e4df038b0
 gpr27 0xfeae
 gpr28 0xc01e4df03880
 gpr29 0xc0dce900
 gpr30 0xc01e4df03890
 gpr31 0xc01e355c7a30
 nip   0xc01a62d8
 msr   0x90009032
 orig_r3 0xc01a6320
 ctr   0xc00a7be0
 lnk   0xc01a6428
 xer   0x0
 ccr   0x24022888
 trap  0xf01
 dar   0xc01e40ed7a00
 dsisr 0x3000c006004
 ... thread: :7803:7803
 .. dso: /root/.debug/.build-id/d0/eb47b06c0d294143af13c50616f638c2d88658
   :7803  7803   178.329381:  1 cycles:  c000fd9c 
.arch_local_irq_restore (/boot/vmlinux)

Changes from V7:

- Addressed the new line issue in 3rd patch.

Changes from V6:

- Corrected the typo in patch  tools/perf: Map the ID values with register 
names.
  ie #define PERF_REG_SP  PERF_REG_POWERPC_R1 should be #define PERF_REG_SP   
PERF_REG_POWERPC_GPR1


Changes from V5:

- Enabled perf_sample_regs_user also in this patch set. Functions added in
   arch/powerpc/perf/perf_regs.c
- Added Maddy's patch to this patchset for enabling -I? option which will
  list the supported register names.


Changes from V4:

- Removed the softe and MQ from all patches
- Switch case is replaced with an array in the 3rd patch

Changes from V3:

- Addressed the comments by Sukadev regarding the nits in the descriptions.
- Modified the subject of first patch.
- Included the sample output in the 3rd patch also.

Changes from V2:

- tools/perf/config/Makefile is moved to the patch tools/perf.
- The patchset is reordered.
- perf_regs_load() function is used for the dwarf unwind test. Since it is not
required here,
  it is removed from tools/perf/arch/powerpc/include/perf_regs.h
- PERF_REGS_POWERPC_RESULT is removed.

Changes from V1:

- Solved the name mismatch issue in the From and Signed-off-by fields of the patch
series.
- Added necessary comments in the 3rd patch ie perf/powerpc ,as suggested by 
Maddy.



Anju T (3):
  perf/powerpc: assign an id to each powerpc register
  perf/powerpc: add support for sampling intr machine state
  tools/perf: Map the ID values with register names

Madhavan Srinivasan (1):
  tool/perf: Add sample_reg_mask to include all perf_regs regs

 arch/powerpc/Kconfig|  1 +
 arch/powerpc/include/uapi/asm/perf_regs.h   | 49 +
 arch/powerpc/perf/Makefile  |  1 +
 arch/powerpc/perf/perf_regs.c   | 85 +
 tools/perf/arch/powerpc/include/perf_regs.h | 64 ++
 tools/perf/arch/powerpc/util/Build  |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c| 48 
 tools/perf/config/Makefile  |  5 ++
 8 files changed, 254 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h
 create mode 100644 arch/powerpc/perf/perf_regs.c
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V9 3/4] tools/perf: Map the ID values with register names

2016-01-08 Thread Anju T
Map ID values with corresponding register names. These names are then
displayed when user issues perf record with the -I option
followed by perf report/script with -D option.

To test this patchset,
Eg:

$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patch looks as follows:

178329381464 0x138 [0x180]: PERF_RECORD_SAMPLE(IP, 0x1): 7803/7803: 
0xc000fd9c period: 1 addr: 0
... intr regs: mask 0x3ff ABI 64-bit
 gpr0  0xc01a6420
 gpr1  0xc01e4df039b0
 gpr2  0xc0cdd100
 gpr3  0x1
 gpr4  0xc01e4a96d000
 gpr5  0x29854255ba
 gpr6  0xc00ffa3050b8
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x0
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfe03000
 gpr14 0x0
 gpr15 0xc0d763f8
 gpr16 0x0
 gpr17 0xc01e4ddcf000
 gpr18 0x0
 gpr19 0xc00ffa305000
 gpr20 0xc01e4df038c0
 gpr21 0xc01e40ed7a00
 gpr22 0xc00aa28c
 gpr23 0xc0cdd100
 gpr24 0x0
 gpr25 0xc0cdd100
 gpr26 0xc01e4df038b0
 gpr27 0xfeae
 gpr28 0xc01e4df03880
 gpr29 0xc0dce900
 gpr30 0xc01e4df03890
 gpr31 0xc01e355c7a30
 nip   0xc01a62d8
 msr   0x90009032
 orig_r3 0xc01a6320
 ctr   0xc00a7be0
 lnk   0xc01a6428
 xer   0x0
 ccr   0x24022888
 trap  0xf01
 dar   0xc01e40ed7a00
 dsisr 0x3000c006004
 ... thread: :7803:7803
 .. dso: /root/.debug/.build-id/d0/eb47b06c0d294143af13c50616f638c2d88658
   :7803  7803   178.329381:  1 cycles:  c000fd9c 
.arch_local_irq_restore (/boot/vmlinux)


Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/include/perf_regs.h | 64 +
 tools/perf/config/Makefile  |  5 +++
 2 files changed, 69 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h

diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
new file mode 100644
index 000..c725834
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -0,0 +1,64 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include 
+#include 
+#include 
+
+#define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
+#define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP PERF_REG_POWERPC_NIP
+#define PERF_REG_SP PERF_REG_POWERPC_GPR1
+
+static const char *reg_names[] = {
+   [PERF_REG_POWERPC_GPR0] = "gpr0",
+   [PERF_REG_POWERPC_GPR1] = "gpr1",
+   [PERF_REG_POWERPC_GPR2] = "gpr2",
+   [PERF_REG_POWERPC_GPR3] = "gpr3",
+   [PERF_REG_POWERPC_GPR4] = "gpr4",
+   [PERF_REG_POWERPC_GPR5] = "gpr5",
+   [PERF_REG_POWERPC_GPR6] = "gpr6",
+   [PERF_REG_POWERPC_GPR7] = "gpr7",
+   [PERF_REG_POWERPC_GPR8] = "gpr8",
+   [PERF_REG_POWERPC_GPR9] = "gpr9",
+   [PERF_REG_POWERPC_GPR10] = "gpr10",
+   [PERF_REG_POWERPC_GPR11] = "gpr11",
+   [PERF_REG_POWERPC_GPR12] = "gpr12",
+   [PERF_REG_POWERPC_GPR13] = "gpr13",
+   [PERF_REG_POWERPC_GPR14] = "gpr14",
+   [PERF_REG_POWERPC_GPR15] = "gpr15",
+   [PERF_REG_POWERPC_GPR16] = "gpr16",
+   [PERF_REG_POWERPC_GPR17] = "gpr17",
+   [PERF_REG_POWERPC_GPR18] = "gpr18",
+   [PERF_REG_POWERPC_GPR19] = "gpr19",
+   [PERF_REG_POWERPC_GPR20] = "gpr20",
+   [PERF_REG_POWERPC_GPR21] = "gpr21",
+   [PERF_REG_POWERPC_GPR22] = "gpr22",
+   [PERF_REG_POWERPC_GPR23] = "gpr23",
+   [PERF_REG_POWERPC_GPR24] = "gpr24",
+   [PERF_REG_POWERPC_GPR25] = "gpr25",
+   [PERF_REG_POWERPC_GPR26] = "gpr26",
+   [PERF_REG_POWERPC_GPR27] = "gpr27",
+   [PERF_REG_POWERPC_GPR28] = "gpr28",
+   [PERF_REG_POWERPC_GPR29] = "gpr29",
+   [PERF_REG_POWERPC_GPR30] = "gpr30",
+   [PERF_REG_POWERPC_GPR31] = "gpr31",
+   [PERF_REG_POWERPC_NIP] = "nip",
+   [PERF_REG_POWERPC_MSR] = "msr",
+   [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
+   [PERF_REG_POWERPC_CTR] = "ctr",
+   [PERF_REG_POWERPC_LNK] = "lnk",
+   [PERF_REG_POWERPC_XER] = "xer",
+   [PERF_REG_POWERPC_CCR] = "ccr",
+   [PERF_REG_POWERPC_TRAP] = "trap",
+   [PERF_REG_POWERPC_DAR] = "dar",
+   [PERF_REG_POWERPC_DSIS

[PATCH V9 0/4] perf/powerpc: Add ability to sample intr machine state in powerpc

2016-01-08 Thread Anju T
This short patch series adds the ability to sample the interrupted
machine state for each hardware sample.

To test this patchset,
Eg:

$ perf record -I?   # list supported registers

output:

available registers: gpr0 gpr1 gpr2 gpr3 gpr4 gpr5 gpr6 gpr7 gpr8 gpr9 gpr10 
gpr11 gpr12 gpr13 gpr14 gpr15 gpr16 gpr17 gpr18 gpr19 gpr20 gpr21 gpr22 gpr23 
gpr24 gpr25 gpr26 gpr27 gpr28 gpr29 gpr30 gpr31 nip msr orig_r3 ctr link xer 
ccr trap dar dsisr
usage: perf record [] []
or: perf record [] --  []
 -I, --intr-regs[=]
sample selected machine registers on interrupt, use -I ? to list register names


$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patchset looks as follows:

178329381464 0x138 [0x180]: PERF_RECORD_SAMPLE(IP, 0x1): 7803/7803: 
0xc000fd9c period: 1 addr: 0
... intr regs: mask 0x3ff ABI 64-bit
 gpr0  0xc01a6420
 gpr1  0xc01e4df039b0
 gpr2  0xc0cdd100
 gpr3  0x1
 gpr4  0xc01e4a96d000
 gpr5  0x29854255ba
 gpr6  0xc00ffa3050b8
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x0
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfe03000
 gpr14 0x0
 gpr15 0xc0d763f8
 gpr16 0x0
 gpr17 0xc01e4ddcf000
 gpr18 0x0
 gpr19 0xc00ffa305000
 gpr20 0xc01e4df038c0
 gpr21 0xc01e40ed7a00
 gpr22 0xc00aa28c
 gpr23 0xc0cdd100
 gpr24 0x0
 gpr25 0xc0cdd100
 gpr26 0xc01e4df038b0
 gpr27 0xfeae
 gpr28 0xc01e4df03880
 gpr29 0xc0dce900
 gpr30 0xc01e4df03890
 gpr31 0xc01e355c7a30
 nip   0xc01a62d8
 msr   0x90009032
 orig_r3 0xc01a6320
 ctr   0xc00a7be0
 lnk   0xc01a6428
 xer   0x0
 ccr   0x24022888
 trap  0xf01
 dar   0xc01e40ed7a00
 dsisr 0x3000c006004
 ... thread: :7803:7803
 .. dso: /root/.debug/.build-id/d0/eb47b06c0d294143af13c50616f638c2d88658
   :7803  7803   178.329381:  1 cycles:  c000fd9c 
.arch_local_irq_restore (/boot/vmlinux)

changes from V8:

- Corrected the indentation issue in the Makefile mentioned in 3rd patch

Changes from V7:

- Addressed the new line issue in 3rd patch.

Changes from V6:

- Corrected the typo in patch  tools/perf: Map the ID values with register 
names.
  ie #define PERF_REG_SP  PERF_REG_POWERPC_R1 should be #define PERF_REG_SP   
PERF_REG_POWERPC_GPR1


Changes from V5:

- Enabled perf_sample_regs_user also in this patch set. Functions added in
   arch/powerpc/perf/perf_regs.c
- Added Maddy's patch to this patchset for enabling -I? option which will
  list the supported register names.


Changes from V4:

- Removed the softe and MQ from all patches
- Switch case is replaced with an array in the 3rd patch

Changes from V3:

- Addressed the comments by Sukadev regarding the nits in the descriptions.
- Modified the subject of first patch.
- Included the sample output in the 3rd patch also.

Changes from V2:

- tools/perf/config/Makefile is moved to the patch tools/perf.
- The patchset is reordered.
- perf_regs_load() function is used for the dwarf unwind test. Since it is not
required here,
  it is removed from tools/perf/arch/powerpc/include/perf_regs.h
- PERF_REGS_POWERPC_RESULT is removed.

Changes from V1:

- Solved the name mismatch issue in the From and Signed-off-by fields of the patch
series.
- Added necessary comments in the 3rd patch ie perf/powerpc ,as suggested by 
Maddy.



Anju T (3):
  perf/powerpc: assign an id to each powerpc register
  perf/powerpc: add support for sampling intr machine state
  tools/perf: Map the ID values with register names

Madhavan Srinivasan (1):
  tool/perf: Add sample_reg_mask to include all perf_regs regs

 arch/powerpc/Kconfig|  1 +
 arch/powerpc/include/uapi/asm/perf_regs.h   | 49 +
 arch/powerpc/perf/Makefile  |  1 +
 arch/powerpc/perf/perf_regs.c   | 85 +
 tools/perf/arch/powerpc/include/perf_regs.h | 64 ++
 tools/perf/arch/powerpc/util/Build  |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c| 48 
 tools/perf/config/Makefile  |  5 ++
 8 files changed, 254 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h
 create mode 100644 arch/powerpc/perf/perf_regs.c
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V9 1/4] perf/powerpc: assign an id to each powerpc register

2016-01-08 Thread Anju T
The enum definition assigns an 'id' to each register in "struct pt_regs"
of arch/powerpc. The order of these values in the enum definition are
based on the corresponding macros in arch/powerpc/include/uapi/asm/ptrace.h.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/include/uapi/asm/perf_regs.h | 49 +++
 1 file changed, 49 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h

diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000..cfbd068
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,49 @@
+#ifndef _ASM_POWERPC_PERF_REGS_H
+#define _ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+   PERF_REG_POWERPC_GPR0,
+   PERF_REG_POWERPC_GPR1,
+   PERF_REG_POWERPC_GPR2,
+   PERF_REG_POWERPC_GPR3,
+   PERF_REG_POWERPC_GPR4,
+   PERF_REG_POWERPC_GPR5,
+   PERF_REG_POWERPC_GPR6,
+   PERF_REG_POWERPC_GPR7,
+   PERF_REG_POWERPC_GPR8,
+   PERF_REG_POWERPC_GPR9,
+   PERF_REG_POWERPC_GPR10,
+   PERF_REG_POWERPC_GPR11,
+   PERF_REG_POWERPC_GPR12,
+   PERF_REG_POWERPC_GPR13,
+   PERF_REG_POWERPC_GPR14,
+   PERF_REG_POWERPC_GPR15,
+   PERF_REG_POWERPC_GPR16,
+   PERF_REG_POWERPC_GPR17,
+   PERF_REG_POWERPC_GPR18,
+   PERF_REG_POWERPC_GPR19,
+   PERF_REG_POWERPC_GPR20,
+   PERF_REG_POWERPC_GPR21,
+   PERF_REG_POWERPC_GPR22,
+   PERF_REG_POWERPC_GPR23,
+   PERF_REG_POWERPC_GPR24,
+   PERF_REG_POWERPC_GPR25,
+   PERF_REG_POWERPC_GPR26,
+   PERF_REG_POWERPC_GPR27,
+   PERF_REG_POWERPC_GPR28,
+   PERF_REG_POWERPC_GPR29,
+   PERF_REG_POWERPC_GPR30,
+   PERF_REG_POWERPC_GPR31,
+   PERF_REG_POWERPC_NIP,
+   PERF_REG_POWERPC_MSR,
+   PERF_REG_POWERPC_ORIG_R3,
+   PERF_REG_POWERPC_CTR,
+   PERF_REG_POWERPC_LNK,
+   PERF_REG_POWERPC_XER,
+   PERF_REG_POWERPC_CCR,
+   PERF_REG_POWERPC_TRAP,
+   PERF_REG_POWERPC_DAR,
+   PERF_REG_POWERPC_DSISR,
+   PERF_REG_POWERPC_MAX,
+};
+#endif /* _ASM_POWERPC_PERF_REGS_H */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V9 2/4] perf/powerpc: add support for sampling intr machine state

2016-01-08 Thread Anju T
The perf infrastructure uses a bit mask to find out valid
registers to display. Define a register mask for supported
registers defined in asm/perf_regs.h. The bit positions also
correspond to register IDs which is used by perf infrastructure
to fetch the register values. CONFIG_HAVE_PERF_REGS enables
sampling of the interrupted machine state.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/perf/Makefile|  1 +
 arch/powerpc/perf/perf_regs.c | 85 +++
 3 files changed, 87 insertions(+)
 create mode 100644 arch/powerpc/perf/perf_regs.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9a7057e..c4ce60d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -119,6 +119,7 @@ config PPC
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
+   select HAVE_PERF_REGS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index f9c083a..2f2d3d2 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -8,6 +8,7 @@ obj64-$(CONFIG_PPC_PERF_CTRS)   += power4-pmu.o ppc970-pmu.o 
power5-pmu.o \
   power8-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 
+obj-$(CONFIG_PERF_EVENTS)  += perf_regs.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000..d32581763
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,85 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR0, gpr[0]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR1, gpr[1]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR2, gpr[2]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR3, gpr[3]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR4, gpr[4]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR5, gpr[5]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR6, gpr[6]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR7, gpr[7]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR8, gpr[8]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR9, gpr[9]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR10, gpr[10]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR11, gpr[11]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR12, gpr[12]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR13, gpr[13]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR14, gpr[14]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR15, gpr[15]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR16, gpr[16]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR17, gpr[17]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR18, gpr[18]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR19, gpr[19]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR20, gpr[20]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR21, gpr[21]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR22, gpr[22]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR23, gpr[23]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR24, gpr[24]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR25, gpr[25]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR26, gpr[26]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR27, gpr[27]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR28, gpr[28]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR29, gpr[29]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR30, gpr[30]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR31, gpr[31]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_LNK, link),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+   if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
+   return 0;
+
+   return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+int perf_reg_validate(u64 mask)
+{
+   if (!mask || mask & REG_RESERVED)
+   return -EINVAL;
+   return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+   return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs

[PATCH V1 4/4] tool/perf: Add sample_reg_mask to include all perf_regs regs

2016-01-08 Thread Anju T
From: Madhavan Srinivasan 

Add sample_reg_mask array with pt_regs registers.
This is needed for printing supported regs ( -I? option).

Signed-off-by: Madhavan Srinivasan 
---
 tools/perf/arch/powerpc/util/Build   |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c | 48 
 2 files changed, 49 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..3deb1bc 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += perf_regs.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c 
b/tools/perf/arch/powerpc/util/perf_regs.c
new file mode 100644
index 000..0b0ec65
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -0,0 +1,48 @@
+#include "../../perf.h"
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+   SMPL_REG(gpr0, PERF_REG_POWERPC_GPR0),
+   SMPL_REG(gpr1, PERF_REG_POWERPC_GPR1),
+   SMPL_REG(gpr2, PERF_REG_POWERPC_GPR2),
+   SMPL_REG(gpr3, PERF_REG_POWERPC_GPR3),
+   SMPL_REG(gpr4, PERF_REG_POWERPC_GPR4),
+   SMPL_REG(gpr5, PERF_REG_POWERPC_GPR5),
+   SMPL_REG(gpr6, PERF_REG_POWERPC_GPR6),
+   SMPL_REG(gpr7, PERF_REG_POWERPC_GPR7),
+   SMPL_REG(gpr8, PERF_REG_POWERPC_GPR8),
+   SMPL_REG(gpr9, PERF_REG_POWERPC_GPR9),
+   SMPL_REG(gpr10, PERF_REG_POWERPC_GPR10),
+   SMPL_REG(gpr11, PERF_REG_POWERPC_GPR11),
+   SMPL_REG(gpr12, PERF_REG_POWERPC_GPR12),
+   SMPL_REG(gpr13, PERF_REG_POWERPC_GPR13),
+   SMPL_REG(gpr14, PERF_REG_POWERPC_GPR14),
+   SMPL_REG(gpr15, PERF_REG_POWERPC_GPR15),
+   SMPL_REG(gpr16, PERF_REG_POWERPC_GPR16),
+   SMPL_REG(gpr17, PERF_REG_POWERPC_GPR17),
+   SMPL_REG(gpr18, PERF_REG_POWERPC_GPR18),
+   SMPL_REG(gpr19, PERF_REG_POWERPC_GPR19),
+   SMPL_REG(gpr20, PERF_REG_POWERPC_GPR20),
+   SMPL_REG(gpr21, PERF_REG_POWERPC_GPR21),
+   SMPL_REG(gpr22, PERF_REG_POWERPC_GPR22),
+   SMPL_REG(gpr23, PERF_REG_POWERPC_GPR23),
+   SMPL_REG(gpr24, PERF_REG_POWERPC_GPR24),
+   SMPL_REG(gpr25, PERF_REG_POWERPC_GPR25),
+   SMPL_REG(gpr26, PERF_REG_POWERPC_GPR26),
+   SMPL_REG(gpr27, PERF_REG_POWERPC_GPR27),
+   SMPL_REG(gpr28, PERF_REG_POWERPC_GPR28),
+   SMPL_REG(gpr29, PERF_REG_POWERPC_GPR29),
+   SMPL_REG(gpr30, PERF_REG_POWERPC_GPR30),
+   SMPL_REG(gpr31, PERF_REG_POWERPC_GPR31),
+   SMPL_REG(nip, PERF_REG_POWERPC_NIP),
+   SMPL_REG(msr, PERF_REG_POWERPC_MSR),
+   SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3),
+   SMPL_REG(ctr, PERF_REG_POWERPC_CTR),
+   SMPL_REG(link, PERF_REG_POWERPC_LNK),
+   SMPL_REG(xer, PERF_REG_POWERPC_XER),
+   SMPL_REG(ccr, PERF_REG_POWERPC_CCR),
+   SMPL_REG(trap, PERF_REG_POWERPC_TRAP),
+   SMPL_REG(dar, PERF_REG_POWERPC_DAR),
+   SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
+   SMPL_REG_END
+};
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V7 1/4] perf/powerpc: assign an id to each powerpc register

2016-01-06 Thread Anju T
The enum definition assigns an 'id' to each register in "struct pt_regs"
of arch/powerpc. The order of these values in the enum definition are
based on the corresponding macros in arch/powerpc/include/uapi/asm/ptrace.h.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/include/uapi/asm/perf_regs.h | 49 +++
 1 file changed, 49 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h

diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000..cfbd068
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,49 @@
+#ifndef _ASM_POWERPC_PERF_REGS_H
+#define _ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+   PERF_REG_POWERPC_GPR0,
+   PERF_REG_POWERPC_GPR1,
+   PERF_REG_POWERPC_GPR2,
+   PERF_REG_POWERPC_GPR3,
+   PERF_REG_POWERPC_GPR4,
+   PERF_REG_POWERPC_GPR5,
+   PERF_REG_POWERPC_GPR6,
+   PERF_REG_POWERPC_GPR7,
+   PERF_REG_POWERPC_GPR8,
+   PERF_REG_POWERPC_GPR9,
+   PERF_REG_POWERPC_GPR10,
+   PERF_REG_POWERPC_GPR11,
+   PERF_REG_POWERPC_GPR12,
+   PERF_REG_POWERPC_GPR13,
+   PERF_REG_POWERPC_GPR14,
+   PERF_REG_POWERPC_GPR15,
+   PERF_REG_POWERPC_GPR16,
+   PERF_REG_POWERPC_GPR17,
+   PERF_REG_POWERPC_GPR18,
+   PERF_REG_POWERPC_GPR19,
+   PERF_REG_POWERPC_GPR20,
+   PERF_REG_POWERPC_GPR21,
+   PERF_REG_POWERPC_GPR22,
+   PERF_REG_POWERPC_GPR23,
+   PERF_REG_POWERPC_GPR24,
+   PERF_REG_POWERPC_GPR25,
+   PERF_REG_POWERPC_GPR26,
+   PERF_REG_POWERPC_GPR27,
+   PERF_REG_POWERPC_GPR28,
+   PERF_REG_POWERPC_GPR29,
+   PERF_REG_POWERPC_GPR30,
+   PERF_REG_POWERPC_GPR31,
+   PERF_REG_POWERPC_NIP,
+   PERF_REG_POWERPC_MSR,
+   PERF_REG_POWERPC_ORIG_R3,
+   PERF_REG_POWERPC_CTR,
+   PERF_REG_POWERPC_LNK,
+   PERF_REG_POWERPC_XER,
+   PERF_REG_POWERPC_CCR,
+   PERF_REG_POWERPC_TRAP,
+   PERF_REG_POWERPC_DAR,
+   PERF_REG_POWERPC_DSISR,
+   PERF_REG_POWERPC_MAX,
+};
+#endif /* _ASM_POWERPC_PERF_REGS_H */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V7 3/4] tools/perf: Map the ID values with register names

2016-01-06 Thread Anju T
Map ID values with corresponding register names. These names are then
displayed when user issues perf record with the -I option
followed by perf report/script with -D option.

To test this patchset,
Eg:

$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patch looks as follows:

178329381464 0x138 [0x180]: PERF_RECORD_SAMPLE(IP, 0x1): 7803/7803: 
0xc000fd9c period: 1 addr: 0
... intr regs: mask 0x3ff ABI 64-bit
 gpr0  0xc01a6420
 gpr1  0xc01e4df039b0
 gpr2  0xc0cdd100
 gpr3  0x1
 gpr4  0xc01e4a96d000
 gpr5  0x29854255ba
 gpr6  0xc00ffa3050b8
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x0
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfe03000
 gpr14 0x0
 gpr15 0xc0d763f8
 gpr16 0x0
 gpr17 0xc01e4ddcf000
 gpr18 0x0
 gpr19 0xc00ffa305000
 gpr20 0xc01e4df038c0
 gpr21 0xc01e40ed7a00
 gpr22 0xc00aa28c
 gpr23 0xc0cdd100
 gpr24 0x0
 gpr25 0xc0cdd100
 gpr26 0xc01e4df038b0
 gpr27 0xfeae
 gpr28 0xc01e4df03880
 gpr29 0xc0dce900
 gpr30 0xc01e4df03890
 gpr31 0xc01e355c7a30
 nip   0xc01a62d8
 msr   0x90009032
 orig_r3 0xc01a6320
 ctr   0xc00a7be0
 lnk   0xc01a6428
 xer   0x0
 ccr   0x24022888
 trap  0xf01
 dar   0xc01e40ed7a00
 dsisr 0x3000c006004
 ... thread: :7803:7803
 .. dso: /root/.debug/.build-id/d0/eb47b06c0d294143af13c50616f638c2d88658
   :7803  7803   178.329381:  1 cycles:  c000fd9c 
.arch_local_irq_restore (/boot/vmlinux)


Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/include/perf_regs.h | 65 +
 tools/perf/config/Makefile  |  5 +++
 2 files changed, 70 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h

diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
new file mode 100644
index 000..2114684
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -0,0 +1,65 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include 
+#include 
+#include 
+
+#define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
+#define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP PERF_REG_POWERPC_NIP
+#define PERF_REG_SP PERF_REG_POWERPC_GPR1
+
+static const char *reg_names[] = {
+   [PERF_REG_POWERPC_GPR0] = "gpr0",
+   [PERF_REG_POWERPC_GPR1] = "gpr1",
+   [PERF_REG_POWERPC_GPR2] = "gpr2",
+   [PERF_REG_POWERPC_GPR3] = "gpr3",
+   [PERF_REG_POWERPC_GPR4] = "gpr4",
+   [PERF_REG_POWERPC_GPR5] = "gpr5",
+   [PERF_REG_POWERPC_GPR6] = "gpr6",
+   [PERF_REG_POWERPC_GPR7] = "gpr7",
+   [PERF_REG_POWERPC_GPR8] = "gpr8",
+   [PERF_REG_POWERPC_GPR9] = "gpr9",
+   [PERF_REG_POWERPC_GPR10] = "gpr10",
+   [PERF_REG_POWERPC_GPR11] = "gpr11",
+   [PERF_REG_POWERPC_GPR12] = "gpr12",
+   [PERF_REG_POWERPC_GPR13] = "gpr13",
+   [PERF_REG_POWERPC_GPR14] = "gpr14",
+   [PERF_REG_POWERPC_GPR15] = "gpr15",
+   [PERF_REG_POWERPC_GPR16] = "gpr16",
+   [PERF_REG_POWERPC_GPR17] = "gpr17",
+   [PERF_REG_POWERPC_GPR18] = "gpr18",
+   [PERF_REG_POWERPC_GPR19] = "gpr19",
+   [PERF_REG_POWERPC_GPR20] = "gpr20",
+   [PERF_REG_POWERPC_GPR21] = "gpr21",
+   [PERF_REG_POWERPC_GPR22] = "gpr22",
+   [PERF_REG_POWERPC_GPR23] = "gpr23",
+   [PERF_REG_POWERPC_GPR24] = "gpr24",
+   [PERF_REG_POWERPC_GPR25] = "gpr25",
+   [PERF_REG_POWERPC_GPR26] = "gpr26",
+   [PERF_REG_POWERPC_GPR27] = "gpr27",
+   [PERF_REG_POWERPC_GPR28] = "gpr28",
+   [PERF_REG_POWERPC_GPR29] = "gpr29",
+   [PERF_REG_POWERPC_GPR30] = "gpr30",
+   [PERF_REG_POWERPC_GPR31] = "gpr31",
+   [PERF_REG_POWERPC_NIP] = "nip",
+   [PERF_REG_POWERPC_MSR] = "msr",
+   [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
+   [PERF_REG_POWERPC_CTR] = "ctr",
+   [PERF_REG_POWERPC_LNK] = "lnk",
+   [PERF_REG_POWERPC_XER] = "xer",
+   [PERF_REG_POWERPC_CCR] = "ccr",
+   [PERF_REG_POWERPC_TRAP] = "trap",
+   [PERF_REG_POWERPC_DAR] = "dar",
+   [PERF_REG_POWERPC_DSISR]

[PATCH V7 0/4] perf/powerpc: Add ability to sample intr machine state in powerpc

2016-01-06 Thread Anju T
This short patch series adds the ability to sample the interrupted
machine state for each hardware sample.

To test this patchset,
Eg:

$ perf record -I?   # list supported registers 

output:

available registers: gpr0 gpr1 gpr2 gpr3 gpr4 gpr5 gpr6 gpr7 gpr8 gpr9 gpr10 
gpr11 gpr12 gpr13 gpr14 gpr15 gpr16 gpr17 gpr18 gpr19 gpr20 gpr21 gpr22 gpr23 
gpr24 gpr25 gpr26 gpr27 gpr28 gpr29 gpr30 gpr31 nip msr orig_r3 ctr link xer 
ccr trap dar dsisr
usage: perf record [] []
or: perf record [] --  []
 -I, --intr-regs[=]
sample selected machine registers on interrupt, use -I ? to list register names


$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patchset; the output looks as follows:

178329381464 0x138 [0x180]: PERF_RECORD_SAMPLE(IP, 0x1): 7803/7803: 
0xc000fd9c period: 1 addr: 0
... intr regs: mask 0x3ff ABI 64-bit
 gpr0  0xc01a6420
 gpr1  0xc01e4df039b0
 gpr2  0xc0cdd100
 gpr3  0x1
 gpr4  0xc01e4a96d000
 gpr5  0x29854255ba
 gpr6  0xc00ffa3050b8
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x0
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfe03000
 gpr14 0x0
 gpr15 0xc0d763f8
 gpr16 0x0
 gpr17 0xc01e4ddcf000
 gpr18 0x0
 gpr19 0xc00ffa305000
 gpr20 0xc01e4df038c0
 gpr21 0xc01e40ed7a00
 gpr22 0xc00aa28c
 gpr23 0xc0cdd100
 gpr24 0x0
 gpr25 0xc0cdd100
 gpr26 0xc01e4df038b0
 gpr27 0xfeae
 gpr28 0xc01e4df03880
 gpr29 0xc0dce900
 gpr30 0xc01e4df03890
 gpr31 0xc01e355c7a30
 nip   0xc01a62d8
 msr   0x90009032
 orig_r3 0xc01a6320
 ctr   0xc00a7be0
 lnk   0xc01a6428
 xer   0x0
 ccr   0x24022888
 trap  0xf01
 dar   0xc01e40ed7a00
 dsisr 0x3000c006004
 ... thread: :7803:7803
 .. dso: /root/.debug/.build-id/d0/eb47b06c0d294143af13c50616f638c2d88658
   :7803  7803   178.329381:  1 cycles:  c000fd9c 
.arch_local_irq_restore (/boot/vmlinux)

Changes from V6:

- Corrected the typo in patch  tools/perf: Map the ID values with register 
names.
  ie #define PERF_REG_SP  PERF_REG_POWERPC_R1 should be #define PERF_REG_SP   
PERF_REG_POWERPC_GPR1


Changes from V5:

- Enabled perf_sample_regs_user also in this patch set. Functions added in
   arch/powerpc/perf/perf_regs.c
- Added Maddy's patch to this patchset for enabling -I? option which will
  list the supported register names.


Changes from V4:

- Removed the softe and MQ from all patches
- Switch case is replaced with an array in the 3rd patch

Changes from V3:

- Addressed the comments by Sukadev regarding the nits in the descriptions.
- Modified the subject of first patch.
- Included the sample output in the 3rd patch also.

Changes from V2:

- tools/perf/config/Makefile is moved to the patch tools/perf.
- The patchset is reordered.
- perf_regs_load() function is used for the dwarf unwind test. Since it is not 
required here,
  it is removed from tools/perf/arch/powerpc/include/perf_regs.h
- PERF_REGS_POWERPC_RESULT is removed.

Changes from V1:

- Solved the name mismatch issue in the from and signed-off fields of the patch 
series.
- Added necessary comments in the 3rd patch ie perf/powerpc ,as suggested by 
Maddy.


Anju T (3):
  perf/powerpc: assign an id to each powerpc register
  perf/powerpc: add support for sampling intr machine state
  tools/perf: Map the ID values with register names

Madhavan Srinivasan (1):
  tool/perf: Add sample_reg_mask to include all perf_regs regs

 arch/powerpc/Kconfig|  1 +
 arch/powerpc/include/uapi/asm/perf_regs.h   | 49 +
 arch/powerpc/perf/Makefile  |  1 +
 arch/powerpc/perf/perf_regs.c   | 85 +
 tools/perf/arch/powerpc/include/perf_regs.h | 65 ++
 tools/perf/arch/powerpc/util/Build  |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c| 48 
 tools/perf/config/Makefile  |  5 ++
 8 files changed, 255 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h
 create mode 100644 arch/powerpc/perf/perf_regs.c
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V7 2/4] perf/powerpc: add support for sampling intr machine state

2016-01-06 Thread Anju T
The perf infrastructure uses a bit mask to find out valid
registers to display. Define a register mask for supported
registers defined in asm/perf_regs.h. The bit positions also
correspond to register IDs which is used by perf infrastructure
to fetch the register values. CONFIG_HAVE_PERF_REGS enables
sampling of the interrupted machine state.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/perf/Makefile|  1 +
 arch/powerpc/perf/perf_regs.c | 85 +++
 3 files changed, 87 insertions(+)
 create mode 100644 arch/powerpc/perf/perf_regs.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9a7057e..c4ce60d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -119,6 +119,7 @@ config PPC
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
+   select HAVE_PERF_REGS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index f9c083a..2f2d3d2 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -8,6 +8,7 @@ obj64-$(CONFIG_PPC_PERF_CTRS)   += power4-pmu.o ppc970-pmu.o 
power5-pmu.o \
   power8-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 
+obj-$(CONFIG_PERF_EVENTS)  += perf_regs.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000..d32581763
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,85 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR0, gpr[0]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR1, gpr[1]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR2, gpr[2]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR3, gpr[3]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR4, gpr[4]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR5, gpr[5]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR6, gpr[6]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR7, gpr[7]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR8, gpr[8]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR9, gpr[9]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR10, gpr[10]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR11, gpr[11]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR12, gpr[12]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR13, gpr[13]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR14, gpr[14]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR15, gpr[15]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR16, gpr[16]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR17, gpr[17]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR18, gpr[18]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR19, gpr[19]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR20, gpr[20]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR21, gpr[21]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR22, gpr[22]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR23, gpr[23]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR24, gpr[24]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR25, gpr[25]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR26, gpr[26]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR27, gpr[27]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR28, gpr[28]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR29, gpr[29]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR30, gpr[30]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR31, gpr[31]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_LNK, link),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+   if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
+   return 0;
+
+   return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+int perf_reg_validate(u64 mask)
+{
+   if (!mask || mask & REG_RESERVED)
+   return -EINVAL;
+   return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+   return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs

[PATCH V1 4/4] tool/perf: Add sample_reg_mask to include all perf_regs regs

2016-01-06 Thread Anju T
From: Madhavan Srinivasan 

Add sample_reg_mask array with pt_regs registers.
This is needed for printing supported regs ( -I? option).

Signed-off-by: Madhavan Srinivasan 
---
 tools/perf/arch/powerpc/util/Build   |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c | 48 
 2 files changed, 49 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..3deb1bc 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += perf_regs.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c 
b/tools/perf/arch/powerpc/util/perf_regs.c
new file mode 100644
index 000..0b0ec65
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -0,0 +1,48 @@
+#include "../../perf.h"
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+   SMPL_REG(gpr0, PERF_REG_POWERPC_GPR0),
+   SMPL_REG(gpr1, PERF_REG_POWERPC_GPR1),
+   SMPL_REG(gpr2, PERF_REG_POWERPC_GPR2),
+   SMPL_REG(gpr3, PERF_REG_POWERPC_GPR3),
+   SMPL_REG(gpr4, PERF_REG_POWERPC_GPR4),
+   SMPL_REG(gpr5, PERF_REG_POWERPC_GPR5),
+   SMPL_REG(gpr6, PERF_REG_POWERPC_GPR6),
+   SMPL_REG(gpr7, PERF_REG_POWERPC_GPR7),
+   SMPL_REG(gpr8, PERF_REG_POWERPC_GPR8),
+   SMPL_REG(gpr9, PERF_REG_POWERPC_GPR9),
+   SMPL_REG(gpr10, PERF_REG_POWERPC_GPR10),
+   SMPL_REG(gpr11, PERF_REG_POWERPC_GPR11),
+   SMPL_REG(gpr12, PERF_REG_POWERPC_GPR12),
+   SMPL_REG(gpr13, PERF_REG_POWERPC_GPR13),
+   SMPL_REG(gpr14, PERF_REG_POWERPC_GPR14),
+   SMPL_REG(gpr15, PERF_REG_POWERPC_GPR15),
+   SMPL_REG(gpr16, PERF_REG_POWERPC_GPR16),
+   SMPL_REG(gpr17, PERF_REG_POWERPC_GPR17),
+   SMPL_REG(gpr18, PERF_REG_POWERPC_GPR18),
+   SMPL_REG(gpr19, PERF_REG_POWERPC_GPR19),
+   SMPL_REG(gpr20, PERF_REG_POWERPC_GPR20),
+   SMPL_REG(gpr21, PERF_REG_POWERPC_GPR21),
+   SMPL_REG(gpr22, PERF_REG_POWERPC_GPR22),
+   SMPL_REG(gpr23, PERF_REG_POWERPC_GPR23),
+   SMPL_REG(gpr24, PERF_REG_POWERPC_GPR24),
+   SMPL_REG(gpr25, PERF_REG_POWERPC_GPR25),
+   SMPL_REG(gpr26, PERF_REG_POWERPC_GPR26),
+   SMPL_REG(gpr27, PERF_REG_POWERPC_GPR27),
+   SMPL_REG(gpr28, PERF_REG_POWERPC_GPR28),
+   SMPL_REG(gpr29, PERF_REG_POWERPC_GPR29),
+   SMPL_REG(gpr30, PERF_REG_POWERPC_GPR30),
+   SMPL_REG(gpr31, PERF_REG_POWERPC_GPR31),
+   SMPL_REG(nip, PERF_REG_POWERPC_NIP),
+   SMPL_REG(msr, PERF_REG_POWERPC_MSR),
+   SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3),
+   SMPL_REG(ctr, PERF_REG_POWERPC_CTR),
+   SMPL_REG(link, PERF_REG_POWERPC_LNK),
+   SMPL_REG(xer, PERF_REG_POWERPC_XER),
+   SMPL_REG(ccr, PERF_REG_POWERPC_CCR),
+   SMPL_REG(trap, PERF_REG_POWERPC_TRAP),
+   SMPL_REG(dar, PERF_REG_POWERPC_DAR),
+   SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
+   SMPL_REG_END
+};
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V1 4/4] tool/perf: Add sample_reg_mask to include all perf_regs regs

2015-12-13 Thread Anju T
From: Madhavan Srinivasan 

Add sample_reg_mask array with pt_regs registers.
This is needed for printing supported regs ( -I? option).

Signed-off-by: Madhavan Srinivasan 
---
 tools/perf/arch/powerpc/util/Build   |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c | 48 
 2 files changed, 49 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index 7b8b0d1..3deb1bc 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,6 @@
 libperf-y += header.o
 libperf-y += sym-handling.o
+libperf-y += perf_regs.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c 
b/tools/perf/arch/powerpc/util/perf_regs.c
new file mode 100644
index 000..0b0ec65
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -0,0 +1,48 @@
+#include "../../perf.h"
+#include "../../util/perf_regs.h"
+
+const struct sample_reg sample_reg_masks[] = {
+   SMPL_REG(gpr0, PERF_REG_POWERPC_GPR0),
+   SMPL_REG(gpr1, PERF_REG_POWERPC_GPR1),
+   SMPL_REG(gpr2, PERF_REG_POWERPC_GPR2),
+   SMPL_REG(gpr3, PERF_REG_POWERPC_GPR3),
+   SMPL_REG(gpr4, PERF_REG_POWERPC_GPR4),
+   SMPL_REG(gpr5, PERF_REG_POWERPC_GPR5),
+   SMPL_REG(gpr6, PERF_REG_POWERPC_GPR6),
+   SMPL_REG(gpr7, PERF_REG_POWERPC_GPR7),
+   SMPL_REG(gpr8, PERF_REG_POWERPC_GPR8),
+   SMPL_REG(gpr9, PERF_REG_POWERPC_GPR9),
+   SMPL_REG(gpr10, PERF_REG_POWERPC_GPR10),
+   SMPL_REG(gpr11, PERF_REG_POWERPC_GPR11),
+   SMPL_REG(gpr12, PERF_REG_POWERPC_GPR12),
+   SMPL_REG(gpr13, PERF_REG_POWERPC_GPR13),
+   SMPL_REG(gpr14, PERF_REG_POWERPC_GPR14),
+   SMPL_REG(gpr15, PERF_REG_POWERPC_GPR15),
+   SMPL_REG(gpr16, PERF_REG_POWERPC_GPR16),
+   SMPL_REG(gpr17, PERF_REG_POWERPC_GPR17),
+   SMPL_REG(gpr18, PERF_REG_POWERPC_GPR18),
+   SMPL_REG(gpr19, PERF_REG_POWERPC_GPR19),
+   SMPL_REG(gpr20, PERF_REG_POWERPC_GPR20),
+   SMPL_REG(gpr21, PERF_REG_POWERPC_GPR21),
+   SMPL_REG(gpr22, PERF_REG_POWERPC_GPR22),
+   SMPL_REG(gpr23, PERF_REG_POWERPC_GPR23),
+   SMPL_REG(gpr24, PERF_REG_POWERPC_GPR24),
+   SMPL_REG(gpr25, PERF_REG_POWERPC_GPR25),
+   SMPL_REG(gpr26, PERF_REG_POWERPC_GPR26),
+   SMPL_REG(gpr27, PERF_REG_POWERPC_GPR27),
+   SMPL_REG(gpr28, PERF_REG_POWERPC_GPR28),
+   SMPL_REG(gpr29, PERF_REG_POWERPC_GPR29),
+   SMPL_REG(gpr30, PERF_REG_POWERPC_GPR30),
+   SMPL_REG(gpr31, PERF_REG_POWERPC_GPR31),
+   SMPL_REG(nip, PERF_REG_POWERPC_NIP),
+   SMPL_REG(msr, PERF_REG_POWERPC_MSR),
+   SMPL_REG(orig_r3, PERF_REG_POWERPC_ORIG_R3),
+   SMPL_REG(ctr, PERF_REG_POWERPC_CTR),
+   SMPL_REG(link, PERF_REG_POWERPC_LNK),
+   SMPL_REG(xer, PERF_REG_POWERPC_XER),
+   SMPL_REG(ccr, PERF_REG_POWERPC_CCR),
+   SMPL_REG(trap, PERF_REG_POWERPC_TRAP),
+   SMPL_REG(dar, PERF_REG_POWERPC_DAR),
+   SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
+   SMPL_REG_END
+};
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V6 2/4] perf/powerpc: add support for sampling intr machine state

2015-12-13 Thread Anju T
The perf infrastructure uses a bit mask to find out valid
registers to display. Define a register mask for supported
registers defined in asm/perf_regs.h. The bit positions also
correspond to register IDs which is used by perf infrastructure
to fetch the register values. CONFIG_HAVE_PERF_REGS enables
sampling of the interrupted machine state.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/perf/Makefile|  1 +
 arch/powerpc/perf/perf_regs.c | 85 +++
 3 files changed, 87 insertions(+)
 create mode 100644 arch/powerpc/perf/perf_regs.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9a7057e..c4ce60d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -119,6 +119,7 @@ config PPC
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
+   select HAVE_PERF_REGS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index f9c083a..2f2d3d2 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -8,6 +8,7 @@ obj64-$(CONFIG_PPC_PERF_CTRS)   += power4-pmu.o ppc970-pmu.o 
power5-pmu.o \
   power8-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
 
+obj-$(CONFIG_PERF_EVENTS)  += perf_regs.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000..d32581763
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,85 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR0, gpr[0]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR1, gpr[1]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR2, gpr[2]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR3, gpr[3]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR4, gpr[4]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR5, gpr[5]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR6, gpr[6]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR7, gpr[7]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR8, gpr[8]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR9, gpr[9]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR10, gpr[10]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR11, gpr[11]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR12, gpr[12]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR13, gpr[13]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR14, gpr[14]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR15, gpr[15]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR16, gpr[16]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR17, gpr[17]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR18, gpr[18]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR19, gpr[19]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR20, gpr[20]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR21, gpr[21]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR22, gpr[22]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR23, gpr[23]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR24, gpr[24]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR25, gpr[25]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR26, gpr[26]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR27, gpr[27]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR28, gpr[28]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR29, gpr[29]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR30, gpr[30]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR31, gpr[31]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_LNK, link),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+   if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
+   return 0;
+
+   return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+int perf_reg_validate(u64 mask)
+{
+   if (!mask || mask & REG_RESERVED)
+   return -EINVAL;
+   return 0;
+}
+
+u64 perf_reg_abi(struct task_struct *task)
+{
+   return PERF_SAMPLE_REGS_ABI_64;
+}
+
+void perf_get_regs

[PATCH V6 0/4] perf/powerpc: Add ability to sample intr machine state in powerpc

2015-12-13 Thread Anju T
This short patch series adds the ability to sample the interrupted
machine state for each hardware sample.

To test this patchset,
Eg:

$ perf record -I?   # list supported registers 

output:

available registers: gpr0 gpr1 gpr2 gpr3 gpr4 gpr5 gpr6 gpr7 gpr8 gpr9 gpr10 
gpr11 gpr12 gpr13 gpr14 gpr15 gpr16 gpr17 gpr18 gpr19 gpr20 gpr21 gpr22 gpr23 
gpr24 gpr25 gpr26 gpr27 gpr28 gpr29 gpr30 gpr31 nip msr orig_r3 ctr link xer 
ccr trap dar dsisr
usage: perf record [] []
or: perf record [] --  []
 -I, --intr-regs[=]
sample selected machine registers on interrupt, use -I ? to list register names


$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patchset; the output looks as follows:

178329381464 0x138 [0x180]: PERF_RECORD_SAMPLE(IP, 0x1): 7803/7803: 
0xc000fd9c period: 1 addr: 0
... intr regs: mask 0x3ff ABI 64-bit
 gpr0  0xc01a6420
 gpr1  0xc01e4df039b0
 gpr2  0xc0cdd100
 gpr3  0x1
 gpr4  0xc01e4a96d000
 gpr5  0x29854255ba
 gpr6  0xc00ffa3050b8
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x0
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfe03000
 gpr14 0x0
 gpr15 0xc0d763f8
 gpr16 0x0
 gpr17 0xc01e4ddcf000
 gpr18 0x0
 gpr19 0xc00ffa305000
 gpr20 0xc01e4df038c0
 gpr21 0xc01e40ed7a00
 gpr22 0xc00aa28c
 gpr23 0xc0cdd100
 gpr24 0x0
 gpr25 0xc0cdd100
 gpr26 0xc01e4df038b0
 gpr27 0xfeae
 gpr28 0xc01e4df03880
 gpr29 0xc0dce900
 gpr30 0xc01e4df03890
 gpr31 0xc01e355c7a30
 nip   0xc01a62d8
 msr   0x90009032
 orig_r3 0xc01a6320
 ctr   0xc00a7be0
 lnk   0xc01a6428
 xer   0x0
 ccr   0x24022888
 trap  0xf01
 dar   0xc01e40ed7a00
 dsisr 0x3000c006004
 ... thread: :7803:7803
 .. dso: /root/.debug/.build-id/d0/eb47b06c0d294143af13c50616f638c2d88658
   :7803  7803   178.329381:  1 cycles:  c000fd9c 
.arch_local_irq_restore (/boot/vmlinux)


Changes from V5:

- Enabled perf_sample_regs_user also in this patch set. Functions added in
   arch/powerpc/perf/perf_regs.c
- Added Maddy's patch to this patchset for enabling -I? option which will
  list the supported register names.


Changes from V4:

- Removed the softe and MQ from all patches
- Switch case is replaced with an array in the 3rd patch

Changes from V3:

- Addressed the comments by Sukadev regarding the nits in the descriptions.
- Modified the subject of first patch.
- Included the sample output in the 3rd patch also.

Changes from V2:

- tools/perf/config/Makefile is moved to the patch tools/perf.
- The patchset is reordered.
- perf_regs_load() function is used for the dwarf unwind test. Since it is not 
required here,
  it is removed from tools/perf/arch/powerpc/include/perf_regs.h
- PERF_REGS_POWERPC_RESULT is removed.

Changes from V1:

- Solved the name mismatch issue in the from and signed-off fields of the patch 
series.
- Added necessary comments in the 3rd patch ie perf/powerpc ,as suggested by 
Maddy.


Anju T (3):
  perf/powerpc: assign an id to each powerpc register
  perf/powerpc: add support for sampling intr machine state
  tools/perf: Map the ID values with register names

Madhavan Srinivasan (1):
  tool/perf: Add sample_reg_mask to include all perf_regs regs

 arch/powerpc/Kconfig|  1 +
 arch/powerpc/include/uapi/asm/perf_regs.h   | 49 +
 arch/powerpc/perf/Makefile  |  1 +
 arch/powerpc/perf/perf_regs.c   | 85 +
 tools/perf/arch/powerpc/include/perf_regs.h | 65 ++
 tools/perf/arch/powerpc/util/Build  |  1 +
 tools/perf/arch/powerpc/util/perf_regs.c| 48 
 tools/perf/config/Makefile  |  5 ++
 8 files changed, 255 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h
 create mode 100644 arch/powerpc/perf/perf_regs.c
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h
 create mode 100644 tools/perf/arch/powerpc/util/perf_regs.c

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V6 1/4] perf/powerpc: assign an id to each powerpc register

2015-12-13 Thread Anju T
The enum definition assigns an 'id' to each register in "struct pt_regs"
of arch/powerpc. The order of these values in the enum definition are
based on the corresponding macros in arch/powerpc/include/uapi/asm/ptrace.h.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 arch/powerpc/include/uapi/asm/perf_regs.h | 49 +++
 1 file changed, 49 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h

diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000..cfbd068
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,49 @@
+#ifndef _ASM_POWERPC_PERF_REGS_H
+#define _ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+   PERF_REG_POWERPC_GPR0,
+   PERF_REG_POWERPC_GPR1,
+   PERF_REG_POWERPC_GPR2,
+   PERF_REG_POWERPC_GPR3,
+   PERF_REG_POWERPC_GPR4,
+   PERF_REG_POWERPC_GPR5,
+   PERF_REG_POWERPC_GPR6,
+   PERF_REG_POWERPC_GPR7,
+   PERF_REG_POWERPC_GPR8,
+   PERF_REG_POWERPC_GPR9,
+   PERF_REG_POWERPC_GPR10,
+   PERF_REG_POWERPC_GPR11,
+   PERF_REG_POWERPC_GPR12,
+   PERF_REG_POWERPC_GPR13,
+   PERF_REG_POWERPC_GPR14,
+   PERF_REG_POWERPC_GPR15,
+   PERF_REG_POWERPC_GPR16,
+   PERF_REG_POWERPC_GPR17,
+   PERF_REG_POWERPC_GPR18,
+   PERF_REG_POWERPC_GPR19,
+   PERF_REG_POWERPC_GPR20,
+   PERF_REG_POWERPC_GPR21,
+   PERF_REG_POWERPC_GPR22,
+   PERF_REG_POWERPC_GPR23,
+   PERF_REG_POWERPC_GPR24,
+   PERF_REG_POWERPC_GPR25,
+   PERF_REG_POWERPC_GPR26,
+   PERF_REG_POWERPC_GPR27,
+   PERF_REG_POWERPC_GPR28,
+   PERF_REG_POWERPC_GPR29,
+   PERF_REG_POWERPC_GPR30,
+   PERF_REG_POWERPC_GPR31,
+   PERF_REG_POWERPC_NIP,
+   PERF_REG_POWERPC_MSR,
+   PERF_REG_POWERPC_ORIG_R3,
+   PERF_REG_POWERPC_CTR,
+   PERF_REG_POWERPC_LNK,
+   PERF_REG_POWERPC_XER,
+   PERF_REG_POWERPC_CCR,
+   PERF_REG_POWERPC_TRAP,
+   PERF_REG_POWERPC_DAR,
+   PERF_REG_POWERPC_DSISR,
+   PERF_REG_POWERPC_MAX,
+};
+#endif /* _ASM_POWERPC_PERF_REGS_H */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V6 3/4] tools/perf: Map the ID values with register names

2015-12-13 Thread Anju T
Map ID values with corresponding register names. These names are then
displayed when user issues perf record with the -I option
followed by perf report/script with -D option.

To test this patchset,
Eg:

$ perf record -I ls   # record machine state at interrupt
$ perf script -D  # read the perf.data file

Sample output obtained for this patch; the output looks as follows:

178329381464 0x138 [0x180]: PERF_RECORD_SAMPLE(IP, 0x1): 7803/7803: 
0xc000fd9c period: 1 addr: 0
... intr regs: mask 0x3ff ABI 64-bit
 gpr0  0xc01a6420
 gpr1  0xc01e4df039b0
 gpr2  0xc0cdd100
 gpr3  0x1
 gpr4  0xc01e4a96d000
 gpr5  0x29854255ba
 gpr6  0xc00ffa3050b8
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x0
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfe03000
 gpr14 0x0
 gpr15 0xc0d763f8
 gpr16 0x0
 gpr17 0xc01e4ddcf000
 gpr18 0x0
 gpr19 0xc00ffa305000
 gpr20 0xc01e4df038c0
 gpr21 0xc01e40ed7a00
 gpr22 0xc00aa28c
 gpr23 0xc0cdd100
 gpr24 0x0
 gpr25 0xc0cdd100
 gpr26 0xc01e4df038b0
 gpr27 0xfeae
 gpr28 0xc01e4df03880
 gpr29 0xc0dce900
 gpr30 0xc01e4df03890
 gpr31 0xc01e355c7a30
 nip   0xc01a62d8
 msr   0x90009032
 orig_r3 0xc01a6320
 ctr   0xc00a7be0
 lnk   0xc01a6428
 xer   0x0
 ccr   0x24022888
 trap  0xf01
 dar   0xc01e40ed7a00
 dsisr 0x3000c006004
 ... thread: :7803:7803
 .. dso: /root/.debug/.build-id/d0/eb47b06c0d294143af13c50616f638c2d88658
   :7803  7803   178.329381:  1 cycles:  c000fd9c 
.arch_local_irq_restore (/boot/vmlinux)


Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
Reviewed-by  : Madhavan Srinivasan <ma...@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/include/perf_regs.h | 65 +
 tools/perf/config/Makefile  |  5 +++
 2 files changed, 70 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h

diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
new file mode 100644
index 000..b4f62a3
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -0,0 +1,65 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include 
+#include 
+#include 
+
+#define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
+#define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP PERF_REG_POWERPC_NIP
+#define PERF_REG_SP PERF_REG_POWERPC_R1
+
+static const char *perf_reg_names[] = {
+   [PERF_REG_POWERPC_GPR0] = "gpr0",
+   [PERF_REG_POWERPC_GPR1] = "gpr1",
+   [PERF_REG_POWERPC_GPR2] = "gpr2",
+   [PERF_REG_POWERPC_GPR3] = "gpr3",
+   [PERF_REG_POWERPC_GPR4] = "gpr4",
+   [PERF_REG_POWERPC_GPR5] = "gpr5",
+   [PERF_REG_POWERPC_GPR6] = "gpr6",
+   [PERF_REG_POWERPC_GPR7] = "gpr7",
+   [PERF_REG_POWERPC_GPR8] = "gpr8",
+   [PERF_REG_POWERPC_GPR9] = "gpr9",
+   [PERF_REG_POWERPC_GPR10] = "gpr10",
+   [PERF_REG_POWERPC_GPR11] = "gpr11",
+   [PERF_REG_POWERPC_GPR12] = "gpr12",
+   [PERF_REG_POWERPC_GPR13] = "gpr13",
+   [PERF_REG_POWERPC_GPR14] = "gpr14",
+   [PERF_REG_POWERPC_GPR15] = "gpr15",
+   [PERF_REG_POWERPC_GPR16] = "gpr16",
+   [PERF_REG_POWERPC_GPR17] = "gpr17",
+   [PERF_REG_POWERPC_GPR18] = "gpr18",
+   [PERF_REG_POWERPC_GPR19] = "gpr19",
+   [PERF_REG_POWERPC_GPR20] = "gpr20",
+   [PERF_REG_POWERPC_GPR21] = "gpr21",
+   [PERF_REG_POWERPC_GPR22] = "gpr22",
+   [PERF_REG_POWERPC_GPR23] = "gpr23",
+   [PERF_REG_POWERPC_GPR24] = "gpr24",
+   [PERF_REG_POWERPC_GPR25] = "gpr25",
+   [PERF_REG_POWERPC_GPR26] = "gpr26",
+   [PERF_REG_POWERPC_GPR27] = "gpr27",
+   [PERF_REG_POWERPC_GPR28] = "gpr28",
+   [PERF_REG_POWERPC_GPR29] = "gpr29",
+   [PERF_REG_POWERPC_GPR30] = "gpr30",
+   [PERF_REG_POWERPC_GPR31] = "gpr31",
+   [PERF_REG_POWERPC_NIP] = "nip",
+   [PERF_REG_POWERPC_MSR] = "msr",
+   [PERF_REG_POWERPC_ORIG_R3] = "orig_r3",
+   [PERF_REG_POWERPC_CTR] = "ctr",
+   [PERF_REG_POWERPC_LNK] = "lnk",
+   [PERF_REG_POWERPC_XER] = "xer",
+   [PERF_REG_POWERPC_CCR] = "ccr",
+   [PERF_REG_POWERPC_TRAP] = "trap",
+   [PERF_REG_POWERPC_DAR] = "dar",
+   [PERF_REG_POWERPC_DS

Re: [PATCH V3 0/3] perf/powerpc:Add ability to sample intr machine state in powerpc

2015-11-05 Thread Anju T

Hi Denis,

On Wednesday 04 November 2015 02:26 PM, Denis Kirjanov wrote:

On 11/3/15, Anju T <a...@linux.vnet.ibm.com> wrote:

This short patch series adds the ability to sample the interrupted
machine state for each hardware sample.

To test this patchset,
Eg:

$perf record -I ls   // record machine state at interrupt
$perf script -D  //read the perf.data file

Uncovered the following warning with the series applied. Looks like
that it's not
directly related to your patches but anyway...


May I know the config you used while testing?
Did this warning appear at boot time?

Thanks

Anju



[  507.655197] DEBUG_LOCKS_WARN_ON(!current->hardirqs_enabled)
[  507.655220] [ cut here ]
[  507.655226] WARNING: at kernel/locking/lockdep.c:3523
[  507.655230] Modules linked in: ipv6 binfmt_misc ehea
[  507.655242] CPU: 12 PID: 3746 Comm: ls Tainted: G S
4.3.0-rc3-00103-g3b0e21e-dirty #11
[  507.655249] task: c005b607b290 ti: c005b62e8000 task.ti:
c005b62e8000
[  507.655255] NIP: c010c944 LR: c010c940 CTR: c0659380
[  507.655261] REGS: c005b62eb5c0 TRAP: 0700   Tainted: G S
(4.3.0-rc3-00103-g3b0e21e-dirty)
[  507.655266] MSR: 80029032 <SF,EE,ME,IR,DR,RI>  CR: 22088422
  XER: 000e
[  507.655284] CFAR: c08977c4 SOFTE: 0
GPR00: c010c940 c005b62eb840 c102e600 002f
GPR04: 0001 c01208d8  0001
GPR08: c0eee600 c005b607b290  3fef
GPR12: 42088428 ce956600 001f 3fffd546c4b0
GPR16: 001f 0013 c0b198a8 
GPR20: c005acbb2f80 c005b5c5dd00 3fffd546c500 0001
GPR24: c024af54  0001 0001
GPR28:   c005acbb2ea0 c1e0cf78
[  507.655376] NIP [c010c944] .check_flags.part.36+0xd4/0x240
[  507.655382] LR [c010c940] .check_flags.part.36+0xd0/0x240
[  507.655387] Call Trace:
[  507.655391] [c005b62eb840] [c010c940]
.check_flags.part.36+0xd0/0x240 (unreliable)
[  507.655400] [c005b62eb8c0] [c01112b8] .lock_acquire+0x208/0x2a0
[  507.655407] [c005b62eb990] [c024af80] .__might_fault+0xb0/0xf0
[  507.655415] [c005b62eba10] [c04d5d38] .strnlen_user+0x1d8/0x200
[  507.655422] [c005b62ebad0] [c032fa0c]
.load_elf_binary+0x103c/0x1650
[  507.655430] [c005b62ebc10] [c02bac54]
.search_binary_handler+0xc4/0x260
[  507.655437] [c005b62ebcb0] [c02bcd54]
.do_execveat_common.isra.22+0x7d4/0xb40
[  507.655444] [c005b62ebda0] [c02bd4a8] .SyS_execve+0x38/0x50
[  507.655451] [c005b62ebe30] [c000916c] system_call+0x38/0xd0
[  507.655456] Instruction dump:
[  507.655461] 419e0034 3d4200e5 392a3280 8129 2f89 40fe0020
3c62ffad 3c82ffad
[  507.655475] 3863c038 38841f88 4878adfd 6000 <0fe0> 3c62ffad
38632010 4878ade9
[  507.655490] ---[ end trace 47284e8c92efaa7e ]---
[  507.655494] possible reason: unannotated irqs-on.
[  507.655498] irq event stamp: 2324
[  507.655501] hardirqs last  enabled at (2323): []
._raw_spin_unlock_irqrestore+0x54/0xd0
[  507.655510] hardirqs last disabled at (2324): []
restore_irq_off+0x24/0x28
[  507.655518] softirqs last  enabled at (2184): []
.__do_softirq+0x500/0x670
[  507.655526] softirqs last disabled at (2169): []
.irq_exit+0xd8/0x120


Sample output obtained for this patchset/ output looks like as follows:

331557004666 0x1988 [0x188]: PERF_RECORD_SAMPLE(IP, 0x1): 4807/4807:
0xc01ddf60 period: 1 addr: 0
... intr regs: mask 0x7ff ABI 64-bit
 gpr0  0xc01e6a74
 gpr1  0xc000ff33b9a0
 gpr2  0xc1523000
 gpr3  0xc00ffa9deb60
 gpr4  0xc000ff971e00
 gpr5  0x4d32564532
 gpr6  0x1e00
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x1
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfeeaf80
 gpr14 0x0
 gpr15 0xc000fbc21000
 gpr16 0x0
 gpr17 0xc00ffa9c5000
 gpr18 0xc000ff33b8a0
 gpr19 0xc1523000
 gpr20 0xc00a097c
 gpr21 0xc00fcac65600
 gpr22 0xc01e55a8
 gpr23 0xc1523000
 gpr24 0xc000ff33b850
 gpr25 0xc00fcac65600
 gpr26 0xc01e4b378210
 gpr27 0xfead
 gpr28 0x1
 gpr29 0xc00fcac65600
 gpr30 0x1
 gpr31 0x0
 nip   0xc01ddf68
 msr   0x90009032
 orig_r3 0xc01e5fcc
 ctr   0xc009e1b0
 link  0xc01e6a74
 xer   0x0
 ccr   0x84022882
 softe 0x0
 trap  0xf01
 dar   0x0
 dsisr 0xf0004006004
  ... thread: :4807:4807
  .. dso:
/root/.debug/.build-id/1c/011201a1082e91b8449e6dd528f224d7a16535
:4807  4807   331.557004:  1 cycles:  c01d

Re: [PATCH V3 2/3] perf/powerpc :add support for sampling intr machine state

2015-11-03 Thread Anju T

Hi Michael,
On Tuesday 03 November 2015 02:46 PM, Michael Ellerman wrote:

On Tue, 2015-11-03 at 11:40 +0530, Anju T wrote:


The perf infrastructure uses a bit mask to find out
valid registers to display. Define a register mask
for supported registers defined in asm/perf_regs.h.
The bit positions also correspond to register IDs
which is used by perf infrastructure to fetch the register
values. CONFIG_HAVE_PERF_REGS enables
sampling of the interrupted machine state.
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000..0520492
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,92 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR0, gpr[0]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR1, gpr[1]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR2, gpr[2]),



I realise you're following the example of other architectures, but we have
almost this exact same structure in ptrace.c, see regoffset_table.

It would be really nice if we could share them between ptrace and perf.

cheers




Thank you for reviewing the patch.

That is a great suggestion.

In ptrace.c the structure doesn't include ORIG_R3. So, in that case what 
should we do?




Thanks and Regards

Anju


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V3 2/3] perf/powerpc :add support for sampling intr machine state

2015-11-03 Thread Anju T

Hi Michael,
On Tuesday 03 November 2015 02:46 PM, Michael Ellerman wrote:

On Tue, 2015-11-03 at 11:40 +0530, Anju T wrote:


The perf infrastructure uses a bit mask to find out
valid registers to display. Define a register mask
for supported registers defined in asm/perf_regs.h.
The bit positions also correspond to register IDs
which is used by perf infrastructure to fetch the register
values. CONFIG_HAVE_PERF_REGS enables
sampling of the interrupted machine state.
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000..0520492
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,92 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR0, gpr[0]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR1, gpr[1]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR2, gpr[2]),



I realise you're following the example of other architectures, but we have
almost this exact same structure in ptrace.c, see regoffset_table.

It would be really nice if we could share them between ptrace and perf.

cheers



Thank you for reviewing the patch.

That is a great suggestion.

In ptrace.c the structure doesn't include ORIG_R3. So, in that case what 
should we do?





Thanks and Regards
Anju


___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V3 0/3] perf/powerpc:Add ability to sample intr machine state in powerpc

2015-11-02 Thread Anju T
This short patch series adds the ability to sample the interrupted
machine state for each hardware sample.

To test this patchset,
Eg:

$perf record -I ls   // record machine state at interrupt
$perf script -D  //read the perf.data file

Sample output obtained for this patchset/ output looks like as follows:

331557004666 0x1988 [0x188]: PERF_RECORD_SAMPLE(IP, 0x1): 4807/4807: 
0xc01ddf60 period: 1 addr: 0
... intr regs: mask 0x7ff ABI 64-bit
 gpr0  0xc01e6a74
 gpr1  0xc000ff33b9a0
 gpr2  0xc1523000
 gpr3  0xc00ffa9deb60
 gpr4  0xc000ff971e00
 gpr5  0x4d32564532
 gpr6  0x1e00
 gpr7  0x0
 gpr8  0x0
 gpr9  0x0
 gpr10 0x1
 gpr11 0x0
 gpr12 0x24022822
 gpr13 0xcfeeaf80
 gpr14 0x0
 gpr15 0xc000fbc21000
 gpr16 0x0
 gpr17 0xc00ffa9c5000
 gpr18 0xc000ff33b8a0
 gpr19 0xc1523000
 gpr20 0xc00a097c
 gpr21 0xc00fcac65600
 gpr22 0xc01e55a8
 gpr23 0xc1523000
 gpr24 0xc000ff33b850
 gpr25 0xc00fcac65600
 gpr26 0xc01e4b378210
 gpr27 0xfead
 gpr28 0x1
 gpr29 0xc00fcac65600
 gpr30 0x1
 gpr31 0x0
 nip   0xc01ddf68
 msr   0x90009032
 orig_r3 0xc01e5fcc
 ctr   0xc009e1b0
 link  0xc01e6a74
 xer   0x0
 ccr   0x84022882
 softe 0x0
 trap  0xf01
 dar   0x0
 dsisr 0xf0004006004
 ... thread: :4807:4807
 .. dso: /root/.debug/.build-id/1c/011201a1082e91b8449e6dd528f224d7a16535
   :4807  4807   331.557004:  1 cycles:  c01ddf60 
.perf_ctx_unlock (/boot/vmlinux)

0x1b10 [0x188]: event: 9


Changes from V2:

- tools/perf/config/Makefile is moved to the patch tools/perf.
- The patchset is reordered.
- perf_regs_load() function is used for the dwarf unwind test. Since it is not 
required here,
  it is removed from tools/perf/arch/powerpc/include/perf_regs.h
- PERF_REGS_POWERPC_RESULT is removed.





Anju T (3):
  perf/powerpc:add ability to sample intr machine state in power
  perf/powerpc :add support for sampling intr machine state
  tools/perf:Map the ID values with register names

 arch/powerpc/Kconfig|   1 +
 arch/powerpc/include/uapi/asm/perf_regs.h   |  54 +
 arch/powerpc/perf/Makefile  |   2 +
 arch/powerpc/perf/perf_regs.c   |  92 ++
 tools/perf/arch/powerpc/include/perf_regs.h | 114 
 tools/perf/config/Makefile  |   5 ++
 6 files changed, 268 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h
 create mode 100644 arch/powerpc/perf/perf_regs.c
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V3 1/3] perf/powerpc:add ability to sample intr machine state in power

2015-11-02 Thread Anju T
The enum definition assigns an 'id' to each register in "struct pt_regs"
of arch/powerpc.The order of these values in the enum definition are
based on the corresponding macros in
arch/powerpc/include/uapi/asm/ptrace.h .

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/uapi/asm/perf_regs.h | 54 +++
 1 file changed, 54 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h

diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000..30fb601
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,54 @@
+#ifndef _ASM_POWERPC_PERF_REGS_H
+#define _ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+   PERF_REG_POWERPC_GPR0,
+   PERF_REG_POWERPC_GPR1,
+   PERF_REG_POWERPC_GPR2,
+   PERF_REG_POWERPC_GPR3,
+   PERF_REG_POWERPC_GPR4,
+   PERF_REG_POWERPC_GPR5,
+   PERF_REG_POWERPC_GPR6,
+   PERF_REG_POWERPC_GPR7,
+   PERF_REG_POWERPC_GPR8,
+   PERF_REG_POWERPC_GPR9,
+   PERF_REG_POWERPC_GPR10,
+   PERF_REG_POWERPC_GPR11,
+   PERF_REG_POWERPC_GPR12,
+   PERF_REG_POWERPC_GPR13,
+   PERF_REG_POWERPC_GPR14,
+   PERF_REG_POWERPC_GPR15,
+   PERF_REG_POWERPC_GPR16,
+   PERF_REG_POWERPC_GPR17,
+   PERF_REG_POWERPC_GPR18,
+   PERF_REG_POWERPC_GPR19,
+   PERF_REG_POWERPC_GPR20,
+   PERF_REG_POWERPC_GPR21,
+   PERF_REG_POWERPC_GPR22,
+   PERF_REG_POWERPC_GPR23,
+   PERF_REG_POWERPC_GPR24,
+   PERF_REG_POWERPC_GPR25,
+   PERF_REG_POWERPC_GPR26,
+   PERF_REG_POWERPC_GPR27,
+   PERF_REG_POWERPC_GPR28,
+   PERF_REG_POWERPC_GPR29,
+   PERF_REG_POWERPC_GPR30,
+   PERF_REG_POWERPC_GPR31,
+   PERF_REG_POWERPC_NIP,
+   PERF_REG_POWERPC_MSR,
+   PERF_REG_POWERPC_ORIG_R3,
+   PERF_REG_POWERPC_CTR,
+   PERF_REG_POWERPC_LNK,
+   PERF_REG_POWERPC_XER,
+   PERF_REG_POWERPC_CCR,
+#ifdef __powerpc64__
+   PERF_REG_POWERPC_SOFTE,
+#else
+   PERF_REG_POWERPC_MQ,
+#endif
+   PERF_REG_POWERPC_TRAP,
+   PERF_REG_POWERPC_DAR,
+   PERF_REG_POWERPC_DSISR,
+   PERF_REG_POWERPC_MAX,
+};
+#endif /* _ASM_POWERPC_PERF_REGS_H */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V3 3/3] tools/perf:Map the ID values with register names

2015-11-02 Thread Anju T
Map ID values with corresponding register names. These names are then
displayed when user issues perf record with the -I option
followed by perf report/script with -D option.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/include/perf_regs.h | 114 
 tools/perf/config/Makefile  |   5 ++
 2 files changed, 119 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h

diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
new file mode 100644
index 000..47307ca
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -0,0 +1,114 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include 
+#include 
+#include 
+
+#define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
+#define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP PERF_REG_POWERPC_NIP
+#define PERF_REG_SP PERF_REG_POWERPC_R1
+
+static inline const char *perf_reg_name(int id)
+{
+   switch (id) {
+   case PERF_REG_POWERPC_GPR0:
+   return "gpr0";
+   case PERF_REG_POWERPC_GPR1:
+   return "gpr1";
+   case PERF_REG_POWERPC_GPR2:
+   return "gpr2";
+   case PERF_REG_POWERPC_GPR3:
+   return "gpr3";
+   case PERF_REG_POWERPC_GPR4:
+   return "gpr4";
+   case PERF_REG_POWERPC_GPR5:
+   return "gpr5";
+   case PERF_REG_POWERPC_GPR6:
+   return "gpr6";
+   case PERF_REG_POWERPC_GPR7:
+   return "gpr7";
+   case PERF_REG_POWERPC_GPR8:
+   return "gpr8";
+   case PERF_REG_POWERPC_GPR9:
+   return "gpr9";
+   case PERF_REG_POWERPC_GPR10:
+   return "gpr10";
+   case PERF_REG_POWERPC_GPR11:
+   return "gpr11";
+   case PERF_REG_POWERPC_GPR12:
+   return "gpr12";
+   case PERF_REG_POWERPC_GPR13:
+   return "gpr13";
+   case PERF_REG_POWERPC_GPR14:
+   return "gpr14";
+   case PERF_REG_POWERPC_GPR15:
+   return "gpr15";
+   case PERF_REG_POWERPC_GPR16:
+   return "gpr16";
+   case PERF_REG_POWERPC_GPR17:
+   return "gpr17";
+   case PERF_REG_POWERPC_GPR18:
+   return "gpr18";
+   case PERF_REG_POWERPC_GPR19:
+   return "gpr19";
+   case PERF_REG_POWERPC_GPR20:
+   return "gpr20";
+   case PERF_REG_POWERPC_GPR21:
+   return "gpr21";
+   case PERF_REG_POWERPC_GPR22:
+   return "gpr22";
+   case PERF_REG_POWERPC_GPR23:
+   return "gpr23";
+   case PERF_REG_POWERPC_GPR24:
+   return "gpr24";
+   case PERF_REG_POWERPC_GPR25:
+   return "gpr25";
+   case PERF_REG_POWERPC_GPR26:
+   return "gpr26";
+   case PERF_REG_POWERPC_GPR27:
+   return "gpr27";
+   case PERF_REG_POWERPC_GPR28:
+   return "gpr28";
+   case PERF_REG_POWERPC_GPR29:
+   return "gpr29";
+   case PERF_REG_POWERPC_GPR30:
+   return "gpr30";
+   case PERF_REG_POWERPC_GPR31:
+   return "gpr31";
+   case PERF_REG_POWERPC_NIP:
+   return "nip";
+   case PERF_REG_POWERPC_MSR:
+   return "msr";
+   case PERF_REG_POWERPC_ORIG_R3:
+   return "orig_r3";
+   case PERF_REG_POWERPC_CTR:
+   return "ctr";
+   case PERF_REG_POWERPC_LNK:
+   return "link";
+   case PERF_REG_POWERPC_XER:
+   return "xer";
+   case PERF_REG_POWERPC_CCR:
+   return "ccr";
+#ifdef __powerpc64__
+   case PERF_REG_POWERPC_SOFTE:
+   return "softe";
+#else
+   case PERF_REG_POWERPC_MQ:
+   return "mq";
+#endif
+   case PERF_REG_POWERPC_TRAP:
+   return "trap";
+   case PERF_REG_POWERPC_DAR:
+   return "dar";
+   case PERF_REG_POWERPC_DSISR:
+   return "dsisr";
+   default:
+   return NULL;
+   }
+   return NULL;
+}
+#endif /*ARCH_PERF_REGS_H */
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 38a0853..3db9b5d 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -23,6 +23,11 @@ $(call detected_var,ARCH)
 
 NO_PERF_REGS := 1
 
+#Additional ARCH settings for ppc64
+ifeq ($(ARCH),powerpc)
+   NO_PERF_REGS := 0
+endif
+
 # Additional ARCH settings for x86
 ifeq ($(ARCH),x86)
   $(call detected,CONFIG_X86)
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V3 2/3] perf/powerpc :add support for sampling intr machine state

2015-11-02 Thread Anju T
The perf infrastructure uses a bit mask to find out
valid registers to display. Define a register mask
for supported registers defined in asm/perf_regs.h.
The bit positions also correspond to register IDs
which is used by perf infrastructure to fetch the register
values. CONFIG_HAVE_PERF_REGS enables
sampling of the interrupted machine state.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/perf/Makefile|  2 +
 arch/powerpc/perf/perf_regs.c | 92 +++
 3 files changed, 95 insertions(+)
 create mode 100644 arch/powerpc/perf/perf_regs.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9a7057e..c4ce60d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -119,6 +119,7 @@ config PPC
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
+   select HAVE_PERF_REGS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index f9c083a..cbae78a 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -7,6 +7,8 @@ obj64-$(CONFIG_PPC_PERF_CTRS)   += power4-pmu.o ppc970-pmu.o 
power5-pmu.o \
   power5+-pmu.o power6-pmu.o power7-pmu.o \
   power8-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)  += mpc7450-pmu.o
+obj-$(CONFIG_PERF_EVENTS)  += perf_regs.o
+
 
 obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000..0520492
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,92 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR0, gpr[0]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR1, gpr[1]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR2, gpr[2]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR3, gpr[3]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR4, gpr[4]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR5, gpr[5]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR6, gpr[6]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR7, gpr[7]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR8, gpr[8]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR9, gpr[9]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR10, gpr[10]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR11, gpr[11]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR12, gpr[12]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR13, gpr[13]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR14, gpr[14]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR15, gpr[15]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR16, gpr[16]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR17, gpr[17]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR18, gpr[18]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR19, gpr[19]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR20, gpr[20]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR21, gpr[21]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR22, gpr[22]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR23, gpr[23]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR24, gpr[24]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR25, gpr[25]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR26, gpr[26]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR27, gpr[27]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR28, gpr[28]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR29, gpr[29]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR30, gpr[30]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR31, gpr[31]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_LNK, link),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr),
+#ifdef __powerpc64__
+   PT_REGS_OFFSET(PERF_REG_POWERPC_SOFTE, softe),
+#else
+   PT_REGS_OFFSET(PERF_REG_POWERPC_MQ, mq),
+#endif
+   PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+};
+
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+   if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
+   return 0;
+   return regs_get_register(regs, pt_regs_offset[idx]);
+}
+
+int perf_reg_validate(u64 mask)
+{
+   if (!mask || mask & REG_RESERVED)
+   retur

Re: [PATCH V2 0/3] perf/powerpc:Add ability to sample intr machine state in powerpc

2015-10-27 Thread Anju T

Hi Denis,
On Monday 26 October 2015 06:47 PM, Denis Kirjanov wrote:

On 10/26/15, Anju T <a...@linux.vnet.ibm.com> wrote:

This short patch series adds the ability to sample the interrupted
machine state for each hardware sample

Hi,
how can we check your patch series without testing details?


I have mentioned about the commands to test this feature in the 3rd 
patch of this series,but not detailed.


To test this patch,
Eg:

$perf record -I ls   // record machine state at interrupt
$perf script -D  //read the perf.data file

Sample output obtained for this patch / output looks like as follows:

179614739048 0xb90 [0x190]: PERF_RECORD_SAMPLE(IP, 0x1): 4130/4130: 
0xc01dde88 period: 1 addr: 0


... intr regs: mask 0xfff ABI 64-bit

 gpr0 0xc01e6974

 gpr1 0xc01e48ad79a0

 gpr2 0xc1523400

 gpr3 0x1

 gpr4 0xc00fd7c76600

 gpr5 0x29d1df51d8

 gpr6 0x1e00

 gpr7 0x0

 gpr8 0x0

 gpr9 0x0

 gpr10 0x1

 gpr11 0x0

 gpr12 0x24022822

 gpr13 0xcfee4c80

 gpr14 0x0

 gpr15 0xc000fcab3800

 gpr16 0x0

 gpr17 0xc00ffa445000

 gpr18 0xc01e48ad78a0

 gpr19 0xc1523400

 gpr20 0xc00a09bc

 gpr21 0xc01e3c11d100

 gpr22 0xc01e54a8

 gpr23 0xc1523400

 gpr24 0xc01e48ad7850

 gpr25 0xc01e3c11d100

 gpr26 0xc01e48a71790

 gpr27 0xfead

 gpr28 0x0

 gpr29 0xc01e3c11d100

 gpr30 0x1

 gpr31 0x0

 nip 0xc000fe8c

 msr 0x90009032

 orig_r3 0xc01e6978

 ctr 0xc009e1f0

 link 0xc01e697c

 xer 0x0

 ccr 0x84022884

 softe 0x1

 trap 0xf01

 dar 0x0

 dsisr 0x30004006004

 result 0x0

... thread: :4130:4130

.. dso: /root/.debug/.build-id/82/8d2c7bac560dc9aac8bf6289f26504e22d6883

:4130 4130 179.614739: 1 cycles: c01dde88 .perf_ctx_unlock 
(/boot/vmlinux)



0xd20 [0x190]: event: 9



Anju (3):
   perf/powerpc:add ability to sample intr machine state in power
   tools/perf:Map the ID values with register names
   perf/powerpc:add support for sampling intr machine state


  arch/powerpc/Kconfig|   1 +
  arch/powerpc/include/uapi/asm/perf_regs.h   |  55 +
  arch/powerpc/perf/Makefile  |   1 +
  arch/powerpc/perf/perf_regs.c   |  87 
  tools/perf/arch/powerpc/include/perf_regs.h | 118

  tools/perf/config/Makefile  |   5 ++
  6 files changed, 267 insertions(+)
  create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h
  create mode 100644 arch/powerpc/perf/perf_regs.c
  create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h

--
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev


Thanks and regards
Anju
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V2 2/3] tools/perf:Map the ID values with register names

2015-10-26 Thread Anju T
The ID values are mapped with the corresponding register names.
These names are displayed while using a perf report/perf script command.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/include/perf_regs.h | 118 
 1 file changed, 118 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h

diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
new file mode 100644
index 000..621aa94
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -0,0 +1,118 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include 
+#include 
+#include 
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+#define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
+#define PERF_SAMPLE_REGS_ABI   PERF_SAMPLE_REGS_ABI_64
+
+#define PERF_REG_IP PERF_REG_POWERPC_NIP
+#define PERF_REG_SP PERF_REG_POWERPC_R1
+
+static inline const char *perf_reg_name(int id)
+{
+   switch (id) {
+   case PERF_REG_POWERPC_GPR0:
+   return "gpr0";
+   case PERF_REG_POWERPC_GPR1:
+   return "gpr1";
+   case PERF_REG_POWERPC_GPR2:
+   return "gpr2";
+   case PERF_REG_POWERPC_GPR3:
+   return "gpr3";
+   case PERF_REG_POWERPC_GPR4:
+   return "gpr4";
+   case PERF_REG_POWERPC_GPR5:
+   return "gpr5";
+   case PERF_REG_POWERPC_GPR6:
+   return "gpr6";
+   case PERF_REG_POWERPC_GPR7:
+   return "gpr7";
+   case PERF_REG_POWERPC_GPR8:
+   return "gpr8";
+   case PERF_REG_POWERPC_GPR9:
+   return "gpr9";
+   case PERF_REG_POWERPC_GPR10:
+   return "gpr10";
+   case PERF_REG_POWERPC_GPR11:
+   return "gpr11";
+   case PERF_REG_POWERPC_GPR12:
+   return "gpr12";
+   case PERF_REG_POWERPC_GPR13:
+   return "gpr13";
+   case PERF_REG_POWERPC_GPR14:
+   return "gpr14";
+   case PERF_REG_POWERPC_GPR15:
+   return "gpr15";
+   case PERF_REG_POWERPC_GPR16:
+   return "gpr16";
+   case PERF_REG_POWERPC_GPR17:
+   return "gpr17";
+   case PERF_REG_POWERPC_GPR18:
+   return "gpr18";
+   case PERF_REG_POWERPC_GPR19:
+   return "gpr19";
+   case PERF_REG_POWERPC_GPR20:
+   return "gpr20";
+   case PERF_REG_POWERPC_GPR21:
+   return "gpr21";
+   case PERF_REG_POWERPC_GPR22:
+   return "gpr22";
+   case PERF_REG_POWERPC_GPR23:
+   return "gpr23";
+   case PERF_REG_POWERPC_GPR24:
+   return "gpr24";
+   case PERF_REG_POWERPC_GPR25:
+   return "gpr25";
+   case PERF_REG_POWERPC_GPR26:
+   return "gpr26";
+   case PERF_REG_POWERPC_GPR27:
+   return "gpr27";
+   case PERF_REG_POWERPC_GPR28:
+   return "gpr28";
+   case PERF_REG_POWERPC_GPR29:
+   return "gpr29";
+   case PERF_REG_POWERPC_GPR30:
+   return "gpr30";
+   case PERF_REG_POWERPC_GPR31:
+   return "gpr31";
+   case PERF_REG_POWERPC_NIP:
+   return "nip";
+   case PERF_REG_POWERPC_MSR:
+   return "msr";
+   case PERF_REG_POWERPC_ORIG_R3:
+   return "orig_r3";
+   case PERF_REG_POWERPC_CTR:
+   return "ctr";
+   case PERF_REG_POWERPC_LNK:
+   return "link";
+   case PERF_REG_POWERPC_XER:
+   return "xer";
+   case PERF_REG_POWERPC_CCR:
+   return "ccr";
+#ifdef __powerpc64__
+   case PERF_REG_POWERPC_SOFTE:
+   return "softe";
+#else
+   case PERF_REG_POWERPC_MQ:
+   return "mq";
+#endif
+   case PERF_REG_POWERPC_TRAP:
+   return "trap";
+   case PERF_REG_POWERPC_DAR:
+   return "dar";
+   case PERF_REG_POWERPC_DSISR:
+   return "dsisr";
+   case PERF_REG_POWERPC_RESULT:
+   return "result";
+   default:
+   return NULL;
+   }
+   return NULL;
+}
+#endif /*ARCH_PERF_REGS_H */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V2 0/3] perf/powerpc:Add ability to sample intr machine state in powerpc

2015-10-26 Thread Anju T
This short patch series adds the ability to sample the interrupted
machine state for each hardware sample

Anju (3):
  perf/powerpc:add ability to sample intr machine state in power
  tools/perf:Map the ID values with register names
  perf/powerpc:add support for sampling intr machine state 


 arch/powerpc/Kconfig|   1 +
 arch/powerpc/include/uapi/asm/perf_regs.h   |  55 +
 arch/powerpc/perf/Makefile  |   1 +
 arch/powerpc/perf/perf_regs.c   |  87 
 tools/perf/arch/powerpc/include/perf_regs.h | 118 
 tools/perf/config/Makefile  |   5 ++
 6 files changed, 267 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h
 create mode 100644 arch/powerpc/perf/perf_regs.c
 create mode 100644 tools/perf/arch/powerpc/include/perf_regs.h

-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH V2 3/3] perf/powerpc :add support for sampling intr machine state

2015-10-26 Thread Anju T
The registers to sample are passed through the sample_regs_intr bitmask.
The name and bit position for each register is defined in asm/perf_regs.h.
This feature can be enabled by using -I option with perf  record command.
To display the sampled register values use perf script -D.
The kernel uses the "PERF" register ids to find offset of the register in 
'struct pt_regs'.
CONFIG_HAVE_PERF_REGS will enable sampling of the interrupted machine state.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/perf/Makefile|  1 +
 arch/powerpc/perf/perf_regs.c | 87 +++
 tools/perf/config/Makefile|  5 +++
 4 files changed, 94 insertions(+)
 create mode 100644 arch/powerpc/perf/perf_regs.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 5ef2711..768d700 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -116,6 +116,7 @@ config PPC
select GENERIC_ATOMIC64 if PPC32
select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
select HAVE_PERF_EVENTS
+   select HAVE_PERF_REGS
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
select ARCH_WANT_IPC_PARSE_VERSION
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index f9c083a..0d53815 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT) += core-fsl-emb.o
 obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o
 
 obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o
+obj-$(CONFIG_PERF_EVENTS)  += perf_regs.o
 
 obj-$(CONFIG_PPC64)+= $(obj64-y)
 obj-$(CONFIG_PPC32)+= $(obj32-y)
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
new file mode 100644
index 000..2474dc4
--- /dev/null
+++ b/arch/powerpc/perf/perf_regs.c
@@ -0,0 +1,87 @@
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/perf_event.h>
+#include <linux/bug.h>
+#include <linux/stddef.h>
+#include <asm/ptrace.h>
+#include <asm/perf_regs.h>
+
+#define PT_REGS_OFFSET(id, r) [id] = offsetof(struct pt_regs, r)
+
+#define REG_RESERVED (~((1ULL << PERF_REG_POWERPC_MAX) - 1))
+
+static unsigned int pt_regs_offset[PERF_REG_POWERPC_MAX] = {
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR0, gpr[0]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR1, gpr[1]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR2, gpr[2]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR3, gpr[3]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR4, gpr[4]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR5, gpr[5]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR6, gpr[6]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR7, gpr[7]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR8, gpr[8]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR9, gpr[9]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR10, gpr[10]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR11, gpr[11]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR12, gpr[12]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR13, gpr[13]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR14, gpr[14]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR15, gpr[15]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR16, gpr[16]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR17, gpr[17]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR18, gpr[18]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR19, gpr[19]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR20, gpr[20]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR21, gpr[21]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR22, gpr[22]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR23, gpr[23]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR24, gpr[24]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR25, gpr[25]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR26, gpr[26]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR27, gpr[27]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR28, gpr[28]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR29, gpr[29]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR30, gpr[30]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_GPR31, gpr[31]),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_NIP, nip),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_MSR, msr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_ORIG_R3, orig_gpr3),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CTR, ctr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_LNK, link),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_XER, xer),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_CCR, ccr),
+#ifdef __powerpc64__
+   PT_REGS_OFFSET(PERF_REG_POWERPC_SOFTE, softe),
+#else
+   PT_REGS_OFFSET(PERF_REG_POWERPC_MQ, mq),
+#endif
+   PT_REGS_OFFSET(PERF_REG_POWERPC_TRAP, trap),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DAR, dar),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_DSISR, dsisr),
+   PT_REGS_OFFSET(PERF_REG_POWERPC_RESULT, result),
+};
+u64 perf_reg_value(struct pt_regs *regs, int idx)
+{
+   if (WARN_ON_ONCE(idx >= PERF_REG_POWERPC_MAX))
+   return 0;
+   return regs_get_register(regs, pt_regs_offset[idx]);

[PATCH V2 1/3] perf/powerpc: add ability to sample intr machine state in power

2015-10-26 Thread Anju T
The enum definition assigns an 'id' to each register in "struct pt_regs"
of arch/powerpc. The order of these values in the enum definition is
based on the corresponding macros in
arch/powerpc/include/uapi/asm/ptrace.h.

Signed-off-by: Anju T <a...@linux.vnet.ibm.com>
---
 arch/powerpc/include/uapi/asm/perf_regs.h | 55 +++
 1 file changed, 55 insertions(+)
 create mode 100644 arch/powerpc/include/uapi/asm/perf_regs.h

diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
new file mode 100644
index 000..b97727c
--- /dev/null
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -0,0 +1,55 @@
+#ifndef _ASM_POWERPC_PERF_REGS_H
+#define _ASM_POWERPC_PERF_REGS_H
+
+enum perf_event_powerpc_regs {
+   PERF_REG_POWERPC_GPR0,
+   PERF_REG_POWERPC_GPR1,
+   PERF_REG_POWERPC_GPR2,
+   PERF_REG_POWERPC_GPR3,
+   PERF_REG_POWERPC_GPR4,
+   PERF_REG_POWERPC_GPR5,
+   PERF_REG_POWERPC_GPR6,
+   PERF_REG_POWERPC_GPR7,
+   PERF_REG_POWERPC_GPR8,
+   PERF_REG_POWERPC_GPR9,
+   PERF_REG_POWERPC_GPR10,
+   PERF_REG_POWERPC_GPR11,
+   PERF_REG_POWERPC_GPR12,
+   PERF_REG_POWERPC_GPR13,
+   PERF_REG_POWERPC_GPR14,
+   PERF_REG_POWERPC_GPR15,
+   PERF_REG_POWERPC_GPR16,
+   PERF_REG_POWERPC_GPR17,
+   PERF_REG_POWERPC_GPR18,
+   PERF_REG_POWERPC_GPR19,
+   PERF_REG_POWERPC_GPR20,
+   PERF_REG_POWERPC_GPR21,
+   PERF_REG_POWERPC_GPR22,
+   PERF_REG_POWERPC_GPR23,
+   PERF_REG_POWERPC_GPR24,
+   PERF_REG_POWERPC_GPR25,
+   PERF_REG_POWERPC_GPR26,
+   PERF_REG_POWERPC_GPR27,
+   PERF_REG_POWERPC_GPR28,
+   PERF_REG_POWERPC_GPR29,
+   PERF_REG_POWERPC_GPR30,
+   PERF_REG_POWERPC_GPR31,
+   PERF_REG_POWERPC_NIP,
+   PERF_REG_POWERPC_MSR,
+   PERF_REG_POWERPC_ORIG_R3,
+   PERF_REG_POWERPC_CTR,
+   PERF_REG_POWERPC_LNK,
+   PERF_REG_POWERPC_XER,
+   PERF_REG_POWERPC_CCR,
+#ifdef __powerpc64__
+   PERF_REG_POWERPC_SOFTE,
+#else
+   PERF_REG_POWERPC_MQ,
+#endif
+   PERF_REG_POWERPC_TRAP,
+   PERF_REG_POWERPC_DAR,
+   PERF_REG_POWERPC_DSISR,
+   PERF_REG_POWERPC_RESULT,
+   PERF_REG_POWERPC_MAX,
+};
+#endif /* _ASM_POWERPC_PERF_REGS_H */
-- 
2.1.0

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

<    1   2   3