[PATCH 1/4] perf: Utility function to fetch arch from evsel

2016-06-28 Thread Ravi Bangoria
Add a utility function to fetch 'arch' from 'evsel' (via evsel->evlist->env->arch).
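
For reference, a minimal usage sketch (not part of the patch; it assumes the
evsel belongs to an evlist whose perf_env was read from the perf.data header,
and pr_debug() comes from perf's util/debug.h):

    static void print_record_arch(struct perf_evsel *evsel)
    {
            char *arch = perf_evsel__env_arch(evsel);

            /* NULL when no header environment is available, e.g. live mode */
            pr_debug("perf.data arch: %s\n", arch ?: "unknown");
    }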

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 tools/perf/util/evsel.c | 7 +++
 tools/perf/util/evsel.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1d8f2bb..0fea724 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2422,3 +2422,10 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, 
struct target *target,
 err, strerror_r(err, sbuf, sizeof(sbuf)),
 perf_evsel__name(evsel));
 }
+
+char *perf_evsel__env_arch(struct perf_evsel *evsel)
+{
+   if (evsel && evsel->evlist && evsel->evlist->env)
+   return evsel->evlist->env->arch;
+   return NULL;
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 828ddd1..86fed7a 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -435,4 +435,6 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const 
char *, void *);
 int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 attr__fprintf_f attr__fprintf, void *priv);
 
+char *perf_evsel__env_arch(struct perf_evsel *evsel);
+
 #endif /* __PERF_EVSEL_H */
-- 
2.5.5


[PATCH 4/4] perf annotate: Define macro for arch names

2016-06-28 Thread Ravi Bangoria
Define a macro for each arch name and use the macros instead of bare arch
name strings.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 tools/perf/arch/common.c   | 36 ++--
 tools/perf/arch/common.h   | 11 +++
 tools/perf/util/annotate.c | 10 +-
 tools/perf/util/unwind-libunwind.c |  4 ++--
 4 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index ee69668..feb2113 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -122,25 +122,25 @@ static int lookup_triplets(const char *const *triplets, 
const char *name)
 const char *normalize_arch(char *arch)
 {
if (!strcmp(arch, "x86_64"))
-   return "x86";
+   return NORM_X86;
if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
-   return "x86";
+   return NORM_X86;
if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
-   return "sparc";
+   return NORM_SPARC;
if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64"))
-   return "arm64";
+   return NORM_ARM64;
if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
-   return "arm";
+   return NORM_ARM;
if (!strncmp(arch, "s390", 4))
-   return "s390";
+   return NORM_S390;
if (!strncmp(arch, "parisc", 6))
-   return "parisc";
+   return NORM_PARISC;
if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
-   return "powerpc";
+   return NORM_POWERPC;
if (!strncmp(arch, "mips", 4))
-   return "mips";
+   return NORM_MIPS;
if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
-   return "sh";
+   return NORM_SH;
 
return arch;
 }
@@ -180,21 +180,21 @@ static int perf_env__lookup_binutils_path(struct perf_env 
*env,
zfree(&buf);
}
 
-   if (!strcmp(arch, "arm"))
+   if (!strcmp(arch, NORM_ARM))
path_list = arm_triplets;
-   else if (!strcmp(arch, "arm64"))
+   else if (!strcmp(arch, NORM_ARM64))
path_list = arm64_triplets;
-   else if (!strcmp(arch, "powerpc"))
+   else if (!strcmp(arch, NORM_POWERPC))
path_list = powerpc_triplets;
-   else if (!strcmp(arch, "sh"))
+   else if (!strcmp(arch, NORM_SH))
path_list = sh_triplets;
-   else if (!strcmp(arch, "s390"))
+   else if (!strcmp(arch, NORM_S390))
path_list = s390_triplets;
-   else if (!strcmp(arch, "sparc"))
+   else if (!strcmp(arch, NORM_SPARC))
path_list = sparc_triplets;
-   else if (!strcmp(arch, "x86"))
+   else if (!strcmp(arch, NORM_X86))
path_list = x86_triplets;
-   else if (!strcmp(arch, "mips"))
+   else if (!strcmp(arch, NORM_MIPS))
path_list = mips_triplets;
else {
ui__error("binutils for %s not supported.\n", arch);
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index 6b01c73..14ca8ca 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -5,6 +5,17 @@
 
 extern const char *objdump_path;
 
+/* Macro for normalized arch names */
+#define NORM_X86   "x86"
+#define NORM_SPARC "sparc"
+#define NORM_ARM64 "arm64"
+#define NORM_ARM   "arm"
+#define NORM_S390  "s390"
+#define NORM_PARISC"parisc"
+#define NORM_POWERPC   "powerpc"
+#define NORM_MIPS  "mips"
+#define NORM_SH"sh"
+
 int perf_env__lookup_objdump(struct perf_env *env);
 const char *normalize_arch(char *arch);
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 96c6610..8146a25 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -68,7 +68,7 @@ static int call__parse(struct ins_operands *ops,
 
name++;
 
-   if (!strcmp(norm_arch, "arm") && strchr(name, '+'))
+   if (!strcmp(norm_arch, NORM_ARM) && strchr(name, '+'))
return -1;
 
tok = strchr(name, '>');
@@ -255,7 +255,7 @@ static int mov__parse(struct ins_operands *ops,
 
target = ++s;
 
-   if (!strcmp(norm_arch, "arm"))
+   if (!strcmp(norm_arch, NORM_ARM))
comment = strchr(s, ';');
else
comment = strchr(s, '#');

[PATCH 3/4] perf annotate: add powerpc support

2016-06-28 Thread Ravi Bangoria
From: "Naveen N. Rao" <naveen.n@linux.vnet.ibm.com>

Powerpc has a long list of branch instructions and hardcoding them in a table
appears to be error-prone. So, add a new function to find the instruction
instead of creating a table.

Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 tools/perf/util/annotate.c | 64 ++
 1 file changed, 64 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 36a5825..96c6610 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -476,6 +476,68 @@ static int ins__cmp(const void *a, const void *b)
return strcmp(ia->name, ib->name);
 }
 
+static struct ins *ins__find_powerpc(const char *name)
+{
+   int i;
+   struct ins *ins;
+
+   ins = zalloc(sizeof(struct ins));
+   if (!ins)
+   return NULL;
+
+   ins->name = strdup(name);
+   if (!ins->name)
+   return NULL;
+
+   if (name[0] == 'b') {
+   /* branch instructions */
+   ins->ops = &jump_ops;
+
+   /*
+* - Few start with 'b', but aren't branch instructions.
+* - Let's also ignore instructions involving 'ctr' and
+*   'tar' since target branch addresses for those can't
+*   be determined statically.
+*/
+   if (!strncmp(name, "bcd", 3)   ||
+   !strncmp(name, "brinc", 5) ||
+   !strncmp(name, "bper", 4)  ||
+   strstr(name, "ctr")||
+   strstr(name, "tar"))
+   return NULL;
+
+   i = strlen(name) - 1;
+   if (i < 0)
+   return NULL;
+
+   /* ignore optional hints at the end of the instructions */
+   if (name[i] == '+' || name[i] == '-')
+   i--;
+
+   if (name[i] == 'l' || (name[i] == 'a' && name[i-1] == 'l')) {
+   /*
+* if the instruction ends up with 'l' or 'la', then
+* those are considered 'calls' since they update LR.
+* ... except for 'bnl' which is branch if not less than
+* and the absolute form of the same.
+*/
+   if (strcmp(name, "bnl") && strcmp(name, "bnl+") &&
+   strcmp(name, "bnl-") && strcmp(name, "bnla") &&
+   strcmp(name, "bnla+") && strcmp(name, "bnla-"))
+   ins->ops = &call_ops;
+   }
+   if (name[i] == 'r' && name[i-1] == 'l')
+   /*
+* instructions ending with 'lr' are considered to be
+* return instructions
+*/
+   ins->ops = &ret_ops;
+
+   return ins;
+   }
+   return NULL;
+}
+
 static void ins__sort(struct ins *instructions, int nmemb)
 {
qsort(instructions, nmemb, sizeof(struct ins), ins__cmp);
@@ -511,6 +573,8 @@ static struct ins *ins__find(const char *name, const char 
*norm_arch)
} else if (!strcmp(norm_arch, "arm")) {
instructions = instructions_arm;
nmemb = ARRAY_SIZE(instructions_arm);
+   } else if (!strcmp(norm_arch, "powerpc")) {
+   return ins__find_powerpc(name);
} else {
pr_err("perf annotate not supported by %s arch\n", norm_arch);
return NULL;
-- 
2.5.5


[PATCH 2/4] perf annotate: Enable cross arch annotate

2016-06-28 Thread Ravi Bangoria
Change the current data structures and functions to enable cross-arch
annotate.

The current implementation has no logic for recording on one arch and
annotating on another. Such remote annotate is only partially possible with
the current implementation, and only for x86 (or maybe arm as well). To make
remote annotation work properly, all architecture instruction tables need to
be included in the perf binary, and while annotating, perf must look up the
instruction table of the arch on which perf.data was recorded.
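
As a rough illustration of how the pieces of this series are meant to fit
together (a hypothetical wrapper written only for this description; error
handling is omitted, and in the actual patches the arch string is passed
down through symbol__annotate() and normalized on the annotate side):

    static struct ins *ins__find_for_evsel(const char *name,
                                           struct perf_evsel *evsel)
    {
            /* arch string from the perf.data header, e.g. "ppc64le" */
            char *arch = perf_evsel__env_arch(evsel);
            /* canonical name, e.g. "powerpc" */
            const char *norm_arch = normalize_arch(arch);

            /* pick the instruction table of the recorded arch, not the host */
            return ins__find(name, norm_arch);
    }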

For arm, a few instructions were defined under #if __arm__, which I've used
as the table for arm. But I'm not sure whether instructions defined outside
of that block also include arm instructions. Apart from that, 'call__parse()'
and 'mov__parse()' contain an #ifdef __arm__ directive. I've changed it to
if (!strcmp(norm_arch, "arm")), but I've not tested this either.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 tools/perf/builtin-top.c  |   2 +-
 tools/perf/ui/browsers/annotate.c |   3 +-
 tools/perf/ui/gtk/annotate.c  |   2 +-
 tools/perf/util/annotate.c| 136 --
 tools/perf/util/annotate.h|   5 +-
 5 files changed, 95 insertions(+), 53 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 07fc792..d4fd947 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -128,7 +128,7 @@ static int perf_top__parse_source(struct perf_top *top, 
struct hist_entry *he)
return err;
}
 
-   err = symbol__annotate(sym, map, 0);
+   err = symbol__annotate(sym, map, 0, NULL);
if (err == 0) {
 out_assign:
top->sym_filter_entry = he;
diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index 29dc6d2..3a652a6f 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1050,7 +1050,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map 
*map,
  (nr_pcnt - 1);
}
 
-   if (symbol__annotate(sym, map, sizeof_bdl) < 0) {
+   if (symbol__annotate(sym, map, sizeof_bdl,
+perf_evsel__env_arch(evsel)) < 0) {
ui__error("%s", ui_helpline__last_msg);
goto out_free_offsets;
}
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 9c7ff8d..d7150b3 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -166,7 +166,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct 
map *map,
if (map->dso->annotate_warned)
return -1;
 
-   if (symbol__annotate(sym, map, 0) < 0) {
+   if (symbol__annotate(sym, map, 0, perf_evsel__env_arch(evsel)) < 0) {
ui__error("%s", ui_helpline__current);
return -1;
}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index c385fec..36a5825 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -20,12 +20,14 @@
 #include 
 #include 
 #include 
+#include 
+#include "../arch/common.h"
 
 const char *disassembler_style;
 const char *objdump_path;
 static regex_t  file_lineno;
 
-static struct ins *ins__find(const char *name);
+static struct ins *ins__find(const char *name, const char *norm_arch);
 static int disasm_line__parse(char *line, char **namep, char **rawp);
 
 static void ins__delete(struct ins_operands *ops)
@@ -53,7 +55,8 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
return ins__raw_scnprintf(ins, bf, size, ops);
 }
 
-static int call__parse(struct ins_operands *ops)
+static int call__parse(struct ins_operands *ops,
+  __maybe_unused const char *norm_arch)
 {
char *endptr, *tok, *name;
 
@@ -65,10 +68,8 @@ static int call__parse(struct ins_operands *ops)
 
name++;
 
-#ifdef __arm__
-   if (strchr(name, '+'))
+   if (!strcmp(norm_arch, "arm") && strchr(name, '+'))
return -1;
-#endif
 
tok = strchr(name, '>');
if (tok == NULL)
@@ -117,7 +118,8 @@ bool ins__is_call(const struct ins *ins)
return ins->ops == &call_ops;
 }
 
-static int jump__parse(struct ins_operands *ops)
+static int jump__parse(struct ins_operands *ops,
+  __maybe_unused const char *norm_arch)
 {
const char *s = strchr(ops->raw, '+');
 
@@ -172,7 +174,7 @@ static int comment__symbol(char *raw, char *comment, u64 
*addrp, char **namep)
return 0;
 }
 
-static int lock__parse(struct ins_operands *ops)
+static int lock__parse(struct ins_operands *ops, const char *norm_arch)
 {
char *name;
 
@@ -183,7 +185,7 @@ static int lock__parse(struct ins_operands *ops)
if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0)
goto out_free_ops;
 
-   ops->locked.ins = ins__find(name);
+   ops->locked.ins = ins__find(name, norm_arch);

[PATCH 0/4] perf annotate: Enable cross arch annotate

2016-06-28 Thread Ravi Bangoria
Perf can currently only support code navigation (branches and calls) in
annotate when run on the same architecture where perf.data was recorded.
But cross arch annotate is not supported.

This patchset enables cross arch annotate. Currently I've reused the x86
and arm instruction tables, which are already available, and I'm adding
support for powerpc as well. Adding support for other archs will be easy;
see the sketch below for an idea of what that involves.
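
To give an idea of what that involves, here is a hypothetical sketch of
wiring up one more architecture under this scheme (the sparc entries below
are made up for illustration and are not part of this series):

    /* per-arch table, analogous to instructions_x86 / instructions_arm */
    static struct ins instructions_sparc[] = {
            { .name = "call", .ops = &call_ops, },
            { .name = "ret",  .ops = &ret_ops,  },
            { .name = "bne",  .ops = &jump_ops, },
    };

    /* ...plus one more branch in ins__find(): */
            } else if (!strcmp(norm_arch, "sparc")) {
                    instructions = instructions_sparc;
                    nmemb = ARRAY_SIZE(instructions_sparc);
            }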

I've created this patchset on top of acme/perf/core and tested it with
x86 and powerpc only.

Example:

  Record on powerpc:
  $ ./perf record -a

  Report -> Annotate on x86:
  $ ./perf report -i perf.data.powerpc --vmlinux vmlinux.powerpc

Changes in [PATCH] vs [RFC]
  - Removed global var 'arch__ins' and pass arch info till ins__find

Naveen N. Rao (1):
  perf annotate: add powerpc support

Ravi Bangoria (3):
  perf: Utility function to fetch arch
  perf annotate: Enable cross arch annotate
  perf annotate: Define macro for arch names

 tools/perf/arch/common.c   |  36 +++
 tools/perf/arch/common.h   |  11 +++
 tools/perf/builtin-top.c   |   2 +-
 tools/perf/ui/browsers/annotate.c  |   3 +-
 tools/perf/ui/gtk/annotate.c   |   2 +-
 tools/perf/util/annotate.c | 198 -
 tools/perf/util/annotate.h |   5 +-
 tools/perf/util/evsel.c|   7 ++
 tools/perf/util/evsel.h|   2 +
 tools/perf/util/unwind-libunwind.c |   4 +-
 10 files changed, 198 insertions(+), 72 deletions(-)

--
2.5.5


Re: [PATCH 3/4] perf annotate: add powerpc support

2016-06-29 Thread Ravi Bangoria

Thanks David.

On Tuesday 28 June 2016 09:37 PM, David Laight wrote:

From: Ravi Bangoria

Sent: 28 June 2016 12:37

Powerpc has long list of branch instructions and hardcoding them in table
appears to be error-prone. So, add new function to find instruction
instead of creating table.

Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
  tools/perf/util/annotate.c | 64 ++
  1 file changed, 64 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 36a5825..96c6610 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -476,6 +476,68 @@ static int ins__cmp(const void *a, const void *b)
return strcmp(ia->name, ib->name);
  }

+static struct ins *ins__find_powerpc(const char *name)

It would be better if the function name included 'branch'.


+{
+   int i;
+   struct ins *ins;
+
+   ins = zalloc(sizeof(struct ins));
+   if (!ins)
+   return NULL;
+
+   ins->name = strdup(name);
+   if (!ins->name)
+   return NULL;

You leak 'ins' here.


+
+   if (name[0] == 'b') {
+   /* branch instructions */
+   ins->ops = &jump_ops;
+
+   /*
+* - Few start with 'b', but aren't branch instructions.
+* - Let's also ignore instructions involving 'ctr' and
+*   'tar' since target branch addresses for those can't
+*   be determined statically.
+*/
+   if (!strncmp(name, "bcd", 3)   ||
+   !strncmp(name, "brinc", 5) ||
+   !strncmp(name, "bper", 4)  ||
+   strstr(name, "ctr")||
+   strstr(name, "tar"))
+   return NULL;

More importantly you leak 'ins' and 'ins->name' here.
And on other paths below.


Yes. Fair points.

I can create a linked list that keeps the allocated instructions and look it
up every time before allocating memory. But then I need to free that memory
at the end, and it's becoming complicated; a rough sketch of the idea is below.
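
Roughly the idea, sketched with a stand-alone 'struct cached_ins' instead of
perf's real types (illustration only): every allocated entry goes on one list,
lookups reuse existing entries, and the whole list is released once at exit.

    #include <stdlib.h>
    #include <string.h>

    struct cached_ins {
            char *name;
            struct cached_ins *next;
    };

    static struct cached_ins *cache;

    /* return the cached entry for 'name', allocating it on first use */
    static struct cached_ins *cached_ins__get(const char *name)
    {
            struct cached_ins *c;

            for (c = cache; c; c = c->next)
                    if (!strcmp(c->name, name))
                            return c;

            c = calloc(1, sizeof(*c));
            if (!c)
                    return NULL;
            c->name = strdup(name);
            if (!c->name) {
                    free(c);
                    return NULL;
            }
            c->next = cache;
            cache = c;
            return c;
    }

    /* single teardown point, so callers never free entries individually */
    static void cached_ins__exit(void)
    {
            while (cache) {
                    struct cached_ins *c = cache;

                    cache = c->next;
                    free(c->name);
                    free(c);
            }
    }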

I can go back to the normal approach of creating a table for powerpc, which
is the simplest option. The only problem is that powerpc has around 400
branch instructions (which includes calls and returns as well), and listing
them all is a bit error-prone.

Suggestions?

- Ravi


...

David




[PATCH] hw_breakpoint: Fix Oops at destroying hw_breakpoint event on powerpc

2016-03-02 Thread Ravi Bangoria
When destroying a hw_breakpoint event, the kernel ends up with an Oops.
Here is sample output from a 4.5.0-rc6 kernel.

  [  450.708568] Unable to handle kernel paging request for data at address 
0x0c07
  [  450.708684] Faulting instruction address: 0xc00291d0
  [  450.708750] Oops: Kernel access of bad area, sig: 11 [#1]
  [  450.708798] SMP NR_CPUS=1024 NUMA pSeries
  [  450.708856] Modules linked in: 
stap_4c2bdcf3e1aee79b646bb9a844e600f7__4962(O) xt_CHECKSUM ...
  [  450.709539] CPU: 5 PID: 5106 Comm: perf_fuzzer Tainted: G   O
4.5.0-rc5+ #1
  [  450.709620] task: c000f8795c80 ti: c000e334 task.ti: 
c000e334
  [  450.709691] NIP: c00291d0 LR: c020b6b4 CTR: 
c020b6f0
  [  450.709760] REGS: c000e3343760 TRAP: 0300   Tainted: G   O 
(4.5.0-rc5+)
  [  450.709831] MSR: 80009033 <SF,EE,ME,IR,DR,RI,LE>  CR: 22008828  
XER: 2000
  [  450.710001] CFAR: c0010708 DAR: 0c07 DSISR: 4200 
SOFTE: 1
  GPR00: c020b6b4 c000e33439e0 c1350900 c0009efa7000
  GPR04: 0001 c0009efa7000  0001
  GPR08:    
  GPR12: c020b6f0 c7e02800 c0009efa5208 
  GPR16: 0001   c000f3ad7f10
  GPR20: c000f87964c8 0001 c000f8795c80 fffd
  GPR24:  c000f3ad7f08 c000f3ad7f68 c0009efa6800
  GPR28: c000f3ad7f00 c0009efa5000 c1259520 c0009efa7000
  [  450.710996] NIP [c00291d0] arch_unregister_hw_breakpoint+0x40/0x60
  [  450.711066] LR [c020b6b4] release_bp_slot+0x44/0x80
  [  450.77] Call Trace:
  [  450.711165] [c000e33439e0] [c09c1e38] mutex_lock+0x28/0x70 
(unreliable)
  [  450.711257] [c000e3343a10] [c020b6b4] release_bp_slot+0x44/0x80
  [  450.711332] [c000e3343a40] [c02036c8] _free_event+0xd8/0x350
  [  450.711404] [c000e3343a70] [c0208260] 
perf_event_exit_task+0x2b0/0x4c0
  [  450.711490] [c000e3343b20] [c00b8ac8] do_exit+0x388/0xc60
  [  450.711563] [c000e3343be0] [c00b9484] do_group_exit+0x64/0x100
  [  450.711641] [c000e3343c20] [c00c9100] get_signal+0x220/0x770
  [  450.711716] [c000e3343d10] [c0017884] do_signal+0x54/0x2b0
  [  450.711793] [c000e3343e00] [c0017cac] 
do_notify_resume+0xbc/0xd0
  [  450.711865] [c000e3343e30] [c0009838] 
ret_from_except_lite+0x64/0x68
  [  450.711948] Instruction dump:
  [  450.711986] f8010010 f821ffd1 7c7f1b78 6000 6000 e93f01e8 2fa9 
419e0018
  [  450.712107] e9290098 2fa9 419e000c 3940  38210030 
e8010010 ebe1fff8
  [  450.712230] ---[ end trace 3cf087de955e9358 ]---

Call chain:

  hw_breakpoint_event_init()
bp->destroy = bp_perf_event_destroy;

  do_exit()
perf_event_exit_task()
  perf_event_exit_task_context()
WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
perf_event_exit_event()
  free_event()
_free_event()
  bp_perf_event_destroy()//event->destroy(event);
release_bp_slot()
  arch_unregister_hw_breakpoint()

perf_event_exit_task_context() sets child_ctx->task to TASK_TOMBSTONE, which
is (void *)-1. arch_unregister_hw_breakpoint() then tries to fetch the
'thread' attribute of that 'task', resulting in the Oops.

This patch adds one more condition before accessing data from 'task'.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/hw_breakpoint.c | 3 ++-
 include/linux/perf_event.h  | 2 ++
 kernel/events/core.c| 2 --
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 05e804c..43d8496 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -29,6 +29,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -110,7 +111,7 @@ void arch_unregister_hw_breakpoint(struct perf_event *bp)
 * and the single_step_dabr_instruction(), then cleanup the breakpoint
 * restoration variables to prevent dangling pointers.
 */
-   if (bp->ctx && bp->ctx->task)
+   if (bp->ctx && bp->ctx->task && bp->ctx->task != TASK_TOMBSTONE)
bp->ctx->task->thread.last_hit_ubp = NULL;
 }
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f5c5a3f..491c50e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1192,4 +1192,6 @@ _name##_show(struct device *dev,  
\
\
 static struct device_attribute format_

Re: [PATCH v4 0/3] perf annotate: Enable cross arch annotate

2016-07-13 Thread Ravi Bangoria

Arnaldo, Michael,

I've tested this patchset on both ppc64 BE and LE. Please review it.

-Ravi

On Friday 08 July 2016 10:10 AM, Ravi Bangoria wrote:

Perf can currently only support code navigation (branches and calls) in
annotate when run on the same architecture where perf.data was recorded.
But cross arch annotate is not supported.

This patchset enables cross arch annotate. Currently I've used x86
and arm instructions which are already available and adding support
for powerpc as well. Adding support for other arch will be easy.

I've created this patch on top of acme/perf/core. And tested it with
x86 and powerpc only.

Note for arm:
Few instructions were defined under #if __arm__ which I've used as a
table for arm. But I'm not sure whether instruction defined outside of
that also contains arm instructions. Apart from that, 'call__parse()'
and 'move__parse()' contains #ifdef __arm__ directive. I've changed it
to  if (!strcmp(norm_arch, arm)). I don't have a arm machine to test
these changes.

Example:

   Record on powerpc:
   $ ./perf record -a

   Report -> Annotate on x86:
   $ ./perf report -i perf.data.powerpc --vmlinux vmlinux.powerpc

Changes in v4:
   - powerpc: Added support for branch instructions that includes 'ctr'
   - __maybe_unused was misplaced at few location. Corrected it.
   - Moved position of v3 last patch that define macro for each arch name

v3 link: https://lkml.org/lkml/2016/6/30/99

Naveen N. Rao (1):
   perf annotate: add powerpc support

Ravi Bangoria (2):
   perf: Define macro for normalized arch names
   perf annotate: Enable cross arch annotate

  tools/perf/arch/common.c   |  36 ++---
  tools/perf/arch/common.h   |  11 ++
  tools/perf/builtin-top.c   |   2 +-
  tools/perf/ui/browsers/annotate.c  |   3 +-
  tools/perf/ui/gtk/annotate.c   |   2 +-
  tools/perf/util/annotate.c | 273 ++---
  tools/perf/util/annotate.h |   6 +-
  tools/perf/util/unwind-libunwind.c |   4 +-
  8 files changed, 265 insertions(+), 72 deletions(-)

--
2.5.5




Re: [PATCH v3 3/4] perf annotate: add powerpc support

2016-07-13 Thread Ravi Bangoria



On Wednesday 13 July 2016 01:09 PM, Michael Ellerman wrote:

Arnaldo Carvalho de Melo <a...@kernel.org> writes:


Em Tue, Jul 12, 2016 at 07:51:46AM +0530, Ravi Bangoria escreveu:

Hi Arnaldo,

On Friday 08 July 2016 02:01 PM, Michael Ellerman wrote:

Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com> writes:


On Wednesday 06 July 2016 03:38 PM, Michael Ellerman wrote:

I've sent v4 which enables annotate for bctr' instructions.

for 'bctr', it will show down arrow(indicate jump) and 'bctrl' will show
right arrow(indicate call). But no navigation options will be provided.
By pressing Enter key on that, message will be shown that like
"Invalid target"

Great thanks.

I've sent v4 series. Please review it.

If somebody else could do it and provide acks/reviewed by, that would
help,

Michael, can I get your comments as such?

It looks OK to me. But I don't know the code really, and I haven't had
time to test it personally.

Ravi, have you tested on a big endian machine?


Yes Michael, I've tested annotate on BE and LE both.

-Ravi



cheers




Re: [PATCH v3 3/4] perf annotate: add powerpc support

2016-07-11 Thread Ravi Bangoria

Hi Arnaldo,

On Friday 08 July 2016 02:01 PM, Michael Ellerman wrote:

Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com> writes:


On Wednesday 06 July 2016 03:38 PM, Michael Ellerman wrote:

I've sent v4 which enables annotate for bctr' instructions.

for 'bctr', it will show down arrow(indicate jump) and 'bctrl' will show
right arrow(indicate call). But no navigation options will be provided.
By pressing Enter key on that, message will be shown that like
"Invalid target"

Great thanks.


I've sent v4 series. Please review it.

-Ravi


It doesn't look like we have the opcode handy here? Could we get it somehow?
That would make this a *lot* more robust.

objdump prints machine code, but I don't know how difficult that would
be to parse to get opcode.

Normal objdump -d output includes the opcode, eg:

c000886c:   2c 2c 00 00 cmpdi   r12,0
  ^^^

The only thing you need to know is the endian and you can reconstruct
the raw instruction.

Then you can just decode the opcode, see how we do it in the kernel with
eg. instr_is_relative_branch().
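
Roughly what that could look like (a self-contained sketch, not code from
this series; the function names are made up): take the four opcode bytes
objdump prints, e.g. "2c 2c 00 00", rebuild the 32-bit instruction word
according to the endianness recorded in the perf.data header, and pull out
the primary opcode.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    /* "2c 2c 00 00" -> 0x2c2c0000 on big endian, 0x00002c2c on little endian */
    static uint32_t raw_insn_from_objdump(const char *bytes, bool big_endian)
    {
            unsigned int b[4];
            uint32_t insn = 0;
            int i;

            if (sscanf(bytes, "%x %x %x %x", &b[0], &b[1], &b[2], &b[3]) != 4)
                    return 0;

            for (i = 0; i < 4; i++)
                    insn |= b[i] << (big_endian ? 24 - 8 * i : 8 * i);

            return insn;
    }

    /* the primary opcode lives in the top 6 bits of a powerpc instruction */
    static unsigned int ppc_primary_opcode(uint32_t insn)
    {
            return insn >> 26;      /* 0x2c2c0000 >> 26 == 11, i.e. cmpdi */
    }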

I'm sorry, I was thinking that you wanted to show opcodes in perf
annotate. But you were asking to use the opcode instead of parsing the
instruction mnemonics.

Yeah.


That would mean rewriting the parsing code. I don't know whether there is any
library already available for this that we could use directly. I'm thinking
about this.

OK don't worry about it for now. We should get this merged for starters
and we can always improve it later.

cheers




Re: [PATCH v3 3/4] perf annotate: add powerpc support

2016-07-04 Thread Ravi Bangoria

Hi Michael,

On Friday 01 July 2016 02:13 PM, Ravi Bangoria wrote:

Thanks Michael for your suggestion.

On Thursday 30 June 2016 11:51 AM, Michael Ellerman wrote:

On Thu, 2016-06-30 at 11:44 +0530, Ravi Bangoria wrote:

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 36a5825..b87eac7 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -476,6 +481,125 @@ static int ins__cmp(const void *a, const void *b)

...

+
+static struct ins *ins__find_powerpc(const char *name)
+{
+int i;
+struct ins *ins;
+struct ins_ops *ops;
+static struct instructions_powerpc head;
+static bool list_initialized;
+
+/*
+ * - Interested only if instruction starts with 'b'.
+ * - Few start with 'b', but aren't branch instructions.
+ * - Let's also ignore instructions involving 'ctr' and
+ *   'tar' since target branch addresses for those can't
+ *   be determined statically.
+ */
+if (name[0] != 'b' ||
+!strncmp(name, "bcd", 3)   ||
+!strncmp(name, "brinc", 5) ||
+!strncmp(name, "bper", 4)  ||
+strstr(name, "ctr")||
+strstr(name, "tar"))
+return NULL;
It would be good if 'bctr' was at least recognised as a branch, even 
if we

can't determine the target. They are very common.


We can not show arrow for this since we don't know the target location.
can you please suggest how you intends perf to display bctr?

bctr can be classified into two variants -- 'bctr' and 'bctrl'.

'bctr' will be considered as jump instruction but jump__parse() won't
be able to find any target location and hence it will set target to
UINT64_MAX which transform 'bctr' to 'bctr UINT64_MAX'. This
looks misleading.

bctrl will be considered as call instruction but call_parse() won't
be able to find any target function and hence it won't show any
navigation arrow for this instruction. Which is same as filter it
beforehand.

It doesn't look like we have the opcode handy here? Could we get it 
somehow?

That would make this a *lot* more robust.


objdump prints machine code, but I don't know how difficult that would
be to parse to get opcode.


Perf passes --no-show-raw-insn to objdump and hence the objdump output does
not show opcodes. So changing the objdump output would require changes to the
current parsing logic. Additionally, I'd need to change the TUI as well to
show opcodes. This looks like quite a bit more work.

And this patchset is about enabling annotate for cross arch. So if you 
really

need opcodes with perf annotate, can we do it separately?

Please let me know your thoughts.

-Ravi



-Ravi


cheers






[PATCH v4 0/3] perf annotate: Enable cross arch annotate

2016-07-07 Thread Ravi Bangoria
Perf can currently only support code navigation (branches and calls) in
annotate when run on the same architecture where perf.data was recorded.
But cross arch annotate is not supported.

This patchset enables cross arch annotate. Currently I've reused the x86
and arm instruction tables, which are already available, and I'm adding
support for powerpc as well. Adding support for other archs will be easy.

I've created this patchset on top of acme/perf/core and tested it with
x86 and powerpc only.

Note for arm:
A few instructions were defined under #if __arm__, which I've used as the
table for arm. But I'm not sure whether instructions defined outside of that
block also include arm instructions. Apart from that, 'call__parse()' and
'mov__parse()' contain an #ifdef __arm__ directive. I've changed it to
if (!strcmp(norm_arch, NORM_ARM)). I don't have an arm machine to test
these changes.

Example:

  Record on powerpc:
  $ ./perf record -a

  Report -> Annotate on x86:
  $ ./perf report -i perf.data.powerpc --vmlinux vmlinux.powerpc

Changes in v4:
  - powerpc: Added support for branch instructions that includes 'ctr'
  - __maybe_unused was misplaced at few location. Corrected it.
  - Moved position of v3 last patch that define macro for each arch name

v3 link: https://lkml.org/lkml/2016/6/30/99

Naveen N. Rao (1):
  perf annotate: add powerpc support

Ravi Bangoria (2):
  perf: Define macro for normalized arch names
  perf annotate: Enable cross arch annotate

 tools/perf/arch/common.c   |  36 ++---
 tools/perf/arch/common.h   |  11 ++
 tools/perf/builtin-top.c   |   2 +-
 tools/perf/ui/browsers/annotate.c  |   3 +-
 tools/perf/ui/gtk/annotate.c   |   2 +-
 tools/perf/util/annotate.c | 273 ++---
 tools/perf/util/annotate.h |   6 +-
 tools/perf/util/unwind-libunwind.c |   4 +-
 8 files changed, 265 insertions(+), 72 deletions(-)

--
2.5.5


[PATCH v4 3/3] perf annotate: add powerpc support

2016-07-07 Thread Ravi Bangoria
From: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>

Powerpc has a long list of branch instructions and hardcoding them in a table
appears to be error-prone. So, add a new function to find the instruction
instead of creating a static table. This function builds the table (a list
of 'struct ins') dynamically and, instead of creating an object every time,
first checks whether the list already contains an object for that
instruction.

Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v4:
  - Added support for branch instructions that includes 'ctr'

 tools/perf/util/annotate.c | 155 +++--
 tools/perf/util/annotate.h |   3 +-
 2 files changed, 150 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 32889ce..9de1271 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -55,10 +55,15 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
return ins__raw_scnprintf(ins, bf, size, ops);
 }
 
-static int call__parse(struct ins_operands *ops, const char *norm_arch)
+static int call__parse(char *ins_name, struct ins_operands *ops,
+  const char *norm_arch)
 {
char *endptr, *tok, *name;
 
+   /* Special case for powerpc */
+   if (!strcmp(norm_arch, NORM_POWERPC) && strstr(ins_name, "ctr"))
+   return 0;
+
ops->target.addr = strtoull(ops->raw, &endptr, 16);
 
name = strchr(endptr, '<');
@@ -117,7 +122,7 @@ bool ins__is_call(const struct ins *ins)
return ins->ops == &call_ops;
 }
 
-static int jump__parse(struct ins_operands *ops,
+static int jump__parse(char *ins_name __maybe_unused, struct ins_operands *ops,
   const char *norm_arch __maybe_unused)
 {
const char *s = strchr(ops->raw, '+');
@@ -135,6 +140,13 @@ static int jump__parse(struct ins_operands *ops,
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
   struct ins_operands *ops)
 {
+   /*
+* Instructions that does not include target address in operand
+* like 'bctr' for powerpc.
+*/
+   if (!ops->target.addr)
+   return scnprintf(bf, size, "%-6.6s", ins->name);
+
return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, 
ops->target.offset);
 }
 
@@ -173,7 +185,8 @@ static int comment__symbol(char *raw, char *comment, u64 
*addrp, char **namep)
return 0;
 }
 
-static int lock__parse(struct ins_operands *ops, const char *norm_arch)
+static int lock__parse(char *ins_name, struct ins_operands *ops,
+  const char *norm_arch)
 {
char *name;
 
@@ -194,7 +207,8 @@ static int lock__parse(struct ins_operands *ops, const char 
*norm_arch)
return 0;
 
if (ops->locked.ins->ops->parse &&
-   ops->locked.ins->ops->parse(ops->locked.ops, norm_arch) < 0)
+   ops->locked.ins->ops->parse(ins_name,
+   ops->locked.ops, norm_arch) < 0)
goto out_free_ops;
 
return 0;
@@ -237,7 +251,8 @@ static struct ins_ops lock_ops = {
.scnprintf = lock__scnprintf,
 };
 
-static int mov__parse(struct ins_operands *ops, const char *norm_arch)
+static int mov__parse(char *ins_name __maybe_unused, struct ins_operands *ops,
+ const char *norm_arch)
 {
char *s = strchr(ops->raw, ','), *target, *comment, prev;
 
@@ -304,7 +319,7 @@ static struct ins_ops mov_ops = {
.scnprintf = mov__scnprintf,
 };
 
-static int dec__parse(struct ins_operands *ops,
+static int dec__parse(char *ins_name __maybe_unused, struct ins_operands *ops,
  const char *norm_arch __maybe_unused)
 {
char *target, *comment, *s, prev;
@@ -459,6 +474,11 @@ static struct ins instructions_arm[] = {
{ .name = "bne",   .ops  = &jump_ops, },
 };
 
+struct instructions_powerpc {
+   struct ins *ins;
+   struct list_head list;
+};
+
 static int ins__key_cmp(const void *name, const void *insp)
 {
const struct ins *ins = insp;
@@ -474,6 +494,125 @@ static int ins__cmp(const void *a, const void *b)
return strcmp(ia->name, ib->name);
 }
 
+static struct ins *list_add__ins_powerpc(struct instructions_powerpc *head,
+const char *name, struct ins_ops *ops)
+{
+   struct instructions_powerpc *ins_powerpc;
+   struct ins *ins;
+
+   ins = zalloc(sizeof(struct ins));
+   if (!ins)
+   return NULL;
+
+   ins_powerpc = zalloc(sizeof(struct instructions_powerpc));
+   if (!ins_powerpc)
+   goto out_free_ins;
+
+   ins->name = strdup(name);
+   if (!ins->name)
+   goto out_free_ins_power;
+
+   ins->ops = ops;
+

[PATCH v4 2/3] perf annotate: Enable cross arch annotate

2016-07-07 Thread Ravi Bangoria
Change the current data structures and functions to enable cross-arch
annotate.

The current implementation has no logic for recording on one arch and
annotating on another. Such remote annotate is only partially possible with
the current implementation, and only for x86 (or maybe arm as well). To make
remote annotation work properly, all architecture instruction tables need to
be included in the perf binary, and while annotating, perf must look up the
instruction table of the arch on which perf.data was recorded.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v4:
  - __maybe_unused was misplaced at few location. Corrected it

 tools/perf/builtin-top.c  |   2 +-
 tools/perf/ui/browsers/annotate.c |   3 +-
 tools/perf/ui/gtk/annotate.c  |   2 +-
 tools/perf/util/annotate.c| 134 --
 tools/perf/util/annotate.h|   5 +-
 5 files changed, 93 insertions(+), 53 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 07fc792..d4fd947 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -128,7 +128,7 @@ static int perf_top__parse_source(struct perf_top *top, 
struct hist_entry *he)
return err;
}
 
-   err = symbol__annotate(sym, map, 0);
+   err = symbol__annotate(sym, map, 0, NULL);
if (err == 0) {
 out_assign:
top->sym_filter_entry = he;
diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index 29dc6d2..3a652a6f 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1050,7 +1050,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map 
*map,
  (nr_pcnt - 1);
}
 
-   if (symbol__annotate(sym, map, sizeof_bdl) < 0) {
+   if (symbol__annotate(sym, map, sizeof_bdl,
+perf_evsel__env_arch(evsel)) < 0) {
ui__error("%s", ui_helpline__last_msg);
goto out_free_offsets;
}
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 9c7ff8d..d7150b3 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -166,7 +166,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct 
map *map,
if (map->dso->annotate_warned)
return -1;
 
-   if (symbol__annotate(sym, map, 0) < 0) {
+   if (symbol__annotate(sym, map, 0, perf_evsel__env_arch(evsel)) < 0) {
ui__error("%s", ui_helpline__current);
return -1;
}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index e9825fe..32889ce 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -20,12 +20,14 @@
 #include 
 #include 
 #include 
+#include 
+#include "../arch/common.h"
 
 const char *disassembler_style;
 const char *objdump_path;
 static regex_t  file_lineno;
 
-static struct ins *ins__find(const char *name);
+static struct ins *ins__find(const char *name, const char *norm_arch);
 static int disasm_line__parse(char *line, char **namep, char **rawp);
 
 static void ins__delete(struct ins_operands *ops)
@@ -53,7 +55,7 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
return ins__raw_scnprintf(ins, bf, size, ops);
 }
 
-static int call__parse(struct ins_operands *ops)
+static int call__parse(struct ins_operands *ops, const char *norm_arch)
 {
char *endptr, *tok, *name;
 
@@ -65,10 +67,8 @@ static int call__parse(struct ins_operands *ops)
 
name++;
 
-#ifdef __arm__
-   if (strchr(name, '+'))
+   if (!strcmp(norm_arch, NORM_ARM) && strchr(name, '+'))
return -1;
-#endif
 
tok = strchr(name, '>');
if (tok == NULL)
@@ -117,7 +117,8 @@ bool ins__is_call(const struct ins *ins)
return ins->ops == &call_ops;
 }
 
-static int jump__parse(struct ins_operands *ops)
+static int jump__parse(struct ins_operands *ops,
+  const char *norm_arch __maybe_unused)
 {
const char *s = strchr(ops->raw, '+');
 
@@ -172,7 +173,7 @@ static int comment__symbol(char *raw, char *comment, u64 
*addrp, char **namep)
return 0;
 }
 
-static int lock__parse(struct ins_operands *ops)
+static int lock__parse(struct ins_operands *ops, const char *norm_arch)
 {
char *name;
 
@@ -183,7 +184,7 @@ static int lock__parse(struct ins_operands *ops)
if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0)
goto out_free_ops;
 
-   ops->locked.ins = ins__find(name);
+   ops->locked.ins = ins__find(name, norm_arch);
free(name);
 
if (ops->locked.ins == NULL)
@@ -193,7 +194,7 @@ static int lock__parse(struct ins_operands *ops)
return 0;
 
if (ops->locked.ins->ops->parse &&
-   ops->locked.ins->ops->parse(ops->

Re: [PATCH v3 3/4] perf annotate: add powerpc support

2016-07-07 Thread Ravi Bangoria

Hi Michael,

On Wednesday 06 July 2016 03:38 PM, Michael Ellerman wrote:

Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com> writes:


On Thursday 30 June 2016 11:51 AM, Michael Ellerman wrote:

On Thu, 2016-06-30 at 11:44 +0530, Ravi Bangoria wrote:

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 36a5825..b87eac7 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -476,6 +481,125 @@ static int ins__cmp(const void *a, const void *b)

...

+
+static struct ins *ins__find_powerpc(const char *name)
+{
+   int i;
+   struct ins *ins;
+   struct ins_ops *ops;
+   static struct instructions_powerpc head;
+   static bool list_initialized;
+
+   /*
+* - Interested only if instruction starts with 'b'.
+* - Few start with 'b', but aren't branch instructions.
+* - Let's also ignore instructions involving 'ctr' and
+*   'tar' since target branch addresses for those can't
+*   be determined statically.
+*/
+   if (name[0] != 'b' ||
+   !strncmp(name, "bcd", 3)   ||
+   !strncmp(name, "brinc", 5) ||
+   !strncmp(name, "bper", 4)  ||
+   strstr(name, "ctr")||
+   strstr(name, "tar"))
+   return NULL;

It would be good if 'bctr' was at least recognised as a branch, even if we
can't determine the target. They are very common.

We can not show arrow for this since we don't know the target location.
can you please suggest how you intends perf to display bctr?

Yeah I understand you can't show an arrow.

I guess it could just be an unterminated arrow? But I'm not sure if
that's easy to do with the way the UI is constructed. eg. something
like:

 ld  r12,0(r12)
 mtctr   r12
 bctrl  -->
 ld  r3,-32704(r2)

But that's just an idea.


I've sent v4, which enables annotate for 'bctr' instructions.

For 'bctr', it will show a down arrow (indicating a jump) and 'bctrl' will
show a right arrow (indicating a call). But no navigation options will be
provided; pressing the Enter key on it will show a message like
"Invalid target".

Please review it.


bctr can be classified into two variants -- 'bctr' and 'bctrl'.

'bctr' will be considered as jump instruction but jump__parse() won't
be able to find any target location and hence it will set target to
UINT64_MAX which transform 'bctr' to 'bctr UINT64_MAX'. This
looks misleading.

Agreed.


bctrl will be considered as call instruction but call_parse() won't
be able to find any target function and hence it won't show any
navigation arrow for this instruction. Which is same as filter it
beforehand.

OK.

Maybe what I'm asking for is an enhancement and can be done later.


It doesn't look like we have the opcode handy here? Could we get it somehow?
That would make this a *lot* more robust.

objdump prints machine code, but I don't know how difficult that would
be to parse to get opcode.

Normal objdump -d output includes the opcode, eg:

c000886c:   2c 2c 00 00 cmpdi   r12,0
 ^^^

The only thing you need to know is the endian and you can reconstruct
the raw instruction.

Then you can just decode the opcode, see how we do it in the kernel with
eg. instr_is_relative_branch().


I'm sorry, I was thinking that you wanted to show opcodes in perf
annotate. But you were asking to use the opcode instead of parsing the
instruction mnemonics.

That would mean rewriting the parsing code. I don't know whether there is any
library already available for this that we could use directly. I'm thinking
about this.

- Ravi


cheers



___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 1/3] perf: Define macro for normalized arch names

2016-07-07 Thread Ravi Bangoria
Define a macro for each normalized arch name and use the macros instead
of bare arch name strings.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v4:
  - Moved position of patch

 tools/perf/arch/common.c   | 36 ++--
 tools/perf/arch/common.h   | 11 +++
 tools/perf/util/unwind-libunwind.c |  4 ++--
 3 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index ee69668..feb2113 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -122,25 +122,25 @@ static int lookup_triplets(const char *const *triplets, 
const char *name)
 const char *normalize_arch(char *arch)
 {
if (!strcmp(arch, "x86_64"))
-   return "x86";
+   return NORM_X86;
if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
-   return "x86";
+   return NORM_X86;
if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
-   return "sparc";
+   return NORM_SPARC;
if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64"))
-   return "arm64";
+   return NORM_ARM64;
if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
-   return "arm";
+   return NORM_ARM;
if (!strncmp(arch, "s390", 4))
-   return "s390";
+   return NORM_S390;
if (!strncmp(arch, "parisc", 6))
-   return "parisc";
+   return NORM_PARISC;
if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
-   return "powerpc";
+   return NORM_POWERPC;
if (!strncmp(arch, "mips", 4))
-   return "mips";
+   return NORM_MIPS;
if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
-   return "sh";
+   return NORM_SH;
 
return arch;
 }
@@ -180,21 +180,21 @@ static int perf_env__lookup_binutils_path(struct perf_env 
*env,
zfree(&buf);
}
 
-   if (!strcmp(arch, "arm"))
+   if (!strcmp(arch, NORM_ARM))
path_list = arm_triplets;
-   else if (!strcmp(arch, "arm64"))
+   else if (!strcmp(arch, NORM_ARM64))
path_list = arm64_triplets;
-   else if (!strcmp(arch, "powerpc"))
+   else if (!strcmp(arch, NORM_POWERPC))
path_list = powerpc_triplets;
-   else if (!strcmp(arch, "sh"))
+   else if (!strcmp(arch, NORM_SH))
path_list = sh_triplets;
-   else if (!strcmp(arch, "s390"))
+   else if (!strcmp(arch, NORM_S390))
path_list = s390_triplets;
-   else if (!strcmp(arch, "sparc"))
+   else if (!strcmp(arch, NORM_SPARC))
path_list = sparc_triplets;
-   else if (!strcmp(arch, "x86"))
+   else if (!strcmp(arch, NORM_X86))
path_list = x86_triplets;
-   else if (!strcmp(arch, "mips"))
+   else if (!strcmp(arch, NORM_MIPS))
path_list = mips_triplets;
else {
ui__error("binutils for %s not supported.\n", arch);
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index 6b01c73..14ca8ca 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -5,6 +5,17 @@
 
 extern const char *objdump_path;
 
+/* Macro for normalized arch names */
+#define NORM_X86   "x86"
+#define NORM_SPARC "sparc"
+#define NORM_ARM64 "arm64"
+#define NORM_ARM   "arm"
+#define NORM_S390  "s390"
+#define NORM_PARISC"parisc"
+#define NORM_POWERPC   "powerpc"
+#define NORM_MIPS  "mips"
+#define NORM_SH"sh"
+
 int perf_env__lookup_objdump(struct perf_env *env);
 const char *normalize_arch(char *arch);
 
diff --git a/tools/perf/util/unwind-libunwind.c 
b/tools/perf/util/unwind-libunwind.c
index 6d542a4..6199102 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -40,10 +40,10 @@ int unwind__prepare_access(struct thread *thread, struct 
map *map,
 
arch = normalize_arch(thread->mg->machine->env->arch);
 
-   if (!strcmp(arch, "x86")) {
+   if (!strcmp(arch, NORM_X86)) {
if (dso_type != DSO__TYPE_64BIT)
ops = x86_32_unwind_libunwind_ops;
-   } else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) {
+   } else if (!strcmp(arch, NORM_ARM64) || !strcmp(arch, NORM_ARM)) {
if (dso_type == DSO__TYPE_64BIT)
ops = arm64_unwind_libunwind_ops;
}
-- 
2.5.5


Re: [PATCH v3 3/4] perf annotate: add powerpc support

2016-07-01 Thread Ravi Bangoria

Thanks Michael for your suggestion.

On Thursday 30 June 2016 11:51 AM, Michael Ellerman wrote:

On Thu, 2016-06-30 at 11:44 +0530, Ravi Bangoria wrote:

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 36a5825..b87eac7 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -476,6 +481,125 @@ static int ins__cmp(const void *a, const void *b)

...

+
+static struct ins *ins__find_powerpc(const char *name)
+{
+   int i;
+   struct ins *ins;
+   struct ins_ops *ops;
+   static struct instructions_powerpc head;
+   static bool list_initialized;
+
+   /*
+* - Interested only if instruction starts with 'b'.
+* - Few start with 'b', but aren't branch instructions.
+* - Let's also ignore instructions involving 'ctr' and
+*   'tar' since target branch addresses for those can't
+*   be determined statically.
+*/
+   if (name[0] != 'b' ||
+   !strncmp(name, "bcd", 3)   ||
+   !strncmp(name, "brinc", 5) ||
+   !strncmp(name, "bper", 4)  ||
+   strstr(name, "ctr")||
+   strstr(name, "tar"))
+   return NULL;

It would be good if 'bctr' was at least recognised as a branch, even if we
can't determine the target. They are very common.


We cannot show an arrow for this since we don't know the target location.
Can you please suggest how you intend perf to display bctr?

bctr can be classified into two variants -- 'bctr' and 'bctrl'.

'bctr' will be treated as a jump instruction, but jump__parse() won't be
able to find any target location and hence will set the target to
UINT64_MAX, which transforms 'bctr' into 'bctr UINT64_MAX'. This looks
misleading.

'bctrl' will be treated as a call instruction, but call__parse() won't be
able to find any target function and hence won't show any navigation arrow
for this instruction, which is the same as filtering it out beforehand.


It doesn't look like we have the opcode handy here? Could we get it somehow?
That would make this a *lot* more robust.


objdump prints machine code, but I don't know how difficult that would
be to parse to get opcode.

-Ravi


cheers




Re: [PATCH v3 3/4] perf annotate: add powerpc support

2016-07-01 Thread Ravi Bangoria

Hi Balbir,

On Friday 01 July 2016 06:18 PM, Balbir Singh wrote:

On Fri, 2016-07-01 at 14:13 +0530, Ravi Bangoria wrote:

Thanks Michael for your suggestion.
  
On Thursday 30 June 2016 11:51 AM, Michael Ellerman wrote:
  
On Thu, 2016-06-30 at 11:44 +0530, Ravi Bangoria wrote:
  
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c

index 36a5825..b87eac7 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -476,6 +481,125 @@ static int ins__cmp(const void *a, const void *b)

...
  
+

+static struct ins *ins__find_powerpc(const char *name)
+{
+   int i;
+   struct ins *ins;
+   struct ins_ops *ops;
+   static struct instructions_powerpc head;
+   static bool list_initialized;
+
+   /*
+* - Interested only if instruction starts with 'b'.
+* - Few start with 'b', but aren't branch instructions.
+* - Let's also ignore instructions involving 'ctr' and
+*   'tar' since target branch addresses for those can't
+*   be determined statically.
+*/
+   if (name[0] != 'b' ||
+   !strncmp(name, "bcd", 3)   ||
+   !strncmp(name, "brinc", 5) ||
+   !strncmp(name, "bper", 4)  ||
+   strstr(name, "ctr")||
+   strstr(name, "tar"))
+   return NULL;

It would be good if 'bctr' was at least recognised as a branch, even if we
can't determine the target. They are very common.

We can not show arrow for this since we don't know the target location.
can you please suggest how you intends perf to display bctr?
  
bctr can be classified into two variants -- 'bctr' and 'bctrl'.
  
'bctr' will be considered as jump instruction but jump__parse() won't

be able to find any target location and hence it will set target to
UINT64_MAX which transform 'bctr' to 'bctr UINT64_MAX'. This
looks misleading.
  
bctrl will be considered as call instruction but call_parse() won't

be able to find any target function and hence it won't show any
navigation arrow for this instruction. Which is same as filter it
beforehand.


The target location and function are in the counter. Can't we add
this to instruction ops? Is it a major change to add it?


Of course we can add it.

What I mean is that we cannot determine the target location statically by
parsing objdump output. For example, consider this snippet:

objdump output:

  c0143848:   lwarx   r8,0,r10
  c014384c:   addic   r8,r8,1
  c0143850:   stwcx.  r8,0,r10
  c0143854:   bne-c0143848 <.rcu_idle_exit+0x58>

corresponding perf annotate output:

  58:  lwarx  r8,0,r10
 addic  r8,r8,1
 stwcx. r8,0,r10
 bne-   58

The TUI will show an up arrow before the 'bne- 58' instruction, indicating
it is a jump instruction. When we focus on the 'bne- 58' instruction, the
arrow will span from that instruction to the instruction at offset 58
(lwarx). Pressing Enter will jump focus to the target.

In the case of 'bctr', we cannot determine the target location statically
and hence cannot provide any navigation options. Same for 'bctrl' as well.

Please correct me if I misunderstood anything.

-Ravi

  
Balbir Singh.





Re: [PATCH v2 3/4] perf annotate: add powerpc support

2016-06-29 Thread Ravi Bangoria

Thanks Naveen,

On Wednesday 29 June 2016 08:15 PM, Naveen N. Rao wrote:

On 2016/06/29 04:45PM, Ravi Bangoria wrote:

From: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>

Powerpc has long list of branch instructions and hardcoding them in
table appears to be error-prone. So, add new function to find
instruction instead of creating table. This function dynamically
create table(list of 'struct ins'), and instead of creating object
every time, first check if list already contain object for that
nemonics.

Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v2:
   - Corrected few memory leaks.
   - Created Dynamic list for powerpc to optimize memory consumption

  tools/perf/util/annotate.c | 121 +
  1 file changed, 121 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 36a5825..812bfad 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -461,6 +461,11 @@ static struct ins instructions_arm[] = {
{ .name = "bne",   .ops  = &jump_ops, },
  };

+struct instructions_powerpc {
+   struct ins *ins;
+   struct list_head list;
+};
+
  static int ins__key_cmp(const void *name, const void *insp)
  {
const struct ins *ins = insp;
@@ -476,6 +481,120 @@ static int ins__cmp(const void *a, const void *b)
return strcmp(ia->name, ib->name);
  }

+static int list_add__ins_powerpc(struct instructions_powerpc *head,
+struct ins *ins)
+{
+   struct instructions_powerpc *ins_powerpc;
+
+   ins_powerpc = zalloc(sizeof(struct instructions_powerpc));
+   if (!ins_powerpc)
+   return -1;
+
+   ins_powerpc->ins = ins;
+   list_add_tail(&(ins_powerpc->list), &(head->list));
+
+   return 0;
+}
+
+static struct ins *list_search__ins_powerpc(struct instructions_powerpc *head,
+   const char *name)
+{
+   struct instructions_powerpc *pos;
+
+   list_for_each_entry(pos, &head->list, list) {
+   if (!strcmp(pos->ins->name, name))
+   return pos->ins;
+   }
+   return NULL;
+}
+
+static struct ins *ins__find_powerpc(const char *name)
+{
+   int i;
+   struct ins *ins;
+   static struct instructions_powerpc head;
+   static bool list_initialized;
+
+   if (!list_initialized) {
+   INIT_LIST_HEAD(&head.list);
+   list_initialized = true;
+   }
+
+   /*
+* Search if we already created object of 'struct ins'
+* for this instruction
+*/
+   ins = list_search__ins_powerpc(&head, name);
+   if (ins)
+   return ins;
+
+   ins = zalloc(sizeof(struct ins));
+   if (!ins)
+   return NULL;
+
+   ins->name = strdup(name);
+   if (!ins->name)
+   goto err;

You can move the above two inside the below if condition, so that you
only allocate memory if needed.

Or, what would be better would be to pass 'name' and the appropriate ops
pointer to the helper above (list_add__ins_powerpc) and have that
allocate 'struct ins' and insert into the list.
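
Something along those lines, i.e. the helper owns both the allocation and
the cleanup (a sketch of the suggestion; roughly the shape the v4 patch
later took):

    static struct ins *list_add__ins_powerpc(struct instructions_powerpc *head,
                                             const char *name, struct ins_ops *ops)
    {
            struct instructions_powerpc *ins_powerpc;
            struct ins *ins;

            ins = zalloc(sizeof(struct ins));
            if (!ins)
                    return NULL;

            ins_powerpc = zalloc(sizeof(struct instructions_powerpc));
            if (!ins_powerpc)
                    goto out_free_ins;

            ins->name = strdup(name);
            if (!ins->name)
                    goto out_free_ins_power;

            ins->ops = ops;
            ins_powerpc->ins = ins;
            list_add_tail(&ins_powerpc->list, &head->list);

            return ins;

    out_free_ins_power:
            free(ins_powerpc);
    out_free_ins:
            free(ins);
            return NULL;
    }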


Yes I will think about this.


+
+   if (name[0] == 'b') {
+   /* branch instructions */
+   ins->ops = &jump_ops;
+
+   /*
+* - Few start with 'b', but aren't branch instructions.
+* - Let's also ignore instructions involving 'ctr' and
+*   'tar' since target branch addresses for those can't
+*   be determined statically.
+*/
+   if (!strncmp(name, "bcd", 3)   ||
+   !strncmp(name, "brinc", 5) ||
+   !strncmp(name, "bper", 4)  ||
+   strstr(name, "ctr")||
+   strstr(name, "tar"))
+   goto err;

You are still leaking ins->name here.


Ah!! Sorry. I missed that we are using strdup here. Will correct it.
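
(For reference, a corrected error path would look roughly like this sketch,
releasing both allocations before bailing out:)

    err:
            zfree(&ins->name);      /* frees the strdup'd name, if any */
            free(ins);
            return NULL;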

-Ravi


[PATCH v2 3/4] perf annotate: add powerpc support

2016-06-29 Thread Ravi Bangoria
From: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>

Powerpc has a long list of branch instructions and hardcoding them in a table
appears to be error-prone. So, add a new function to find the instruction
instead of creating a static table. This function builds the table (a list
of 'struct ins') dynamically and, instead of creating an object every time,
first checks whether the list already contains an object for that
mnemonic.

Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v2:
  - Corrected few memory leaks.
  - Created Dynamic list for powerpc to optimize memory consumption

 tools/perf/util/annotate.c | 121 +
 1 file changed, 121 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 36a5825..812bfad 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -461,6 +461,11 @@ static struct ins instructions_arm[] = {
{ .name = "bne",   .ops  = _ops, },
 };
 
+struct instructions_powerpc {
+   struct ins *ins;
+   struct list_head list;
+};
+
 static int ins__key_cmp(const void *name, const void *insp)
 {
const struct ins *ins = insp;
@@ -476,6 +481,120 @@ static int ins__cmp(const void *a, const void *b)
return strcmp(ia->name, ib->name);
 }
 
+static int list_add__ins_powerpc(struct instructions_powerpc *head,
+struct ins *ins)
+{
+   struct instructions_powerpc *ins_powerpc;
+
+   ins_powerpc = zalloc(sizeof(struct instructions_powerpc));
+   if (!ins_powerpc)
+   return -1;
+
+   ins_powerpc->ins = ins;
+   list_add_tail(&(ins_powerpc->list), &(head->list));
+
+   return 0;
+}
+
+static struct ins *list_search__ins_powerpc(struct instructions_powerpc *head,
+   const char *name)
+{
+   struct instructions_powerpc *pos;
+
+   list_for_each_entry(pos, &head->list, list) {
+   if (!strcmp(pos->ins->name, name))
+   return pos->ins;
+   }
+   return NULL;
+}
+
+static struct ins *ins__find_powerpc(const char *name)
+{
+   int i;
+   struct ins *ins;
+   static struct instructions_powerpc head;
+   static bool list_initialized;
+
+   if (!list_initialized) {
+   INIT_LIST_HEAD(&head.list);
+   list_initialized = true;
+   }
+
+   /*
+* Search if we already created object of 'struct ins'
+* for this instruction
+*/
+   ins = list_search__ins_powerpc(&head, name);
+   if (ins)
+   return ins;
+
+   ins = zalloc(sizeof(struct ins));
+   if (!ins)
+   return NULL;
+
+   ins->name = strdup(name);
+   if (!ins->name)
+   goto err;
+
+   if (name[0] == 'b') {
+   /* branch instructions */
+   ins->ops = &jump_ops;
+
+   /*
+* - Few start with 'b', but aren't branch instructions.
+* - Let's also ignore instructions involving 'ctr' and
+*   'tar' since target branch addresses for those can't
+*   be determined statically.
+*/
+   if (!strncmp(name, "bcd", 3)   ||
+   !strncmp(name, "brinc", 5) ||
+   !strncmp(name, "bper", 4)  ||
+   strstr(name, "ctr")||
+   strstr(name, "tar"))
+   goto err;
+
+   i = strlen(name) - 1;
+   if (i < 0)
+   goto err;
+
+   /* ignore optional hints at the end of the instructions */
+   if (name[i] == '+' || name[i] == '-')
+   i--;
+
+   if (name[i] == 'l' || (name[i] == 'a' && name[i-1] == 'l')) {
+   /*
+* if the instruction ends up with 'l' or 'la', then
+* those are considered 'calls' since they update LR.
+* ... except for 'bnl' which is branch if not less than
+* and the absolute form of the same.
+*/
+   if (strcmp(name, "bnl") && strcmp(name, "bnl+") &&
+   strcmp(name, "bnl-") && strcmp(name, "bnla") &&
+   strcmp(name, "bnla+") && strcmp(name, "bnla-"))
+   ins->ops = &call_ops;
+   }
+   if (name[i] == 'r' && name[i-1] == 'l')
+   /*
+* instructions ending with 'lr' are considered to be
+* return instructions
+*/

[PATCH v2 4/4] perf annotate: Define macro for arch names

2016-06-29 Thread Ravi Bangoria
Define a macro for each arch name and use the macros instead of
open-coded arch name strings.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v2:
  - No changes

 tools/perf/arch/common.c   | 36 ++--
 tools/perf/arch/common.h   | 11 +++
 tools/perf/util/annotate.c | 10 +-
 tools/perf/util/unwind-libunwind.c |  4 ++--
 4 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index ee69668..feb2113 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -122,25 +122,25 @@ static int lookup_triplets(const char *const *triplets, 
const char *name)
 const char *normalize_arch(char *arch)
 {
if (!strcmp(arch, "x86_64"))
-   return "x86";
+   return NORM_X86;
if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
-   return "x86";
+   return NORM_X86;
if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
-   return "sparc";
+   return NORM_SPARC;
if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64"))
-   return "arm64";
+   return NORM_ARM64;
if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
-   return "arm";
+   return NORM_ARM;
if (!strncmp(arch, "s390", 4))
-   return "s390";
+   return NORM_S390;
if (!strncmp(arch, "parisc", 6))
-   return "parisc";
+   return NORM_PARISC;
if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
-   return "powerpc";
+   return NORM_POWERPC;
if (!strncmp(arch, "mips", 4))
-   return "mips";
+   return NORM_MIPS;
if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
-   return "sh";
+   return NORM_SH;
 
return arch;
 }
@@ -180,21 +180,21 @@ static int perf_env__lookup_binutils_path(struct perf_env 
*env,
zfree(&buf);
}
 
-   if (!strcmp(arch, "arm"))
+   if (!strcmp(arch, NORM_ARM))
path_list = arm_triplets;
-   else if (!strcmp(arch, "arm64"))
+   else if (!strcmp(arch, NORM_ARM64))
path_list = arm64_triplets;
-   else if (!strcmp(arch, "powerpc"))
+   else if (!strcmp(arch, NORM_POWERPC))
path_list = powerpc_triplets;
-   else if (!strcmp(arch, "sh"))
+   else if (!strcmp(arch, NORM_SH))
path_list = sh_triplets;
-   else if (!strcmp(arch, "s390"))
+   else if (!strcmp(arch, NORM_S390))
path_list = s390_triplets;
-   else if (!strcmp(arch, "sparc"))
+   else if (!strcmp(arch, NORM_SPARC))
path_list = sparc_triplets;
-   else if (!strcmp(arch, "x86"))
+   else if (!strcmp(arch, NORM_X86))
path_list = x86_triplets;
-   else if (!strcmp(arch, "mips"))
+   else if (!strcmp(arch, NORM_MIPS))
path_list = mips_triplets;
else {
ui__error("binutils for %s not supported.\n", arch);
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index 6b01c73..14ca8ca 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -5,6 +5,17 @@
 
 extern const char *objdump_path;
 
+/* Macro for normalized arch names */
+#define NORM_X86   "x86"
+#define NORM_SPARC "sparc"
+#define NORM_ARM64 "arm64"
+#define NORM_ARM   "arm"
+#define NORM_S390  "s390"
+#define NORM_PARISC"parisc"
+#define NORM_POWERPC   "powerpc"
+#define NORM_MIPS  "mips"
+#define NORM_SH"sh"
+
 int perf_env__lookup_objdump(struct perf_env *env);
 const char *normalize_arch(char *arch);
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 812bfad..8c27486 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -68,7 +68,7 @@ static int call__parse(struct ins_operands *ops,
 
name++;
 
-   if (!strcmp(norm_arch, "arm") && strchr(name, '+'))
+   if (!strcmp(norm_arch, NORM_ARM) && strchr(name, '+'))
return -1;
 
tok = strchr(name, '>');
@@ -255,7 +255,7 @@ static int mov__parse(struct ins_operands *ops,
 
target = ++s;
 
-   if (!strcmp(norm_arch, "arm"))
+   if (!strcmp(norm_arch, NORM_ARM))
comment = strchr(s, ';');
else
  

[PATCH v3 0/4] perf annotate: Enable cross arch annotate

2016-06-30 Thread Ravi Bangoria
Perf can currently only support code navigation (branches and calls) in
annotate when run on the same architecture where perf.data was
recorded; cross-arch annotate is not supported.

This patchset enables cross-arch annotate. Currently I've used the x86
and arm instruction tables which are already available, and I'm adding
support for powerpc as well. Adding support for other archs will be
easy.

I've created this patchset on top of acme/perf/core and tested it with
x86 and powerpc only.

Example:

  Record on powerpc:
  $ ./perf record -a

  Report -> Annotate on x86:
  $ ./perf report -i perf.data.powerpc --vmlinux vmlinux.powerpc

Changes in v3:
  - Optimized patch that enables annotate on powerpc
  - Corrected one memory leak

v2 link: https://lkml.org/lkml/2016/6/29/278

Naveen N. Rao (1):
  perf annotate: add powerpc support

Ravi Bangoria (3):
  perf: Utility function to fetch arch
  perf annotate: Enable cross arch annotate
  perf: Define macro for normalized arch names

 tools/perf/arch/common.c   |  36 ++---
 tools/perf/arch/common.h   |  11 ++
 tools/perf/builtin-top.c   |   2 +-
 tools/perf/ui/browsers/annotate.c  |   3 +-
 tools/perf/ui/gtk/annotate.c   |   2 +-
 tools/perf/util/annotate.c | 260 ++---
 tools/perf/util/annotate.h |   5 +-
 tools/perf/util/evsel.c|   7 +
 tools/perf/util/evsel.h|   2 +
 tools/perf/util/unwind-libunwind.c |   4 +-
 10 files changed, 260 insertions(+), 72 deletions(-)

--
2.5.5


[PATCH v3 1/4] perf: Utility function to fetch arch

2016-06-30 Thread Ravi Bangoria
Add a utility function to fetch the arch from an evsel (via
evsel->evlist->env->arch).
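
For context, a minimal sketch of how a later patch in this series
consumes the helper (the call shape is taken from patch 2/4; 'evsel',
'sym' and 'map' are whatever the caller already has in scope):

    /* Pass the recorded arch down into the annotation code. */
    err = symbol__annotate(sym, map, 0, perf_evsel__env_arch(evsel));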

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Change in v3:
  - No changes

 tools/perf/util/evsel.c | 7 +++
 tools/perf/util/evsel.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1d8f2bb..0fea724 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2422,3 +2422,10 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, 
struct target *target,
 err, strerror_r(err, sbuf, sizeof(sbuf)),
 perf_evsel__name(evsel));
 }
+
+char *perf_evsel__env_arch(struct perf_evsel *evsel)
+{
+   if (evsel && evsel->evlist && evsel->evlist->env)
+   return evsel->evlist->env->arch;
+   return NULL;
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 828ddd1..86fed7a 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -435,4 +435,6 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const 
char *, void *);
 int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 attr__fprintf_f attr__fprintf, void *priv);
 
+char *perf_evsel__env_arch(struct perf_evsel *evsel);
+
 #endif /* __PERF_EVSEL_H */
-- 
2.5.5


[PATCH v3 2/4] perf annotate: Enable cross arch annotate

2016-06-30 Thread Ravi Bangoria
Change the current data structures and functions to enable cross-arch
annotate.

The current implementation does not contain the logic to record on one
arch and annotate on another. This remote annotate is partially
possible with the current implementation for x86 (and maybe arm as
well) only. To make remote annotation work properly, all architecture
instruction tables need to be included in the perf binary, and while
annotating we look up the instruction table of the arch where perf.data
was recorded.

For arm, a few instructions were defined under #if __arm__, which I've
used as the table for arm. But I'm not sure whether instructions
defined outside of that block also contain arm instructions. Apart from
that, 'call__parse()' and 'move__parse()' contained an #ifdef __arm__
directive. I've changed it to if (!strcmp(norm_arch, "arm")). But I've
not tested this either.
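
For illustration, the reworked lookup can be pictured roughly as below.
This is a sketch only, not the exact hunk from this patch:
'instructions_x86' is an assumed table name, while 'instructions_arm'
and ins__key_cmp() do appear in the series.

    static struct ins *ins__find(const char *name, const char *norm_arch)
    {
            struct ins *table = instructions_x86;
            size_t nmemb = ARRAY_SIZE(instructions_x86);

            if (!strcmp(norm_arch, "arm")) {
                    table = instructions_arm;
                    nmemb = ARRAY_SIZE(instructions_arm);
            }

            /* Tables are kept sorted by name, so bsearch() can be used. */
            return bsearch(name, table, nmemb, sizeof(struct ins), ins__key_cmp);
    }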

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v3:
  - No changes

 tools/perf/builtin-top.c  |   2 +-
 tools/perf/ui/browsers/annotate.c |   3 +-
 tools/perf/ui/gtk/annotate.c  |   2 +-
 tools/perf/util/annotate.c| 136 --
 tools/perf/util/annotate.h|   5 +-
 5 files changed, 95 insertions(+), 53 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 07fc792..d4fd947 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -128,7 +128,7 @@ static int perf_top__parse_source(struct perf_top *top, 
struct hist_entry *he)
return err;
}
 
-   err = symbol__annotate(sym, map, 0);
+   err = symbol__annotate(sym, map, 0, NULL);
if (err == 0) {
 out_assign:
top->sym_filter_entry = he;
diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index 29dc6d2..3a652a6f 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1050,7 +1050,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map 
*map,
  (nr_pcnt - 1);
}
 
-   if (symbol__annotate(sym, map, sizeof_bdl) < 0) {
+   if (symbol__annotate(sym, map, sizeof_bdl,
+perf_evsel__env_arch(evsel)) < 0) {
ui__error("%s", ui_helpline__last_msg);
goto out_free_offsets;
}
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 9c7ff8d..d7150b3 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -166,7 +166,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct 
map *map,
if (map->dso->annotate_warned)
return -1;
 
-   if (symbol__annotate(sym, map, 0) < 0) {
+   if (symbol__annotate(sym, map, 0, perf_evsel__env_arch(evsel)) < 0) {
ui__error("%s", ui_helpline__current);
return -1;
}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index c385fec..36a5825 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -20,12 +20,14 @@
 #include 
 #include 
 #include 
+#include 
+#include "../arch/common.h"
 
 const char *disassembler_style;
 const char *objdump_path;
 static regex_t  file_lineno;
 
-static struct ins *ins__find(const char *name);
+static struct ins *ins__find(const char *name, const char *norm_arch);
 static int disasm_line__parse(char *line, char **namep, char **rawp);
 
 static void ins__delete(struct ins_operands *ops)
@@ -53,7 +55,8 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
return ins__raw_scnprintf(ins, bf, size, ops);
 }
 
-static int call__parse(struct ins_operands *ops)
+static int call__parse(struct ins_operands *ops,
+  __maybe_unused const char *norm_arch)
 {
char *endptr, *tok, *name;
 
@@ -65,10 +68,8 @@ static int call__parse(struct ins_operands *ops)
 
name++;
 
-#ifdef __arm__
-   if (strchr(name, '+'))
+   if (!strcmp(norm_arch, "arm") && strchr(name, '+'))
return -1;
-#endif
 
tok = strchr(name, '>');
if (tok == NULL)
@@ -117,7 +118,8 @@ bool ins__is_call(const struct ins *ins)
return ins->ops == &call_ops;
 }
 
-static int jump__parse(struct ins_operands *ops)
+static int jump__parse(struct ins_operands *ops,
+  __maybe_unused const char *norm_arch)
 {
const char *s = strchr(ops->raw, '+');
 
@@ -172,7 +174,7 @@ static int comment__symbol(char *raw, char *comment, u64 
*addrp, char **namep)
return 0;
 }
 
-static int lock__parse(struct ins_operands *ops)
+static int lock__parse(struct ins_operands *ops, const char *norm_arch)
 {
char *name;
 
@@ -183,7 +185,7 @@ static int lock__parse(struct ins_operands *ops)
if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0)
goto out_free_ops;
 
-   ops->locked.ins = ins__find

[PATCH v3 3/4] perf annotate: add powerpc support

2016-06-30 Thread Ravi Bangoria
From: Naveen N. Rao <naveen.n@linux.vnet.ibm.com> 

Powerpc has a long list of branch instructions and hardcoding them in
a table appears to be error-prone. So, add a new function to find the
instruction instead of creating a table. This function dynamically
creates the table (a list of 'struct ins') and, instead of creating an
object every time, first checks whether the list already contains an
object for that instruction.

Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v3:
  - Optimized code
  - Corrected one memory leak

 tools/perf/util/annotate.c | 126 +
 1 file changed, 126 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 36a5825..b87eac7 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -461,6 +461,11 @@ static struct ins instructions_arm[] = {
{ .name = "bne",   .ops  = _ops, },
 };
 
+struct instructions_powerpc {
+   struct ins *ins;
+   struct list_head list;
+};
+
 static int ins__key_cmp(const void *name, const void *insp)
 {
const struct ins *ins = insp;
@@ -476,6 +481,125 @@ static int ins__cmp(const void *a, const void *b)
return strcmp(ia->name, ib->name);
 }
 
+static struct ins *list_add__ins_powerpc(struct instructions_powerpc *head,
+const char *name, struct ins_ops *ops)
+{
+   struct instructions_powerpc *ins_powerpc;
+   struct ins *ins;
+
+   ins = zalloc(sizeof(struct ins));
+   if (!ins)
+   return NULL;
+
+   ins_powerpc = zalloc(sizeof(struct instructions_powerpc));
+   if (!ins_powerpc)
+   goto out_free_ins;
+
+   ins->name = strdup(name);
+   if (!ins->name)
+   goto out_free_ins_power;
+
+   ins->ops = ops;
+   ins_powerpc->ins = ins;
+   list_add_tail(&(ins_powerpc->list), &(head->list));
+
+   return ins;
+
+out_free_ins_power:
+   zfree(&ins_powerpc);
+out_free_ins:
+   zfree(&ins);
+   return NULL;
+}
+
+static struct ins *list_search__ins_powerpc(struct instructions_powerpc *head,
+   const char *name)
+{
+   struct instructions_powerpc *pos;
+
+   list_for_each_entry(pos, &head->list, list) {
+   if (!strcmp(pos->ins->name, name))
+   return pos->ins;
+   }
+   return NULL;
+}
+
+static struct ins *ins__find_powerpc(const char *name)
+{
+   int i;
+   struct ins *ins;
+   struct ins_ops *ops;
+   static struct instructions_powerpc head;
+   static bool list_initialized;
+
+   /*
+* - Interested only if instruction starts with 'b'.
+* - Few start with 'b', but aren't branch instructions.
+* - Let's also ignore instructions involving 'ctr' and
+*   'tar' since target branch addresses for those can't
+*   be determined statically.
+*/
+   if (name[0] != 'b' ||
+   !strncmp(name, "bcd", 3)   ||
+   !strncmp(name, "brinc", 5) ||
+   !strncmp(name, "bper", 4)  ||
+   strstr(name, "ctr")||
+   strstr(name, "tar"))
+   return NULL;
+
+   if (!list_initialized) {
+   INIT_LIST_HEAD(&head.list);
+   list_initialized = true;
+   }
+
+   /*
+* Return if we already have object of 'struct ins' for this
+* instruction
+*/
+   ins = list_search__ins_powerpc(&head, name);
+   if (ins)
+   return ins;
+
+   ops = &jump_ops;
+
+   i = strlen(name) - 1;
+   if (i < 0)
+   return NULL;
+
+   /* ignore optional hints at the end of the instructions */
+   if (name[i] == '+' || name[i] == '-')
+   i--;
+
+   if (name[i] == 'l' || (name[i] == 'a' && name[i-1] == 'l')) {
+   /*
+* if the instruction ends up with 'l' or 'la', then
+* those are considered 'calls' since they update LR.
+* ... except for 'bnl' which is branch if not less than
+* and the absolute form of the same.
+*/
+   if (strcmp(name, "bnl") && strcmp(name, "bnl+") &&
+   strcmp(name, "bnl-") && strcmp(name, "bnla") &&
+   strcmp(name, "bnla+") && strcmp(name, "bnla-"))
+   ops = &call_ops;
+   }
+   if (name[i] == 'r' && name[i-1] == 'l')
+   /*
+* instructions ending with 'lr' are considered to be
+* return instructions
+*/
+   ops = &ret_ops;
+
+   /*
+* Add instruction to list so next tim

[PATCH v3 4/4] perf: Define macro for normalized arch names

2016-06-30 Thread Ravi Bangoria
Define a macro for each normalized arch name and use the macros
instead of open-coded arch name strings.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v3:
  - No changes

 tools/perf/arch/common.c   | 36 ++--
 tools/perf/arch/common.h   | 11 +++
 tools/perf/util/annotate.c | 10 +-
 tools/perf/util/unwind-libunwind.c |  4 ++--
 4 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index ee69668..feb2113 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -122,25 +122,25 @@ static int lookup_triplets(const char *const *triplets, 
const char *name)
 const char *normalize_arch(char *arch)
 {
if (!strcmp(arch, "x86_64"))
-   return "x86";
+   return NORM_X86;
if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
-   return "x86";
+   return NORM_X86;
if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
-   return "sparc";
+   return NORM_SPARC;
if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64"))
-   return "arm64";
+   return NORM_ARM64;
if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
-   return "arm";
+   return NORM_ARM;
if (!strncmp(arch, "s390", 4))
-   return "s390";
+   return NORM_S390;
if (!strncmp(arch, "parisc", 6))
-   return "parisc";
+   return NORM_PARISC;
if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
-   return "powerpc";
+   return NORM_POWERPC;
if (!strncmp(arch, "mips", 4))
-   return "mips";
+   return NORM_MIPS;
if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
-   return "sh";
+   return NORM_SH;
 
return arch;
 }
@@ -180,21 +180,21 @@ static int perf_env__lookup_binutils_path(struct perf_env 
*env,
zfree(&buf);
}
 
-   if (!strcmp(arch, "arm"))
+   if (!strcmp(arch, NORM_ARM))
path_list = arm_triplets;
-   else if (!strcmp(arch, "arm64"))
+   else if (!strcmp(arch, NORM_ARM64))
path_list = arm64_triplets;
-   else if (!strcmp(arch, "powerpc"))
+   else if (!strcmp(arch, NORM_POWERPC))
path_list = powerpc_triplets;
-   else if (!strcmp(arch, "sh"))
+   else if (!strcmp(arch, NORM_SH))
path_list = sh_triplets;
-   else if (!strcmp(arch, "s390"))
+   else if (!strcmp(arch, NORM_S390))
path_list = s390_triplets;
-   else if (!strcmp(arch, "sparc"))
+   else if (!strcmp(arch, NORM_SPARC))
path_list = sparc_triplets;
-   else if (!strcmp(arch, "x86"))
+   else if (!strcmp(arch, NORM_X86))
path_list = x86_triplets;
-   else if (!strcmp(arch, "mips"))
+   else if (!strcmp(arch, NORM_MIPS))
path_list = mips_triplets;
else {
ui__error("binutils for %s not supported.\n", arch);
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index 6b01c73..14ca8ca 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -5,6 +5,17 @@
 
 extern const char *objdump_path;
 
+/* Macro for normalized arch names */
+#define NORM_X86   "x86"
+#define NORM_SPARC "sparc"
+#define NORM_ARM64 "arm64"
+#define NORM_ARM   "arm"
+#define NORM_S390  "s390"
+#define NORM_PARISC"parisc"
+#define NORM_POWERPC   "powerpc"
+#define NORM_MIPS  "mips"
+#define NORM_SH"sh"
+
 int perf_env__lookup_objdump(struct perf_env *env);
 const char *normalize_arch(char *arch);
 
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index b87eac7..fce60b4 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -68,7 +68,7 @@ static int call__parse(struct ins_operands *ops,
 
name++;
 
-   if (!strcmp(norm_arch, "arm") && strchr(name, '+'))
+   if (!strcmp(norm_arch, NORM_ARM) && strchr(name, '+'))
return -1;
 
tok = strchr(name, '>');
@@ -255,7 +255,7 @@ static int mov__parse(struct ins_operands *ops,
 
target = ++s;
 
-   if (!strcmp(norm_arch, "arm"))
+   if (!strcmp(norm_arch, NORM_ARM))
comment = strchr(s, 

[PATCH v2 0/4] perf annotate: Enable cross arch annotate

2016-06-29 Thread Ravi Bangoria
Perf can currently only support code navigation (branches and calls) in
annotate when run on the same architecture where perf.data was
recorded; cross-arch annotate is not supported.

This patchset enables cross-arch annotate. Currently I've used the x86
and arm instruction tables which are already available, and I'm adding
support for powerpc as well. Adding support for other archs will be
easy.

I've created this patchset on top of acme/perf/core and tested it with
x86 and powerpc only.

Example:

  Record on powerpc:
  $ ./perf record -a

  Report -> Annotate on x86:
  $ ./perf report -i perf.data.powerpc --vmlinux vmlinux.powerpc

Changes in v2:
  - Corrected few memory leaks.
  - Created Dynamic list for powerpc to optimize memory consumption

Naveen N. Rao (1):
  perf annotate: add powerpc support

Ravi Bangoria (3):
  perf: Utility function to fetch arch
  perf annotate: Enable cross arch annotate
  perf: Define macro for arch names

 tools/perf/arch/common.c   |  36 +++---
 tools/perf/arch/common.h   |  11 ++
 tools/perf/builtin-top.c   |   2 +-
 tools/perf/ui/browsers/annotate.c  |   3 +-
 tools/perf/ui/gtk/annotate.c   |   2 +-
 tools/perf/util/annotate.c | 255 ++---
 tools/perf/util/annotate.h |   5 +-
 tools/perf/util/evsel.c|   7 +
 tools/perf/util/evsel.h|   2 +
 tools/perf/util/unwind-libunwind.c |   4 +-
 10 files changed, 255 insertions(+), 72 deletions(-)

--
2.5.5


[PATCH v2 2/4] perf annotate: Enable cross arch annotate

2016-06-29 Thread Ravi Bangoria
Change the current data structures and functions to enable cross-arch
annotate.

The current implementation does not contain the logic to record on one
arch and annotate on another. This remote annotate is partially
possible with the current implementation for x86 (and maybe arm as
well) only. To make remote annotation work properly, all architecture
instruction tables need to be included in the perf binary, and while
annotating we look up the instruction table of the arch where perf.data
was recorded.

For arm, a few instructions were defined under #if __arm__, which I've
used as the table for arm. But I'm not sure whether instructions
defined outside of that block also contain arm instructions. Apart from
that, 'call__parse()' and 'move__parse()' contained an #ifdef __arm__
directive. I've changed it to if (!strcmp(norm_arch, "arm")). But I've
not tested this either.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v2:
  - No changes

 tools/perf/builtin-top.c  |   2 +-
 tools/perf/ui/browsers/annotate.c |   3 +-
 tools/perf/ui/gtk/annotate.c  |   2 +-
 tools/perf/util/annotate.c| 136 --
 tools/perf/util/annotate.h|   5 +-
 5 files changed, 95 insertions(+), 53 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 07fc792..d4fd947 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -128,7 +128,7 @@ static int perf_top__parse_source(struct perf_top *top, 
struct hist_entry *he)
return err;
}
 
-   err = symbol__annotate(sym, map, 0);
+   err = symbol__annotate(sym, map, 0, NULL);
if (err == 0) {
 out_assign:
top->sym_filter_entry = he;
diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index 29dc6d2..3a652a6f 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1050,7 +1050,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map 
*map,
  (nr_pcnt - 1);
}
 
-   if (symbol__annotate(sym, map, sizeof_bdl) < 0) {
+   if (symbol__annotate(sym, map, sizeof_bdl,
+perf_evsel__env_arch(evsel)) < 0) {
ui__error("%s", ui_helpline__last_msg);
goto out_free_offsets;
}
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 9c7ff8d..d7150b3 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -166,7 +166,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct 
map *map,
if (map->dso->annotate_warned)
return -1;
 
-   if (symbol__annotate(sym, map, 0) < 0) {
+   if (symbol__annotate(sym, map, 0, perf_evsel__env_arch(evsel)) < 0) {
ui__error("%s", ui_helpline__current);
return -1;
}
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index c385fec..36a5825 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -20,12 +20,14 @@
 #include 
 #include 
 #include 
+#include 
+#include "../arch/common.h"
 
 const char *disassembler_style;
 const char *objdump_path;
 static regex_t  file_lineno;
 
-static struct ins *ins__find(const char *name);
+static struct ins *ins__find(const char *name, const char *norm_arch);
 static int disasm_line__parse(char *line, char **namep, char **rawp);
 
 static void ins__delete(struct ins_operands *ops)
@@ -53,7 +55,8 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
return ins__raw_scnprintf(ins, bf, size, ops);
 }
 
-static int call__parse(struct ins_operands *ops)
+static int call__parse(struct ins_operands *ops,
+  __maybe_unused const char *norm_arch)
 {
char *endptr, *tok, *name;
 
@@ -65,10 +68,8 @@ static int call__parse(struct ins_operands *ops)
 
name++;
 
-#ifdef __arm__
-   if (strchr(name, '+'))
+   if (!strcmp(norm_arch, "arm") && strchr(name, '+'))
return -1;
-#endif
 
tok = strchr(name, '>');
if (tok == NULL)
@@ -117,7 +118,8 @@ bool ins__is_call(const struct ins *ins)
return ins->ops == &call_ops;
 }
 
-static int jump__parse(struct ins_operands *ops)
+static int jump__parse(struct ins_operands *ops,
+  __maybe_unused const char *norm_arch)
 {
const char *s = strchr(ops->raw, '+');
 
@@ -172,7 +174,7 @@ static int comment__symbol(char *raw, char *comment, u64 
*addrp, char **namep)
return 0;
 }
 
-static int lock__parse(struct ins_operands *ops)
+static int lock__parse(struct ins_operands *ops, const char *norm_arch)
 {
char *name;
 
@@ -183,7 +185,7 @@ static int lock__parse(struct ins_operands *ops)
if (disasm_line__parse(ops->raw, &name, &ops->locked.ops->raw) < 0)
goto out_free_ops;
 
-   

[PATCH v2 1/4] perf: Utility function to fetch arch

2016-06-29 Thread Ravi Bangoria
Add a utility function to fetch the arch from an evsel (via
evsel->evlist->env->arch).

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v2:
  - No changes

 tools/perf/util/evsel.c | 7 +++
 tools/perf/util/evsel.h | 2 ++
 2 files changed, 9 insertions(+)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 1d8f2bb..0fea724 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2422,3 +2422,10 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, 
struct target *target,
 err, strerror_r(err, sbuf, sizeof(sbuf)),
 perf_evsel__name(evsel));
 }
+
+char *perf_evsel__env_arch(struct perf_evsel *evsel)
+{
+   if (evsel && evsel->evlist && evsel->evlist->env)
+   return evsel->evlist->env->arch;
+   return NULL;
+}
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 828ddd1..86fed7a 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -435,4 +435,6 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const 
char *, void *);
 int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 attr__fprintf_f attr__fprintf, void *priv);
 
+char *perf_evsel__env_arch(struct perf_evsel *evsel);
+
 #endif /* __PERF_EVSEL_H */
-- 
2.5.5


[PATCH v2 2/2] powerpc: emulate_step tests for load/store instructions

2017-02-14 Thread Ravi Bangoria
Add a new selftest that tests emulate_step() for Normal, Floating
Point, Vector and Vector Scalar load/store instructions. The test runs
at boot time if CONFIG_KPROBES_SANITY_TEST and CONFIG_PPC64 are set.

Sample log:

  [0.762063] emulate_step smoke test: start.
  [0.762219] emulate_step smoke test: ld : PASS
  [0.762434] emulate_step smoke test: lwz: PASS
  [0.762653] emulate_step smoke test: lwzx   : PASS
  [0.762867] emulate_step smoke test: std: PASS
  [0.763082] emulate_step smoke test: ldarx / stdcx. : PASS
  [0.763302] emulate_step smoke test: lfsx   : PASS
  [0.763514] emulate_step smoke test: stfsx  : PASS
  [0.763727] emulate_step smoke test: lfdx   : PASS
  [0.763942] emulate_step smoke test: stfdx  : PASS
  [0.764134] emulate_step smoke test: lvx: PASS
  [0.764349] emulate_step smoke test: stvx   : PASS
  [0.764575] emulate_step smoke test: lxvd2x : PASS
  [0.764788] emulate_step smoke test: stxvd2x: PASS
  [0.764997] emulate_step smoke test: complete.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/ppc-opcode.h |   7 +
 arch/powerpc/include/asm/sstep.h  |   8 +
 arch/powerpc/kernel/kprobes.c |   2 +
 arch/powerpc/lib/Makefile |   4 +
 arch/powerpc/lib/test_emulate_step.c  | 439 ++
 5 files changed, 460 insertions(+)
 create mode 100644 arch/powerpc/lib/test_emulate_step.c

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index c4ced1d..691 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -284,6 +284,13 @@
 #define PPC_INST_BRANCH_COND   0x40800000
 #define PPC_INST_LBZCIX0x7c0006aa
 #define PPC_INST_STBCIX0x7c0007aa
+#define PPC_INST_LWZX  0x7c00002e
+#define PPC_INST_LFSX  0x7c00042e
+#define PPC_INST_STFSX 0x7c00052e
+#define PPC_INST_LFDX  0x7c0004ae
+#define PPC_INST_STFDX 0x7c0005ae
+#define PPC_INST_LVX   0x7c0000ce
+#define PPC_INST_STVX  0x7c0001ce
 
 /* macros to insert fields into opcodes */
 #define ___PPC_RA(a)   (((a) & 0x1f) << 16)
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index d3a42cc..d6d3630 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -87,3 +87,11 @@ struct instruction_op {
 
 extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 unsigned int instr);
+
+#if defined(CONFIG_KPROBES_SANITY_TEST) && defined(CONFIG_PPC64)
+void test_emulate_step(void);
+#else
+static inline void test_emulate_step(void)
+{
+}
+#endif
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 735ff3d..c867347 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -529,6 +529,8 @@ int __kprobes longjmp_break_handler(struct kprobe *p, 
struct pt_regs *regs)
 
 int __init arch_init_kprobes(void)
 {
+   test_emulate_step();
+
return register_kprobe(&trampoline_p);
 }
 
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 309361e8..7d046ca 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -35,3 +35,7 @@ obj-$(CONFIG_ALTIVEC) += xor_vmx.o
 CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
 
 obj-$(CONFIG_PPC64) += $(obj64-y)
+
+ifeq ($(CONFIG_PPC64), y)
+obj-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
+endif
diff --git a/arch/powerpc/lib/test_emulate_step.c 
b/arch/powerpc/lib/test_emulate_step.c
new file mode 100644
index 000..887d1db
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -0,0 +1,439 @@
+/*
+ * test_emulate_step.c - simple sanity test for emulate_step load/store
+ *  instructions
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "emulate_step smoke test: " fmt
+
+#include 
+#include 
+#include 
+
+#define IMM_L(i)   ((uintptr_t)(i) & 0xffff)
+
+/*
+ * Defined with TEST_ prefix so it does not conflict with other
+ * definitions.
+ */
+#define TEST_LD(r, base, i)(PPC_INST_LD | ___PPC_RT(r) |   \
+   

[PATCH v2 1/2] powerpc: Emulation support for load/store instructions on LE

2017-02-14 Thread Ravi Bangoria
emulate_step() uses a number of underlying kernel functions that were
initially not enabled for LE. This has been rectified since. So, fix
emulate_step() for LE for the corresponding instructions.

Reported-by: Anton Blanchard <an...@samba.org>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 arch/powerpc/lib/sstep.c | 20 
 1 file changed, 20 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 06c7e9b..e14a2fb 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto instr_done;
 
case LARX:
-   if (regs->msr & MSR_LE)
-   return 0;
if (op.ea & (size - 1))
break;  /* can't handle misaligned */
err = -EFAULT;
@@ -1824,8 +1822,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto ldst_done;
 
case STCX:
-   if (regs->msr & MSR_LE)
-   return 0;
if (op.ea & (size - 1))
break;  /* can't handle misaligned */
err = -EFAULT;
@@ -1851,8 +1847,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto ldst_done;
 
case LOAD:
-   if (regs->msr & MSR_LE)
-   return 0;
err = read_mem(&regs->gpr[op.reg], op.ea, size, regs);
if (!err) {
if (op.type & SIGNEXT)
@@ -1864,8 +1858,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
 
 #ifdef CONFIG_PPC_FPU
case LOAD_FP:
-   if (regs->msr & MSR_LE)
-   return 0;
if (size == 4)
err = do_fp_load(op.reg, do_lfs, op.ea, size, regs);
else
@@ -1874,15 +1866,11 @@ int __kprobes emulate_step(struct pt_regs *regs, 
unsigned int instr)
 #endif
 #ifdef CONFIG_ALTIVEC
case LOAD_VMX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs);
goto ldst_done;
 #endif
 #ifdef CONFIG_VSX
case LOAD_VSX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs);
goto ldst_done;
 #endif
@@ -1905,8 +1893,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto instr_done;
 
case STORE:
-   if (regs->msr & MSR_LE)
-   return 0;
if ((op.type & UPDATE) && size == sizeof(long) &&
op.reg == 1 && op.update_reg == 1 &&
!(regs->msr & MSR_PR) &&
@@ -1919,8 +1905,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
 
 #ifdef CONFIG_PPC_FPU
case STORE_FP:
-   if (regs->msr & MSR_LE)
-   return 0;
if (size == 4)
err = do_fp_store(op.reg, do_stfs, op.ea, size, regs);
else
@@ -1929,15 +1913,11 @@ int __kprobes emulate_step(struct pt_regs *regs, 
unsigned int instr)
 #endif
 #ifdef CONFIG_ALTIVEC
case STORE_VMX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs);
goto ldst_done;
 #endif
 #ifdef CONFIG_VSX
case STORE_VSX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs);
goto ldst_done;
 #endif
-- 
1.8.3.1



Re: [PATCH v3 1/2] powerpc: Emulation support for load/store instructions on LE

2017-02-14 Thread Ravi Bangoria
Thanks Michael,

On Tuesday 14 February 2017 03:50 PM, Michael Ellerman wrote:
> Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com> writes:
>
>> emulate_step() uses a number of underlying kernel functions that were
>> initially not enabled for LE. This has been rectified since.
> When exactly? ie. which commit.

I found couple of commits:

6506b4718b ("powerpc: Fix Unaligned Loads and Stores")
dbc2fbd7c2 ("powerpc: Fix Unaligned LE Floating Point Loads and Stores")

There may be more.

Patch 2 tests emulate_step() for basic load/store instructions, and it
seems to be working fine on LE.

>
> Should we backport this? ie. is it actually a bug people are hitting in
> the real world much?

Yes, we should backport this. The kernel-space hw-breakpoint feature is
broken on LE without it. This is on ppc64le:

  $ sudo cat /proc/kallsyms  | grep pid_max
c116998c D pid_max

  $ sudo ./perf record -a --event=mem:0xc116998c sleep 10


Before the patch:
  It does not record any data and throws the warning below.

  $ dmesg
[  817.895573] Unable to handle hardware breakpoint. Breakpoint at 
0xc116998c will be disabled.
[  817.895581] [ cut here ]
[  817.895588] WARNING: CPU: 24 PID: 2032 at 
arch/powerpc/kernel/hw_breakpoint.c:277 hw_breakpoint_handler+0x124/0x230
...

After the patch:
  It records data properly.

  $ sudo ./perf report --stdio
...
# Samples: 36  of event 'mem:0xc116998c'
# Event count (approx.): 36
#
# Overhead  Command        Shared Object     Symbol
# ........  .............  ................  .............
#
63.89%  kdumpctl   [kernel.vmlinux]  [k] alloc_pid
27.78%  opal_errd  [kernel.vmlinux]  [k] alloc_pid
 5.56%  kworker/u97:4  [kernel.vmlinux]  [k] alloc_pid
 2.78%  systemd[kernel.vmlinux]  [k] alloc_pid



>
> cheers
>



Re: [PATCH v3 2/2] powerpc: emulate_step tests for load/store instructions

2017-02-14 Thread Ravi Bangoria


On Tuesday 14 February 2017 04:16 PM, Michael Ellerman wrote:
> Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com> writes:
>
>> diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
>> index 0e649d7..ddc879d 100644
>> --- a/arch/powerpc/lib/Makefile
>> +++ b/arch/powerpc/lib/Makefile
>> @@ -33,3 +33,7 @@ obj-$(CONFIG_ALTIVEC)  += xor_vmx.o
>>  CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
>>  
>>  obj-$(CONFIG_PPC64) += $(obj64-y)
>> +
>> +ifeq ($(CONFIG_PPC64), y)
>> +obj-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
>> +endif
> FYI, the right way to do that is:
>
>   obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
>   obj-$(CONFIG_PPC64) += $(obj64-y)
>
> And in this Makefile you don't need to add the second line because it's
> already there.

Thanks for this. Will change accordingly and send next version.

Ravi

>
> cheers
>



[PATCH v3 2/2] powerpc: emulate_step tests for load/store instructions

2017-02-14 Thread Ravi Bangoria
Add a new selftest that tests emulate_step() for Normal, Floating
Point, Vector and Vector Scalar load/store instructions. The test runs
at boot time if CONFIG_KPROBES_SANITY_TEST and CONFIG_PPC64 are set.

Sample log:

  [0.762063] emulate_step smoke test: start.
  [0.762219] emulate_step smoke test: ld : PASS
  [0.762434] emulate_step smoke test: lwz: PASS
  [0.762653] emulate_step smoke test: lwzx   : PASS
  [0.762867] emulate_step smoke test: std: PASS
  [0.763082] emulate_step smoke test: ldarx / stdcx. : PASS
  [0.763302] emulate_step smoke test: lfsx   : PASS
  [0.763514] emulate_step smoke test: stfsx  : PASS
  [0.763727] emulate_step smoke test: lfdx   : PASS
  [0.763942] emulate_step smoke test: stfdx  : PASS
  [0.764134] emulate_step smoke test: lvx: PASS
  [0.764349] emulate_step smoke test: stvx   : PASS
  [0.764575] emulate_step smoke test: lxvd2x : PASS
  [0.764788] emulate_step smoke test: stxvd2x: PASS
  [0.764997] emulate_step smoke test: complete.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/ppc-opcode.h |   7 +
 arch/powerpc/include/asm/sstep.h  |   8 +
 arch/powerpc/kernel/kprobes.c |   2 +
 arch/powerpc/lib/Makefile |   4 +
 arch/powerpc/lib/test_emulate_step.c  | 439 ++
 5 files changed, 460 insertions(+)
 create mode 100644 arch/powerpc/lib/test_emulate_step.c

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index d99bd44..e7d6d86 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -284,6 +284,13 @@
 #define PPC_INST_BRANCH_COND   0x40800000
 #define PPC_INST_LBZCIX0x7c0006aa
 #define PPC_INST_STBCIX0x7c0007aa
+#define PPC_INST_LWZX  0x7c00002e
+#define PPC_INST_LFSX  0x7c00042e
+#define PPC_INST_STFSX 0x7c00052e
+#define PPC_INST_LFDX  0x7c0004ae
+#define PPC_INST_STFDX 0x7c0005ae
+#define PPC_INST_LVX   0x7c0000ce
+#define PPC_INST_STVX  0x7c0001ce
 
 /* macros to insert fields into opcodes */
 #define ___PPC_RA(a)   (((a) & 0x1f) << 16)
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index d3a42cc..d6d3630 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -87,3 +87,11 @@ struct instruction_op {
 
 extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 unsigned int instr);
+
+#if defined(CONFIG_KPROBES_SANITY_TEST) && defined(CONFIG_PPC64)
+void test_emulate_step(void);
+#else
+static inline void test_emulate_step(void)
+{
+}
+#endif
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index fce05a3..5c5ae66 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -528,6 +528,8 @@ int __kprobes longjmp_break_handler(struct kprobe *p, 
struct pt_regs *regs)
 
 int __init arch_init_kprobes(void)
 {
+   test_emulate_step();
+
return register_kprobe(&trampoline_p);
 }
 
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 0e649d7..ddc879d 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -33,3 +33,7 @@ obj-$(CONFIG_ALTIVEC) += xor_vmx.o
 CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
 
 obj-$(CONFIG_PPC64) += $(obj64-y)
+
+ifeq ($(CONFIG_PPC64), y)
+obj-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
+endif
diff --git a/arch/powerpc/lib/test_emulate_step.c 
b/arch/powerpc/lib/test_emulate_step.c
new file mode 100644
index 000..887d1db
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -0,0 +1,439 @@
+/*
+ * test_emulate_step.c - simple sanity test for emulate_step load/store
+ *  instructions
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "emulate_step smoke test: " fmt
+
+#include 
+#include 
+#include 
+
+#define IMM_L(i)   ((uintptr_t)(i) & 0xffff)
+
+/*
+ * Defined with TEST_ prefix so it does not conflict with other
+ * definitions.
+ */
+#define TEST_LD(r, base, i)(PPC_INST_LD | ___PPC_RT(r) |   \
+   

[PATCH v3 1/2] powerpc: Emulation support for load/store instructions on LE

2017-02-14 Thread Ravi Bangoria
emulate_step() uses a number of underlying kernel functions that were
initially not enabled for LE. This has been rectified since. So, fix
emulate_step() for LE for the corresponding instructions.

Reported-by: Anton Blanchard <an...@samba.org>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 arch/powerpc/lib/sstep.c | 20 
 1 file changed, 20 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 846dba2..9c542ec 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto instr_done;
 
case LARX:
-   if (regs->msr & MSR_LE)
-   return 0;
if (op.ea & (size - 1))
break;  /* can't handle misaligned */
if (!address_ok(regs, op.ea, size))
@@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto ldst_done;
 
case STCX:
-   if (regs->msr & MSR_LE)
-   return 0;
if (op.ea & (size - 1))
break;  /* can't handle misaligned */
if (!address_ok(regs, op.ea, size))
@@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto ldst_done;
 
case LOAD:
-   if (regs->msr & MSR_LE)
-   return 0;
err = read_mem(&regs->gpr[op.reg], op.ea, size, regs);
if (!err) {
if (op.type & SIGNEXT)
@@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
 
 #ifdef CONFIG_PPC_FPU
case LOAD_FP:
-   if (regs->msr & MSR_LE)
-   return 0;
if (size == 4)
err = do_fp_load(op.reg, do_lfs, op.ea, size, regs);
else
@@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, 
unsigned int instr)
 #endif
 #ifdef CONFIG_ALTIVEC
case LOAD_VMX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs);
goto ldst_done;
 #endif
 #ifdef CONFIG_VSX
case LOAD_VSX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs);
goto ldst_done;
 #endif
@@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto instr_done;
 
case STORE:
-   if (regs->msr & MSR_LE)
-   return 0;
if ((op.type & UPDATE) && size == sizeof(long) &&
op.reg == 1 && op.update_reg == 1 &&
!(regs->msr & MSR_PR) &&
@@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
 
 #ifdef CONFIG_PPC_FPU
case STORE_FP:
-   if (regs->msr & MSR_LE)
-   return 0;
if (size == 4)
err = do_fp_store(op.reg, do_stfs, op.ea, size, regs);
else
@@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, 
unsigned int instr)
 #endif
 #ifdef CONFIG_ALTIVEC
case STORE_VMX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs);
goto ldst_done;
 #endif
 #ifdef CONFIG_VSX
case STORE_VSX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs);
goto ldst_done;
 #endif
-- 
1.8.3.1



Re: [PATCH v2 1/2] powerpc: Emulation support for load/store instructions on LE

2017-02-14 Thread Ravi Bangoria


On Tuesday 14 February 2017 02:17 PM, Naveen N. Rao wrote:
> On 2017/02/14 01:32PM, Ravi Bangoria wrote:
>> emulate_step() uses a number of underlying kernel functions that were
>> initially not enabled for LE. This has been rectified since. So, fix
>> emulate_step() for LE for the corresponding instructions.
>>
>> Reported-by: Anton Blanchard <an...@samba.org>
>> Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
> Can you redo this on top of powerpc/next? This doesn't apply cleanly due 
> to a recent change...

Ok, sorry for the noise. I sent a v3 series. Please review it.

Ravi

> - Naveen
>



[PATCH v2 0/2] powerpc: Emulation support for load/store instructions on LE

2017-02-14 Thread Ravi Bangoria
emulate_step() is basic infrastructure used by a number of other kernel
facilities such as kprobes and hw-breakpoints (data breakpoints). In
the kprobes case, enabling emulation of load/store instructions speeds
up execution of the probed instruction. In the kernel-space breakpoint
case, the causative instruction is first emulated before the
user-registered handler is executed. If emulation fails, the
hw-breakpoint is disabled with an error. As emulate_step() does not
support load/store instructions on LE, the kernel-space hw-breakpoint
infrastructure is broken on LE.
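
As a rough illustration of that flow, the kernel-space breakpoint path
consumes emulate_step() more or less as sketched below (a hedged
sketch, not a quote from arch/powerpc/kernel/hw_breakpoint.c; 'instr',
'bp' and 'info' are assumed to be what the handler already has at
hand):

    /* Try to emulate the causative instruction from the handler. */
    if (!emulate_step(regs, instr)) {
            /* Emulation failed: warn and disable this breakpoint. */
            WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
                    "0x%lx will be disabled.", info->address);
            perf_event_disable(bp);
    }
    /* On success, regs->nip already points past the emulated instruction. */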

emulate_step() uses a number of underlying kernel functions that were
initially not enabled for LE. This has been rectified since. So, fix
emulate_step() for LE for the corresponding instructions.

Also add a selftest which will run at boot if CONFIG_KPROBES_SANITY_TEST
and CONFIG_PPC64 are set.

Changes in v2:
  - Folded 2nd and 3rd patch of v1 into one patch, as suggested by Naveen

v1 link: 
https://www.mail-archive.com/linuxppc-dev@lists.ozlabs.org/msg110671.html

Ravi Bangoria (2):
  powerpc: Emulation support for load/store instructions on LE
  powerpc: emulate_step tests for load/store instructions

 arch/powerpc/include/asm/ppc-opcode.h |   7 +
 arch/powerpc/include/asm/sstep.h  |   8 +
 arch/powerpc/kernel/kprobes.c |   2 +
 arch/powerpc/lib/Makefile |   4 +
 arch/powerpc/lib/sstep.c  |  20 --
 arch/powerpc/lib/test_emulate_step.c  | 439 ++
 6 files changed, 460 insertions(+), 20 deletions(-)
 create mode 100644 arch/powerpc/lib/test_emulate_step.c

-- 
1.8.3.1



Re: [PATCH] powerpc/xmon: Fix data-breakpoint

2017-02-14 Thread Ravi Bangoria
Hi Michael,

Can you please pull this patch?

Thanks,
Ravi

On Tuesday 22 November 2016 02:55 PM, Ravi Bangoria wrote:
> The xmon data-breakpoint feature is broken.
>
> Whenever a watchpoint match occurs, hw_breakpoint_handler will be
> called by do_break via the notifier chain mechanism. If the watchpoint
> was registered by xmon, hw_breakpoint_handler won't find any associated
> perf_event and returns immediately with NOTIFY_STOP. Similarly,
> do_break also returns without notifying xmon.
>
> Solve this by returning NOTIFY_DONE when hw_breakpoint_handler does not
> find any perf_event associated with the matched watchpoint.
>
> Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/hw_breakpoint.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
> diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
> b/arch/powerpc/kernel/hw_breakpoint.c
> index 03d089b..469d86d 100644
> --- a/arch/powerpc/kernel/hw_breakpoint.c
> +++ b/arch/powerpc/kernel/hw_breakpoint.c
> @@ -228,8 +228,10 @@ int hw_breakpoint_handler(struct die_args *args)
>   rcu_read_lock();
>
>   bp = __this_cpu_read(bp_per_reg);
> - if (!bp)
> + if (!bp) {
> + rc = NOTIFY_DONE;
>   goto out;
> + }
>   info = counter_arch_bp(bp);
>
>   /*



[PATCH v3 0/2] powerpc: Emulation support for load/store instructions on LE

2017-02-14 Thread Ravi Bangoria
emulate_step() is basic infrastructure used by a number of other kernel
facilities such as kprobes and hw-breakpoints (data breakpoints). In
the kprobes case, enabling emulation of load/store instructions speeds
up execution of the probed instruction. In the kernel-space breakpoint
case, the causative instruction is first emulated before the
user-registered handler is executed. If emulation fails, the
hw-breakpoint is disabled with an error. As emulate_step() does not
support load/store instructions on LE, the kernel-space hw-breakpoint
infrastructure is broken on LE.

emulate_step() uses a number of underlying kernel functions that were
initially not enabled for LE. This has been rectified since. So, fix
emulate_step() for LE for the corresponding instructions.

Also add a selftest which will run at boot if CONFIG_KPROBES_SANITY_TEST
and CONFIG_PPC64 are set.

Changes in v3:
  - Rebased to powerpc/next. No functionality changes.

v2 link: 
https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1332638.html

Ravi Bangoria (2):
  powerpc: Emulation support for load/store instructions on LE
  powerpc: emulate_step tests for load/store instructions

 arch/powerpc/include/asm/ppc-opcode.h |   7 +
 arch/powerpc/include/asm/sstep.h  |   8 +
 arch/powerpc/kernel/kprobes.c |   2 +
 arch/powerpc/lib/Makefile |   4 +
 arch/powerpc/lib/sstep.c  |  20 --
 arch/powerpc/lib/test_emulate_step.c  | 439 ++
 6 files changed, 460 insertions(+), 20 deletions(-)
 create mode 100644 arch/powerpc/lib/test_emulate_step.c

-- 
1.8.3.1



[PATCH v4 1/2] powerpc: Emulation support for load/store instructions on LE

2017-02-14 Thread Ravi Bangoria
emulate_step() uses a number of underlying kernel functions that were
initially not enabled for LE. This has been rectified since. So, fix
emulate_step() for LE for the corresponding instructions.

Reported-by: Anton Blanchard <an...@samba.org>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 arch/powerpc/lib/sstep.c | 20 
 1 file changed, 20 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 846dba2..9c542ec 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto instr_done;
 
case LARX:
-   if (regs->msr & MSR_LE)
-   return 0;
if (op.ea & (size - 1))
break;  /* can't handle misaligned */
if (!address_ok(regs, op.ea, size))
@@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto ldst_done;
 
case STCX:
-   if (regs->msr & MSR_LE)
-   return 0;
if (op.ea & (size - 1))
break;  /* can't handle misaligned */
if (!address_ok(regs, op.ea, size))
@@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto ldst_done;
 
case LOAD:
-   if (regs->msr & MSR_LE)
-   return 0;
err = read_mem(&regs->gpr[op.reg], op.ea, size, regs);
if (!err) {
if (op.type & SIGNEXT)
@@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
 
 #ifdef CONFIG_PPC_FPU
case LOAD_FP:
-   if (regs->msr & MSR_LE)
-   return 0;
if (size == 4)
err = do_fp_load(op.reg, do_lfs, op.ea, size, regs);
else
@@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, 
unsigned int instr)
 #endif
 #ifdef CONFIG_ALTIVEC
case LOAD_VMX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs);
goto ldst_done;
 #endif
 #ifdef CONFIG_VSX
case LOAD_VSX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs);
goto ldst_done;
 #endif
@@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto instr_done;
 
case STORE:
-   if (regs->msr & MSR_LE)
-   return 0;
if ((op.type & UPDATE) && size == sizeof(long) &&
op.reg == 1 && op.update_reg == 1 &&
!(regs->msr & MSR_PR) &&
@@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
 
 #ifdef CONFIG_PPC_FPU
case STORE_FP:
-   if (regs->msr & MSR_LE)
-   return 0;
if (size == 4)
err = do_fp_store(op.reg, do_stfs, op.ea, size, regs);
else
@@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, 
unsigned int instr)
 #endif
 #ifdef CONFIG_ALTIVEC
case STORE_VMX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs);
goto ldst_done;
 #endif
 #ifdef CONFIG_VSX
case STORE_VSX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs);
goto ldst_done;
 #endif
-- 
1.8.3.1



[PATCH v4 2/2] powerpc: emulate_step tests for load/store instructions

2017-02-14 Thread Ravi Bangoria
Add a new selftest that tests emulate_step() for Normal, Floating
Point, Vector and Vector Scalar load/store instructions. The test runs
at boot time if CONFIG_KPROBES_SANITY_TEST and CONFIG_PPC64 are set.

Sample log:

  [0.762063] emulate_step smoke test: start.
  [0.762219] emulate_step smoke test: ld : PASS
  [0.762434] emulate_step smoke test: lwz: PASS
  [0.762653] emulate_step smoke test: lwzx   : PASS
  [0.762867] emulate_step smoke test: std: PASS
  [0.763082] emulate_step smoke test: ldarx / stdcx. : PASS
  [0.763302] emulate_step smoke test: lfsx   : PASS
  [0.763514] emulate_step smoke test: stfsx  : PASS
  [0.763727] emulate_step smoke test: lfdx   : PASS
  [0.763942] emulate_step smoke test: stfdx  : PASS
  [0.764134] emulate_step smoke test: lvx: PASS
  [0.764349] emulate_step smoke test: stvx   : PASS
  [0.764575] emulate_step smoke test: lxvd2x : PASS
  [0.764788] emulate_step smoke test: stxvd2x: PASS
  [0.764997] emulate_step smoke test: complete.
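
A minimal sketch of what one of these test cases looks like is given
below (illustration only, not a copy of the file added by this patch;
the init_pt_regs() helper and the chosen register/memory values are
assumptions in the spirit of the selftest, while TEST_LWZX() and
emulate_step() are the real interfaces it exercises):

  /*
   * Illustrative sketch: emulate 'lwzx r5, r3, r4' and check that the
   * loaded word lands in r5.  init_pt_regs() is assumed to zero the
   * regs and set up a sane MSR for the current endianness.
   */
  static void __init test_lwzx_sketch(void)
  {
          struct pt_regs regs;
          unsigned int mem[3] = { 0, 0, 0x1234 };
          int stepped;

          init_pt_regs(&regs);
          regs.gpr[3] = (unsigned long)mem;       /* base address */
          regs.gpr[4] = 8;                        /* byte offset of mem[2] */
          regs.gpr[5] = 0x8765;                   /* overwritten on success */

          /* lwzx r5, r3, r4; return value 1 is assumed to mean "emulated" */
          stepped = emulate_step(&regs, TEST_LWZX(5, 3, 4));

          if (stepped == 1 && regs.gpr[5] == 0x1234)
                  pr_info("lwzx : PASS\n");
          else
                  pr_info("lwzx : FAIL\n");
  }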

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/ppc-opcode.h |   7 +
 arch/powerpc/lib/Makefile |   2 +
 arch/powerpc/lib/test_emulate_step.c  | 443 ++
 3 files changed, 452 insertions(+)
 create mode 100644 arch/powerpc/lib/test_emulate_step.c

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index d99bd44..e7d6d86 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -284,6 +284,13 @@
 #define PPC_INST_BRANCH_COND   0x4080
 #define PPC_INST_LBZCIX0x7c0006aa
 #define PPC_INST_STBCIX0x7c0007aa
+#define PPC_INST_LWZX  0x7c2e
+#define PPC_INST_LFSX  0x7c00042e
+#define PPC_INST_STFSX 0x7c00052e
+#define PPC_INST_LFDX  0x7c0004ae
+#define PPC_INST_STFDX 0x7c0005ae
+#define PPC_INST_LVX   0x7cce
+#define PPC_INST_STVX  0x7c0001ce
 
 /* macros to insert fields into opcodes */
 #define ___PPC_RA(a)   (((a) & 0x1f) << 16)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 0e649d7..c0d6e79 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -32,4 +32,6 @@ obj-$(CONFIG_FTR_FIXUP_SELFTEST) += feature-fixups-test.o
 obj-$(CONFIG_ALTIVEC)  += xor_vmx.o
 CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
 
+obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
+
 obj-$(CONFIG_PPC64) += $(obj64-y)
diff --git a/arch/powerpc/lib/test_emulate_step.c 
b/arch/powerpc/lib/test_emulate_step.c
new file mode 100644
index 000..4e3bce9
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -0,0 +1,443 @@
+/*
+ * test_emulate_step.c - simple sanity test for emulate_step load/store
+ *  instructions
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "emulate_step smoke test: " fmt
+
+#include <linux/ptrace.h>
+#include <asm/sstep.h>
+#include <asm/ppc-opcode.h>
+
+#define IMM_L(i)   ((uintptr_t)(i) & 0x)
+
+/*
+ * Defined with TEST_ prefix so it does not conflict with other
+ * definitions.
+ */
+#define TEST_LD(r, base, i)(PPC_INST_LD | ___PPC_RT(r) |   \
+   ___PPC_RA(base) | IMM_L(i))
+#define TEST_LWZ(r, base, i)   (PPC_INST_LWZ | ___PPC_RT(r) |  \
+   ___PPC_RA(base) | IMM_L(i))
+#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \
+   ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STD(r, base, i)   (PPC_INST_STD | ___PPC_RS(r) |  \
+   ___PPC_RA(base) | ((i) & 0xfffc))
+#define TEST_LDARX(t, a, b, eh)(PPC_INST_LDARX | ___PPC_RT(t) |
\
+   ___PPC_RA(a) | ___PPC_RB(b) |   \
+   __PPC_EH(eh))
+#define TEST_STDCX(s, a, b)(PPC_INST_STDCX | ___PPC_RS(s) |\
+   ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \
+   ___PPC_RA(a) | ___PPC_

[PATCH v4 0/2] powerpc: Emulation support for load/store instructions on LE

2017-02-14 Thread Ravi Bangoria
emulate_step is the basic infrastructure used by a number of other kernel
facilities such as kprobes and hw-breakpoints (data breakpoints). For
kprobes, enabling emulation of load/store instructions speeds up execution
of the probed instruction. For a kernel-space breakpoint, the causative
instruction is first emulated before the user-registered handler is
executed. If emulation fails, the hw-breakpoint is disabled with an error.
As emulate_step does not support load/store instructions on LE, the
kernel-space hw-breakpoint infrastructure is broken on LE.
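
To make that dependency concrete, here is a minimal sketch of how such a
caller consumes emulate_step(). The handler and fallback_to_single_step()
are hypothetical names used only for illustration; what is taken from the
series is the emulate_step(regs, instr) signature and the convention,
visible in the 'return 0' paths that patch 1 removes, that 0 means the
instruction could not be emulated:

  static void handle_trapped_instr(struct pt_regs *regs, unsigned int instr)
  {
          /* Non-zero: instruction emulated in software, NIP advanced. */
          if (emulate_step(regs, instr))
                  return;

          /*
           * 0: emulate_step() cannot handle this instruction (before
           * this series, every load/store on LE), so the caller must
           * fall back to hardware single-stepping.
           */
          fallback_to_single_step(regs);
  }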

emulate_step() uses a number of underlying kernel functions that were
initially not enabled for LE. This has since been rectified. So, fix
emulate_step() for LE for the corresponding instructions.

Also add a selftest which will run at boot if CONFIG_KPROBES_SANITY_TEST
and CONFIG_PPC64 are set.

Changes in v4:
  - Used late_initcall() for selftest
  - Makefile changes

v3 link: 
https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg1332686.html

Ravi Bangoria (2):
  powerpc: Emulation support for load/store instructions on LE
  powerpc: emulate_step tests for load/store instructions

 arch/powerpc/include/asm/ppc-opcode.h |   7 +
 arch/powerpc/lib/Makefile |   2 +
 arch/powerpc/lib/sstep.c  |  20 --
 arch/powerpc/lib/test_emulate_step.c  | 443 ++
 4 files changed, 452 insertions(+), 20 deletions(-)
 create mode 100644 arch/powerpc/lib/test_emulate_step.c

-- 
1.8.3.1



Re: Normal service will resume shortly ...

2017-01-17 Thread Ravi Bangoria
Hi Michael,

Welcome back!!

Can you please take a look at:
https://lists.ozlabs.org/pipermail/linuxppc-dev/2016-November/151426.html
https://lists.ozlabs.org/pipermail/linuxppc-dev/2016-November/150452.html

Thanks,
Ravi


On Monday 16 January 2017 03:03 PM, Michael Ellerman wrote:
> Hi folks,
>
> I'm back from paternity leave, and will start processing patches in the
> next day or two.
>
> If you've sent a fix that should go into 4.10 that hasn't been merged
> yet, please feel free to reply to this message giving me a pointer to
> it.
>
> cheers
>



[PATCH v5 0/7] perf: Cross arch annotate + few miscellaneous fixes

2016-08-18 Thread Ravi Bangoria
Currently perf annotate supports code navigation (branches and calls)
only when run on the same architecture where perf.data was recorded.
So, for example, recording on a powerpc server and annotating on a
client's x86 desktop is not supported.

This patchset enables cross arch annotate. It reuses the x86 and arm
instruction tables which are already available and adds support for
powerpc.

Additionally, this patch series contains a few other related fixes.

Patches are prepared on top of acme/perf/core and tested with x86 and
powerpc only.

Note for arm:
A few instructions were defined under #if __arm__, which I've used as
the table for arm. But I'm not sure whether the instructions defined
outside of that block also include arm instructions. Apart from that,
'call__parse()' and 'move__parse()' contain an #ifdef __arm__ directive.
I've changed it to if (!strcmp(norm_arch, arm)). I don't have an arm
machine to test these changes.

Example:

  Record on powerpc:
  $ ./perf record -a

  Report -> Annotate on x86:
  $ ./perf report -i perf.data.powerpc --vmlinux vmlinux.powerpc

Changes in v5:
  - Replaced symbol__annotate with symbol__disassemble.
  - Removed hacks for jump and call instructions like bctr and bctrl
respectively from generic patch that enables support for powerpc
and made separate patch for that.
  - v4 was not annotating powerpc 'btar' instruction. Included that.
  - Added few generic fixes.

v4 link:
  https://lkml.org/lkml/2016/7/8/10

Naveen N. Rao (1):
  perf annotate: Add support for powerpc

Ravi Bangoria (6):
  perf: Define macro for normalized arch names
  perf annotate: Add cross arch annotate support
  perf annotate: Do not ignore call instruction with indirect target
  perf annotate: Show raw form for jump instruction with indirect
target
  perf annotate: Support jump instruction with target as second operand
  perf annotate: Fix jump target outside of function address range

 tools/perf/arch/common.c   |  36 ++---
 tools/perf/arch/common.h   |  11 ++
 tools/perf/builtin-top.c   |   2 +-
 tools/perf/ui/browsers/annotate.c  |   8 +-
 tools/perf/ui/gtk/annotate.c   |   2 +-
 tools/perf/util/annotate.c | 276 +
 tools/perf/util/annotate.h |  10 +-
 tools/perf/util/unwind-libunwind.c |   4 +-
 8 files changed, 262 insertions(+), 87 deletions(-)

-- 
2.5.5



[PATCH v5 7/7] perf annotate: Fix jump target outside of function address range

2016-08-18 Thread Ravi Bangoria
If the jump target is outside of the function range, perf is not handling
it correctly. Especially when the target address is less than the function
start address, the target offset will be negative. But the target address
is declared as unsigned, so the negative number gets converted into its
2's complement form. See the example below. Here the target of the 'jmpq'
instruction at 34cf8 is 34ac0, which is less than the function start
address (34cf0).

34ac0 - 34cf0 = -0x230 = 0xfffffffffffffdd0
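
The wrap-around is easy to reproduce in a few lines of plain C
(illustration only, not part of the patch):

  #include <stdio.h>
  #include <inttypes.h>

  int main(void)
  {
          uint64_t target = 0x34ac0, start = 0x34cf0;
          uint64_t offset = target - start;  /* u64 cannot hold -0x230 */

          /* prints 0xfffffffffffffdd0, the bogus offset shown below */
          printf("%#" PRIx64 "\n", offset);
          return 0;
  }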

Objdump output:

  0000000000034cf0 <__sigaction>:
  __GI___sigaction():
    34cf0: lea    -0x20(%rdi),%eax
    34cf3: cmp    $0x1,%eax
    34cf6: jbe    34d00 <__sigaction+0x10>
    34cf8: jmpq   34ac0 <__GI___libc_sigaction>
    34cfd: nopl   (%rax)
    34d00: mov    0x386161(%rip),%rax        # 3bae68 <_DYNAMIC+0x2e8>
    34d07: movl   $0x16,%fs:(%rax)
    34d0e: mov    $0xffffffff,%eax
    34d13: retq

perf annotate before applying patch:

  __GI___sigaction  /usr/lib64/libc-2.22.so
         lea    -0x20(%rdi),%eax
         cmp    $0x1,%eax
      V  jbe    10
      V  jmpq   fffffffffffffdd0
         nop
  10:    mov    _DYNAMIC+0x2e8,%rax
         movl   $0x16,%fs:(%rax)
         mov    $0xffffffff,%eax
         retq

perf annotate after applying patch:

  __GI___sigaction  /usr/lib64/libc-2.22.so
         lea    -0x20(%rdi),%eax
         cmp    $0x1,%eax
      V  jbe    10
      ^  jmpq   34ac0 <__GI___libc_sigaction>
         nop
  10:    mov    _DYNAMIC+0x2e8,%rax
         movl   $0x16,%fs:(%rax)
         mov    $0xffffffff,%eax
         retq

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v5:
  - New patch

 tools/perf/ui/browsers/annotate.c |  5 +++--
 tools/perf/util/annotate.c| 14 +-
 tools/perf/util/annotate.h|  5 +++--
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index 21c5e10..c13df5b 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -215,7 +215,7 @@ static void annotate_browser__write(struct ui_browser 
*browser, void *entry, int
ui_browser__set_color(browser, color);
if (dl->ins && dl->ins->ops->scnprintf) {
if (ins__is_jump(dl->ins)) {
-   bool fwd = dl->ops.target.offset > 
(u64)dl->offset;
+   bool fwd = dl->ops.target.offset > dl->offset;
 
ui_browser__write_graph(browser, fwd ? 
SLSMG_DARROW_CHAR :

SLSMG_UARROW_CHAR);
@@ -245,7 +245,8 @@ static bool disasm_line__is_valid_jump(struct disasm_line 
*dl, struct symbol *sy
 {
if (!dl || !dl->ins || !ins__is_jump(dl->ins)
|| !disasm_line__has_offset(dl)
-   || dl->ops.target.offset >= symbol__size(sym))
+   || dl->ops.target.offset < 0
+   || dl->ops.target.offset >= (s64)symbol__size(sym))
return false;
 
return true;
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 678fb81..c8b017c 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -124,10 +124,12 @@ static int jump__parse(struct ins_operands *ops,
else
ops->target.addr = strtoull(ops->raw, NULL, 16);
 
-   if (s++ != NULL)
+   if (s++ != NULL) {
ops->target.offset = strtoull(s, NULL, 16);
-   else
-   ops->target.offset = UINT64_MAX;
+   ops->target.offset_avail = true;
+   } else {
+   ops->target.offset_avail = false;
+   }
 
return 0;
 }
@@ -135,7 +137,7 @@ static int jump__parse(struct ins_operands *ops,
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
   struct ins_operands *ops)
 {
-   if (!ops->target.addr)
+   if (!ops->target.addr || ops->target.offset < 0)
return ins__raw_scnprintf(ins, bf, size, ops);
 
return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, 
ops->target.offset);
@@ -1228,9 +1230,11 @@ static int symbol__parse_objdump_line(struct symbol 
*sym, struct map *map,
if (dl == NULL)
return -1;
 
-   if (dl->ops.target.offset == UINT64_MAX)
+   if (!disasm_line__has_offset(dl)) {
dl->ops.target.offset = dl->ops.target.addr -
map__rip_2objdump(map, sym->start);
+   dl->ops.target.offset_avail = true;
+   }
 
/* kcore has no symbols, so add the call target name */
if (dl->ins && ins__is_call(dl->ins) && !dl->ops.target.name) {
diff --git a/tools/perf/util/annotate.

[PATCH v5 6/7] perf annotate: Support jump instruction with target as second operand

2016-08-18 Thread Ravi Bangoria
Currently perf is not able to parse a jump instruction when the second
operand contains the target address. Architectures like powerpc have such
instructions, for example 'beq cr7,10173e60'.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v5:
  - New patch

 tools/perf/util/annotate.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 4a4a583..678fb81 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -117,8 +117,12 @@ static int jump__parse(struct ins_operands *ops,
   const char *norm_arch __maybe_unused)
 {
const char *s = strchr(ops->raw, '+');
+   const char *c = strchr(ops->raw, ',');
 
-   ops->target.addr = strtoull(ops->raw, NULL, 16);
+   if (c++ != NULL)
+   ops->target.addr = strtoull(c, NULL, 16);
+   else
+   ops->target.addr = strtoull(ops->raw, NULL, 16);
 
if (s++ != NULL)
ops->target.offset = strtoull(s, NULL, 16);
-- 
2.5.5



Re: [PATCH v4 0/3] perf annotate: Enable cross arch annotate

2016-08-19 Thread Ravi Bangoria
I've sent v5 series for this. Please review it.

Thanks,
Ravi

On Wednesday 13 July 2016 03:15 PM, Ravi Bangoria wrote:
> Arnaldo, Michael,
>
> I've tested this patchset on ppc64 BE and LE both. Please review this.
>
> -Ravi
>
> On Friday 08 July 2016 10:10 AM, Ravi Bangoria wrote:
>> Perf can currently only support code navigation (branches and calls) in
>> annotate when run on the same architecture where perf.data was recorded.
>> But cross arch annotate is not supported.
>>
>> This patchset enables cross arch annotate. Currently I've used x86
>> and arm instructions which are already available and adding support
>> for powerpc as well. Adding support for other arch will be easy.
>>
>> I've created this patch on top of acme/perf/core. And tested it with
>> x86 and powerpc only.
>>
>> Note for arm:
>> Few instructions were defined under #if __arm__ which I've used as a
>> table for arm. But I'm not sure whether instruction defined outside of
>> that also contains arm instructions. Apart from that, 'call__parse()'
>> and 'move__parse()' contains #ifdef __arm__ directive. I've changed it
>> to  if (!strcmp(norm_arch, arm)). I don't have a arm machine to test
>> these changes.
>>
>> Example:
>>
>>Record on powerpc:
>>$ ./perf record -a
>>
>>Report -> Annotate on x86:
>>$ ./perf report -i perf.data.powerpc --vmlinux vmlinux.powerpc
>>
>> Changes in v4:
>>- powerpc: Added support for branch instructions that includes 'ctr'
>>- __maybe_unused was misplaced at few location. Corrected it.
>>- Moved position of v3 last patch that define macro for each arch name
>>
>> v3 link: https://lkml.org/lkml/2016/6/30/99
>>
>> Naveen N. Rao (1):
>>perf annotate: add powerpc support
>>
>> Ravi Bangoria (2):
>>perf: Define macro for normalized arch names
>>perf annotate: Enable cross arch annotate
>>
>>   tools/perf/arch/common.c   |  36 ++---
>>   tools/perf/arch/common.h   |  11 ++
>>   tools/perf/builtin-top.c   |   2 +-
>>   tools/perf/ui/browsers/annotate.c  |   3 +-
>>   tools/perf/ui/gtk/annotate.c   |   2 +-
>>   tools/perf/util/annotate.c | 273 
>> ++---
>>   tools/perf/util/annotate.h |   6 +-
>>   tools/perf/util/unwind-libunwind.c |   4 +-
>>   8 files changed, 265 insertions(+), 72 deletions(-)
>>
>> -- 
>> 2.5.5
>>
>



[PATCH v5 3/7] perf annotate: Add support for powerpc

2016-08-18 Thread Ravi Bangoria
From: "Naveen N. Rao" <naveen.n@linux.vnet.ibm.com>

Current perf can disassemble an annotated function, but it does not have
parsing logic for powerpc instructions, so none of the navigation options
are available for powerpc.

Apart from that, powerpc has a long list of branch instructions and
hardcoding them in a table appears to be error-prone. So, add a function
to find the instruction instead of creating a static table. This function
dynamically creates a table (a list of 'struct ins') and, instead of
creating a new object every time, first checks whether the list already
contains an object for that instruction.

Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v5:
  - Removed hacks for instructions like bctr and bctrl from this patch.

 tools/perf/util/annotate.c | 116 +
 1 file changed, 116 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index deb9af0..0b64841 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -459,6 +459,11 @@ static struct ins instructions_arm[] = {
{ .name = "bne",   .ops  = _ops, },
 };
 
+struct instructions_powerpc {
+   struct ins *ins;
+   struct list_head list;
+};
+
 static int ins__key_cmp(const void *name, const void *insp)
 {
const struct ins *ins = insp;
@@ -474,6 +479,115 @@ static int ins__cmp(const void *a, const void *b)
return strcmp(ia->name, ib->name);
 }
 
+static struct ins *list_add__ins_powerpc(struct instructions_powerpc *head,
+const char *name, struct ins_ops *ops)
+{
+   struct instructions_powerpc *ins_powerpc;
+   struct ins *ins;
+
+   ins = zalloc(sizeof(struct ins));
+   if (!ins)
+   return NULL;
+
+   ins_powerpc = zalloc(sizeof(struct instructions_powerpc));
+   if (!ins_powerpc)
+   goto out_free_ins;
+
+   ins->name = strdup(name);
+   if (!ins->name)
+   goto out_free_ins_power;
+
+   ins->ops = ops;
+   ins_powerpc->ins = ins;
+   list_add_tail(&(ins_powerpc->list), &(head->list));
+
+   return ins;
+
+out_free_ins_power:
+   zfree(_powerpc);
+out_free_ins:
+   zfree();
+   return NULL;
+}
+
+static struct ins *list_search__ins_powerpc(struct instructions_powerpc *head,
+   const char *name)
+{
+   struct instructions_powerpc *pos;
+
+   list_for_each_entry(pos, >list, list) {
+   if (!strcmp(pos->ins->name, name))
+   return pos->ins;
+   }
+   return NULL;
+}
+
+static struct ins *ins__find_powerpc(const char *name)
+{
+   int i;
+   struct ins *ins;
+   struct ins_ops *ops;
+   static struct instructions_powerpc head;
+   static bool list_initialized;
+
+   /*
+* - Interested only if instruction starts with 'b'.
+* - Few start with 'b', but aren't branch instructions.
+*/
+   if (name[0] != 'b' ||
+   !strncmp(name, "bcd", 3)   ||
+   !strncmp(name, "brinc", 5) ||
+   !strncmp(name, "bper", 4))
+   return NULL;
+
+   if (!list_initialized) {
+   INIT_LIST_HEAD();
+   list_initialized = true;
+   }
+
+   /*
+* Return if we already have object of 'struct ins' for this instruction
+*/
+   ins = list_search__ins_powerpc(, name);
+   if (ins)
+   return ins;
+
+   ops = &jump_ops;
+
+   i = strlen(name) - 1;
+   if (i < 0)
+   return NULL;
+
+   /* ignore optional hints at the end of the instructions */
+   if (name[i] == '+' || name[i] == '-')
+   i--;
+
+   if (name[i] == 'l' || (name[i] == 'a' && name[i-1] == 'l')) {
+   /*
+* if the instruction ends up with 'l' or 'la', then
+* those are considered 'calls' since they update LR.
+* ... except for 'bnl' which is branch if not less than
+* and the absolute form of the same.
+*/
+   if (strcmp(name, "bnl") && strcmp(name, "bnl+") &&
+   strcmp(name, "bnl-") && strcmp(name, "bnla") &&
+   strcmp(name, "bnla+") && strcmp(name, "bnla-"))
+   ops = &call_ops;
+   }
+   if (name[i] == 'r' && name[i-1] == 'l')
+   /*
+* instructions ending with 'lr' are considered to be
+* return instructions
+*/
+   ops = &ret_ops;
+
+   /*
+* Add instruction to list so next time no need to
+* allocate memory for it.
+*/
+   

[PATCH v5 1/7] perf: Define macro for normalized arch names

2016-08-18 Thread Ravi Bangoria
Define a macro for each normalized arch name and use the macros instead
of using the arch names as strings.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v5:
  - No changes.

 tools/perf/arch/common.c   | 36 ++--
 tools/perf/arch/common.h   | 11 +++
 tools/perf/util/unwind-libunwind.c |  4 ++--
 3 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 886dd2a..f763666 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -123,25 +123,25 @@ static int lookup_triplets(const char *const *triplets, 
const char *name)
 const char *normalize_arch(char *arch)
 {
if (!strcmp(arch, "x86_64"))
-   return "x86";
+   return NORM_X86;
if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
-   return "x86";
+   return NORM_X86;
if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
-   return "sparc";
+   return NORM_SPARC;
if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64"))
-   return "arm64";
+   return NORM_ARM64;
if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
-   return "arm";
+   return NORM_ARM;
if (!strncmp(arch, "s390", 4))
-   return "s390";
+   return NORM_S390;
if (!strncmp(arch, "parisc", 6))
-   return "parisc";
+   return NORM_PARISC;
if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
-   return "powerpc";
+   return NORM_POWERPC;
if (!strncmp(arch, "mips", 4))
-   return "mips";
+   return NORM_MIPS;
if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
-   return "sh";
+   return NORM_SH;
 
return arch;
 }
@@ -181,21 +181,21 @@ static int perf_env__lookup_binutils_path(struct perf_env 
*env,
zfree();
}
 
-   if (!strcmp(arch, "arm"))
+   if (!strcmp(arch, NORM_ARM))
path_list = arm_triplets;
-   else if (!strcmp(arch, "arm64"))
+   else if (!strcmp(arch, NORM_ARM64))
path_list = arm64_triplets;
-   else if (!strcmp(arch, "powerpc"))
+   else if (!strcmp(arch, NORM_POWERPC))
path_list = powerpc_triplets;
-   else if (!strcmp(arch, "sh"))
+   else if (!strcmp(arch, NORM_SH))
path_list = sh_triplets;
-   else if (!strcmp(arch, "s390"))
+   else if (!strcmp(arch, NORM_S390))
path_list = s390_triplets;
-   else if (!strcmp(arch, "sparc"))
+   else if (!strcmp(arch, NORM_SPARC))
path_list = sparc_triplets;
-   else if (!strcmp(arch, "x86"))
+   else if (!strcmp(arch, NORM_X86))
path_list = x86_triplets;
-   else if (!strcmp(arch, "mips"))
+   else if (!strcmp(arch, NORM_MIPS))
path_list = mips_triplets;
else {
ui__error("binutils for %s not supported.\n", arch);
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index 6b01c73..14ca8ca 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -5,6 +5,17 @@
 
 extern const char *objdump_path;
 
+/* Macro for normalized arch names */
+#define NORM_X86   "x86"
+#define NORM_SPARC "sparc"
+#define NORM_ARM64 "arm64"
+#define NORM_ARM   "arm"
+#define NORM_S390  "s390"
+#define NORM_PARISC"parisc"
+#define NORM_POWERPC   "powerpc"
+#define NORM_MIPS  "mips"
+#define NORM_SH"sh"
+
 int perf_env__lookup_objdump(struct perf_env *env);
 const char *normalize_arch(char *arch);
 
diff --git a/tools/perf/util/unwind-libunwind.c 
b/tools/perf/util/unwind-libunwind.c
index 6d542a4..6199102 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -40,10 +40,10 @@ int unwind__prepare_access(struct thread *thread, struct 
map *map,
 
arch = normalize_arch(thread->mg->machine->env->arch);
 
-   if (!strcmp(arch, "x86")) {
+   if (!strcmp(arch, NORM_X86)) {
if (dso_type != DSO__TYPE_64BIT)
ops = x86_32_unwind_libunwind_ops;
-   } else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) {
+   } else if (!strcmp(arch, NORM_ARM64) || !strcmp(arch, NORM_ARM)) {
if (dso_type == DSO__TYPE_64BIT)
ops = arm64_unwind_libunwind_ops;
}
-- 
2.5.5



[PATCH v5 5/7] perf annotate: Show raw form for jump instruction with indirect target

2016-08-18 Thread Ravi Bangoria
For jump instructions that do not include the target address as a direct
operand, use the raw value instead. This is needed for certain powerpc
jump instructions that take the target address from a register (such as
bctr, btar, ...).

Suggested-by: Michael Ellerman <m...@ellerman.id.au>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v5:
  - New patch introduced to annotate jump instruction with indirect target

 tools/perf/util/annotate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 6368ba9..4a4a583 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -131,6 +131,9 @@ static int jump__parse(struct ins_operands *ops,
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
   struct ins_operands *ops)
 {
+   if (!ops->target.addr)
+   return ins__raw_scnprintf(ins, bf, size, ops);
+
return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, 
ops->target.offset);
 }
 
-- 
2.5.5



[PATCH v5 2/7] perf annotate: Add cross arch annotate support

2016-08-18 Thread Ravi Bangoria
Change the current data structures and functions to enable cross arch
annotate.

The current perf implementation does not support cross arch annotate.
To make it truly cross arch, the instruction tables of all architectures
should be present in the perf binary, and the appropriate table should be
used based on the arch where perf.data was recorded.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v5:
  - Replaced symbol__annotate with symbol__disassemble.

 tools/perf/builtin-top.c  |   2 +-
 tools/perf/ui/browsers/annotate.c |   3 +-
 tools/perf/ui/gtk/annotate.c  |   2 +-
 tools/perf/util/annotate.c| 133 --
 tools/perf/util/annotate.h|   5 +-
 5 files changed, 92 insertions(+), 53 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a3223aa..fdd4203 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -129,7 +129,7 @@ static int perf_top__parse_source(struct perf_top *top, 
struct hist_entry *he)
return err;
}
 
-   err = symbol__disassemble(sym, map, 0);
+   err = symbol__disassemble(sym, map, 0, NULL);
if (err == 0) {
 out_assign:
top->sym_filter_entry = he;
diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index 2e2d100..21c5e10 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1050,7 +1050,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map 
*map,
  (nr_pcnt - 1);
}
 
-   err = symbol__disassemble(sym, map, sizeof_bdl);
+   err = symbol__disassemble(sym, map, sizeof_bdl,
+ perf_evsel__env_arch(evsel));
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 42d3199..c127aba 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -167,7 +167,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct 
map *map,
if (map->dso->annotate_warned)
return -1;
 
-   err = symbol__disassemble(sym, map, 0);
+   err = symbol__disassemble(sym, map, 0, perf_evsel__env_arch(evsel));
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 25a9259..deb9af0 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -20,12 +20,14 @@
 #include 
 #include 
 #include 
+#include 
+#include "../arch/common.h"
 
 const char *disassembler_style;
 const char *objdump_path;
 static regex_t  file_lineno;
 
-static struct ins *ins__find(const char *name);
+static struct ins *ins__find(const char *name, const char *norm_arch);
 static int disasm_line__parse(char *line, char **namep, char **rawp);
 
 static void ins__delete(struct ins_operands *ops)
@@ -53,7 +55,7 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
return ins__raw_scnprintf(ins, bf, size, ops);
 }
 
-static int call__parse(struct ins_operands *ops)
+static int call__parse(struct ins_operands *ops, const char *norm_arch)
 {
char *endptr, *tok, *name;
 
@@ -65,10 +67,8 @@ static int call__parse(struct ins_operands *ops)
 
name++;
 
-#ifdef __arm__
-   if (strchr(name, '+'))
+   if (!strcmp(norm_arch, NORM_ARM) && strchr(name, '+'))
return -1;
-#endif
 
tok = strchr(name, '>');
if (tok == NULL)
@@ -117,7 +117,8 @@ bool ins__is_call(const struct ins *ins)
return ins->ops == _ops;
 }
 
-static int jump__parse(struct ins_operands *ops)
+static int jump__parse(struct ins_operands *ops,
+  const char *norm_arch __maybe_unused)
 {
const char *s = strchr(ops->raw, '+');
 
@@ -172,7 +173,7 @@ static int comment__symbol(char *raw, char *comment, u64 
*addrp, char **namep)
return 0;
 }
 
-static int lock__parse(struct ins_operands *ops)
+static int lock__parse(struct ins_operands *ops, const char *norm_arch)
 {
char *name;
 
@@ -183,7 +184,7 @@ static int lock__parse(struct ins_operands *ops)
if (disasm_line__parse(ops->raw, , >locked.ops->raw) < 0)
goto out_free_ops;
 
-   ops->locked.ins = ins__find(name);
+   ops->locked.ins = ins__find(name, norm_arch);
free(name);
 
if (ops->locked.ins == NULL)
@@ -193,7 +194,7 @@ static int lock__parse(struct ins_operands *ops)
return 0;
 
if (ops->locked.ins->ops->parse &&
-   ops->locked.ins->ops->parse(ops->locked.ops) < 0)
+   ops->locked.ins->ops->parse(ops->locked.ops, norm_arch) < 0)
goto out_free_ops;
 

[PATCH v5 4/7] perf annotate: Do not ignore call instruction with indirect target

2016-08-18 Thread Ravi Bangoria
Do not ignore a call instruction with an indirect target when it is
already identified as a call. This is an extension of commit e8ea1561952b
("perf annotate: Use raw form for register indirect call instructions")
to generalize annotation for all instructions with indirect calls.

This is needed for certain powerpc call instructions that take the
address from a register (such as bctrl, btarl, ...).

Apart from that, when kcore is used to disassemble a function, all call
instructions were being ignored. This patch fixes that as a side effect
by not ignoring them. For example,

Before (with kcore):
     mov    %r13,%rdi
     callq  0x811a7e70
   ^ jmpq   64
     mov    %gs:0x7ef41a6e(%rip),%al

After (with kcore):
     mov    %r13,%rdi
   > callq  0x811a7e70
   ^ jmpq   64
     mov    %gs:0x7ef41a6e(%rip),%al

Suggested-by: Michael Ellerman <m...@ellerman.id.au>
[Suggested about 'bctrl' instruction]
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v5:
  - New patch, introduced to annotate all indirect call instructions.

 tools/perf/util/annotate.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0b64841..6368ba9 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -81,16 +81,12 @@ static int call__parse(struct ins_operands *ops, const char 
*norm_arch)
return ops->target.name == NULL ? -1 : 0;
 
 indirect_call:
-   tok = strchr(endptr, '(');
-   if (tok != NULL) {
+   tok = strchr(endptr, '*');
+   if (tok == NULL) {
ops->target.addr = 0;
return 0;
}
 
-   tok = strchr(endptr, '*');
-   if (tok == NULL)
-   return -1;
-
ops->target.addr = strtoull(tok + 1, NULL, 16);
return 0;
 }
-- 
2.5.5



Re: [PATCH v5 2/7] perf annotate: Add cross arch annotate support

2016-08-19 Thread Ravi Bangoria


On Friday 19 August 2016 04:18 PM, Russell King - ARM Linux wrote:
> On Fri, Aug 19, 2016 at 04:09:51PM +0530, Ravi Bangoria wrote:
>> Thanks Russell for reviewing.
>>
>> On Friday 19 August 2016 01:20 PM, Russell King - ARM Linux wrote:
>>> On Fri, Aug 19, 2016 at 10:59:01AM +0530, Ravi Bangoria wrote:
>>>> -static struct ins instructions[] = {
>>>> +static struct ins instructions_x86[] = {
>>>>{ .name = "add",   .ops  = _ops, },
>>>>{ .name = "addl",  .ops  = _ops, },
>>>>{ .name = "addq",  .ops  = _ops, },
>>>>{ .name = "addw",  .ops  = _ops, },
>>>>{ .name = "and",   .ops  = _ops, },
>>>> -#ifdef __arm__
>>>> -  { .name = "b", .ops  = _ops, }, // might also be a call
>>>> -  { .name = "bcc",   .ops  = _ops, },
>>>> -  { .name = "bcs",   .ops  = _ops, },
>>>> -  { .name = "beq",   .ops  = _ops, },
>>>> -  { .name = "bge",   .ops  = _ops, },
>>>> -  { .name = "bgt",   .ops  = _ops, },
>>>> -  { .name = "bhi",   .ops  = _ops, },
>>>> -  { .name = "bl",.ops  = _ops, },
>>>> -  { .name = "bls",   .ops  = _ops, },
>>>> -  { .name = "blt",   .ops  = _ops, },
>>>> -  { .name = "blx",   .ops  = _ops, },
>>>> -  { .name = "bne",   .ops  = _ops, },
>>>> -#endif
>>> Notice that ARM includes a lot of other instructions from this table,
>>> not just those above.
>>>
>>>>{ .name = "bts",   .ops  = _ops, },
>>>>{ .name = "call",  .ops  = _ops, },
>>>>{ .name = "callq", .ops  = _ops, },
>>>> @@ -456,6 +444,21 @@ static struct ins instructions[] = {
>>>>{ .name = "retq",  .ops  = _ops, },
>>>>  };
>>>>  
>>>> +static struct ins instructions_arm[] = {
>>>> +  { .name = "b", .ops  = _ops, }, /* might also be a call */
>>>> +  { .name = "bcc",   .ops  = _ops, },
>>>> +  { .name = "bcs",   .ops  = _ops, },
>>>> +  { .name = "beq",   .ops  = _ops, },
>>>> +  { .name = "bge",   .ops  = _ops, },
>>>> +  { .name = "bgt",   .ops  = _ops, },
>>>> +  { .name = "bhi",   .ops  = _ops, },
>>>> +  { .name = "bl",.ops  = _ops, },
>>>> +  { .name = "bls",   .ops  = _ops, },
>>>> +  { .name = "blt",   .ops  = _ops, },
>>>> +  { .name = "blx",   .ops  = _ops, },
>>>> +  { .name = "bne",   .ops  = _ops, },
>>>> +};
>>>> +
>>> ...
>>>> +  if (!strcmp(norm_arch, NORM_X86)) {
>>>> +  instructions = instructions_x86;
>>>> +  nmemb = ARRAY_SIZE(instructions_x86);
>>>> +  } else if (!strcmp(norm_arch, NORM_ARM)) {
>>>> +  instructions = instructions_arm;
>>>> +  nmemb = ARRAY_SIZE(instructions_arm);
>>> But these changes result in _only_ the ones that were in the #if __arm__
>>> being matched.  This is wrong.
>>>
>>> If we want to go that way, we need to add _all_ arm instructions to
>>> instructions_arm, not just those within the #if.
>> Yes, I've mentioned same in cover letter as well.
>>
>> Can I add all x86 instructions for arm as well? If not, can you please
>> provide a list of arm instructions that needs to be added here.
> If it were me doing a change like this, I'd be trying to preserve the
> current behaviour to avoid causing regressions, which would mean
> ensuring that all the instructions that were visible before the change
> remain visible after the change, even those which are obviously x86
> specific but were still in the table anyway.  It then becomes a cleanup
> matter later to remove those which aren't relevent, rather than having
> to chase around wondering why the tool broke.
>
> I'm afraid I don't have time to look at this (I'm chasing regressions
> and bugs in the kernel) so I'd suggest you try to avoid causing
> regressions in this tool...
>

Yes Russell, fair point. Will send the next series.

-Ravi



[PATCH v6 1/7] perf: Define macro for normalized arch names

2016-08-19 Thread Ravi Bangoria
Define a macro for each normalized arch name and use the macros instead
of using the arch names as strings.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v6:
  - No change

 tools/perf/arch/common.c   | 36 ++--
 tools/perf/arch/common.h   | 11 +++
 tools/perf/util/unwind-libunwind.c |  4 ++--
 3 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 886dd2a..f763666 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -123,25 +123,25 @@ static int lookup_triplets(const char *const *triplets, 
const char *name)
 const char *normalize_arch(char *arch)
 {
if (!strcmp(arch, "x86_64"))
-   return "x86";
+   return NORM_X86;
if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
-   return "x86";
+   return NORM_X86;
if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
-   return "sparc";
+   return NORM_SPARC;
if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64"))
-   return "arm64";
+   return NORM_ARM64;
if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
-   return "arm";
+   return NORM_ARM;
if (!strncmp(arch, "s390", 4))
-   return "s390";
+   return NORM_S390;
if (!strncmp(arch, "parisc", 6))
-   return "parisc";
+   return NORM_PARISC;
if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
-   return "powerpc";
+   return NORM_POWERPC;
if (!strncmp(arch, "mips", 4))
-   return "mips";
+   return NORM_MIPS;
if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
-   return "sh";
+   return NORM_SH;
 
return arch;
 }
@@ -181,21 +181,21 @@ static int perf_env__lookup_binutils_path(struct perf_env 
*env,
zfree();
}
 
-   if (!strcmp(arch, "arm"))
+   if (!strcmp(arch, NORM_ARM))
path_list = arm_triplets;
-   else if (!strcmp(arch, "arm64"))
+   else if (!strcmp(arch, NORM_ARM64))
path_list = arm64_triplets;
-   else if (!strcmp(arch, "powerpc"))
+   else if (!strcmp(arch, NORM_POWERPC))
path_list = powerpc_triplets;
-   else if (!strcmp(arch, "sh"))
+   else if (!strcmp(arch, NORM_SH))
path_list = sh_triplets;
-   else if (!strcmp(arch, "s390"))
+   else if (!strcmp(arch, NORM_S390))
path_list = s390_triplets;
-   else if (!strcmp(arch, "sparc"))
+   else if (!strcmp(arch, NORM_SPARC))
path_list = sparc_triplets;
-   else if (!strcmp(arch, "x86"))
+   else if (!strcmp(arch, NORM_X86))
path_list = x86_triplets;
-   else if (!strcmp(arch, "mips"))
+   else if (!strcmp(arch, NORM_MIPS))
path_list = mips_triplets;
else {
ui__error("binutils for %s not supported.\n", arch);
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
index 6b01c73..14ca8ca 100644
--- a/tools/perf/arch/common.h
+++ b/tools/perf/arch/common.h
@@ -5,6 +5,17 @@
 
 extern const char *objdump_path;
 
+/* Macro for normalized arch names */
+#define NORM_X86   "x86"
+#define NORM_SPARC "sparc"
+#define NORM_ARM64 "arm64"
+#define NORM_ARM   "arm"
+#define NORM_S390  "s390"
+#define NORM_PARISC"parisc"
+#define NORM_POWERPC   "powerpc"
+#define NORM_MIPS  "mips"
+#define NORM_SH"sh"
+
 int perf_env__lookup_objdump(struct perf_env *env);
 const char *normalize_arch(char *arch);
 
diff --git a/tools/perf/util/unwind-libunwind.c 
b/tools/perf/util/unwind-libunwind.c
index 6d542a4..6199102 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -40,10 +40,10 @@ int unwind__prepare_access(struct thread *thread, struct 
map *map,
 
arch = normalize_arch(thread->mg->machine->env->arch);
 
-   if (!strcmp(arch, "x86")) {
+   if (!strcmp(arch, NORM_X86)) {
if (dso_type != DSO__TYPE_64BIT)
ops = x86_32_unwind_libunwind_ops;
-   } else if (!strcmp(arch, "arm64") || !strcmp(arch, "arm")) {
+   } else if (!strcmp(arch, NORM_ARM64) || !strcmp(arch, NORM_ARM)) {
if (dso_type == DSO__TYPE_64BIT)
ops = arm64_unwind_libunwind_ops;
}
-- 
2.5.5



[PATCH v6 7/7] perf annotate: Fix jump target outside of function address range

2016-08-19 Thread Ravi Bangoria
If the jump target is outside of the function range, perf is not handling
it correctly. Especially when the target address is less than the function
start address, the target offset will be negative. But the target address
is declared as unsigned, so the negative number gets converted into its
2's complement form. See the example below. Here the target of the 'jmpq'
instruction at 34cf8 is 34ac0, which is less than the function start
address (34cf0).

34ac0 - 34cf0 = -0x230 = 0xfffffffffffffdd0

Objdump output:

  0000000000034cf0 <__sigaction>:
  __GI___sigaction():
    34cf0: lea    -0x20(%rdi),%eax
    34cf3: cmp    $0x1,%eax
    34cf6: jbe    34d00 <__sigaction+0x10>
    34cf8: jmpq   34ac0 <__GI___libc_sigaction>
    34cfd: nopl   (%rax)
    34d00: mov    0x386161(%rip),%rax        # 3bae68 <_DYNAMIC+0x2e8>
    34d07: movl   $0x16,%fs:(%rax)
    34d0e: mov    $0xffffffff,%eax
    34d13: retq

perf annotate before applying patch:

  __GI___sigaction  /usr/lib64/libc-2.22.so
         lea    -0x20(%rdi),%eax
         cmp    $0x1,%eax
      V  jbe    10
      V  jmpq   fffffffffffffdd0
         nop
  10:    mov    _DYNAMIC+0x2e8,%rax
         movl   $0x16,%fs:(%rax)
         mov    $0xffffffff,%eax
         retq

perf annotate after applying patch:

  __GI___sigaction  /usr/lib64/libc-2.22.so
         lea    -0x20(%rdi),%eax
         cmp    $0x1,%eax
      V  jbe    10
      ^  jmpq   34ac0 <__GI___libc_sigaction>
         nop
  10:    mov    _DYNAMIC+0x2e8,%rax
         movl   $0x16,%fs:(%rax)
         mov    $0xffffffff,%eax
         retq

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v6:
  - No changes

 tools/perf/ui/browsers/annotate.c |  5 +++--
 tools/perf/util/annotate.c| 14 +-
 tools/perf/util/annotate.h|  5 +++--
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index 21c5e10..c13df5b 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -215,7 +215,7 @@ static void annotate_browser__write(struct ui_browser 
*browser, void *entry, int
ui_browser__set_color(browser, color);
if (dl->ins && dl->ins->ops->scnprintf) {
if (ins__is_jump(dl->ins)) {
-   bool fwd = dl->ops.target.offset > 
(u64)dl->offset;
+   bool fwd = dl->ops.target.offset > dl->offset;
 
ui_browser__write_graph(browser, fwd ? 
SLSMG_DARROW_CHAR :

SLSMG_UARROW_CHAR);
@@ -245,7 +245,8 @@ static bool disasm_line__is_valid_jump(struct disasm_line 
*dl, struct symbol *sy
 {
if (!dl || !dl->ins || !ins__is_jump(dl->ins)
|| !disasm_line__has_offset(dl)
-   || dl->ops.target.offset >= symbol__size(sym))
+   || dl->ops.target.offset < 0
+   || dl->ops.target.offset >= (s64)symbol__size(sym))
return false;
 
return true;
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 73c4f48..9409d54 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -124,10 +124,12 @@ static int jump__parse(struct ins_operands *ops,
else
ops->target.addr = strtoull(ops->raw, NULL, 16);
 
-   if (s++ != NULL)
+   if (s++ != NULL) {
ops->target.offset = strtoull(s, NULL, 16);
-   else
-   ops->target.offset = UINT64_MAX;
+   ops->target.offset_avail = true;
+   } else {
+   ops->target.offset_avail = false;
+   }
 
return 0;
 }
@@ -135,7 +137,7 @@ static int jump__parse(struct ins_operands *ops,
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
   struct ins_operands *ops)
 {
-   if (!ops->target.addr)
+   if (!ops->target.addr || ops->target.offset < 0)
return ins__raw_scnprintf(ins, bf, size, ops);
 
return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, 
ops->target.offset);
@@ -1304,9 +1306,11 @@ static int symbol__parse_objdump_line(struct symbol 
*sym, struct map *map,
if (dl == NULL)
return -1;
 
-   if (dl->ops.target.offset == UINT64_MAX)
+   if (!disasm_line__has_offset(dl)) {
dl->ops.target.offset = dl->ops.target.addr -
map__rip_2objdump(map, sym->start);
+   dl->ops.target.offset_avail = true;
+   }
 
/* kcore has no symbols, so add the call target name */
if (dl->ins && ins__is_call(dl->ins) && !dl->ops.target.name) {
diff --git a/tools/perf/util/annotate.

[PATCH v6 6/7] perf annotate: Support jump instruction with target as second operand

2016-08-19 Thread Ravi Bangoria
Currently perf is not able to parse a jump instruction when the second
operand contains the target address. Architectures like powerpc have such
instructions, for example 'beq cr7,10173e60'.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v6:
  - No changes

 tools/perf/util/annotate.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 7ecb1b8..73c4f48 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -117,8 +117,12 @@ static int jump__parse(struct ins_operands *ops,
   const char *norm_arch __maybe_unused)
 {
const char *s = strchr(ops->raw, '+');
+   const char *c = strchr(ops->raw, ',');
 
-   ops->target.addr = strtoull(ops->raw, NULL, 16);
+   if (c++ != NULL)
+   ops->target.addr = strtoull(c, NULL, 16);
+   else
+   ops->target.addr = strtoull(ops->raw, NULL, 16);
 
if (s++ != NULL)
ops->target.offset = strtoull(s, NULL, 16);
-- 
2.5.5



[PATCH v6 0/7] perf: Cross arch annotate + few miscellaneous fixes

2016-08-19 Thread Ravi Bangoria
Currently perf annotate supports code navigation (branches and calls)
only when run on the same architecture where perf.data was recorded.
So, for example, recording on a powerpc server and annotating on a
client's x86 desktop is not supported.

This patchset enables cross arch annotate. It reuses the x86 and arm
instruction tables which are already available and adds support for
powerpc.

Additionally, this patch series contains a few other related fixes.

Patches are prepared on top of acme/perf/core and tested with x86 and
powerpc only.

Note for arm:
I don't have an arm test machine. As suggested by Russell in one of the
review comments, I've copied all instructions from the default table to
the arm table. This way it won't break the tool on arm, but cleanup is
needed for the x86-specific instructions added to the arm table.

Example:

  Record on powerpc:
  $ ./perf record -a

  Report -> Annotate on x86:
  $ ./perf report -i perf.data.powerpc --vmlinux vmlinux.powerpc

Changes in v6:
  - Instead of adding only those instructions defined in #ifdef __arm__,
add all instructions from default table to arm table.

v5 link:
  https://lkml.org/lkml/2016/8/19/35

Naveen N. Rao (1):
  perf annotate: Add support for powerpc

Ravi Bangoria (6):
  perf: Define macro for normalized arch names
  perf annotate: Add cross arch annotate support
  perf annotate: Do not ignore call instruction with indirect target
  perf annotate: Show raw form for jump instruction with indirect target
  perf annotate: Support jump instruction with target as second operand
  perf annotate: Fix jump target outside of function address range

 tools/perf/arch/common.c   |  36 ++--
 tools/perf/arch/common.h   |  11 ++
 tools/perf/builtin-top.c   |   2 +-
 tools/perf/ui/browsers/annotate.c  |   8 +-
 tools/perf/ui/gtk/annotate.c   |   2 +-
 tools/perf/util/annotate.c | 330 +++--
 tools/perf/util/annotate.h |  10 +-
 tools/perf/util/unwind-libunwind.c |   4 +-
 8 files changed, 327 insertions(+), 76 deletions(-)

-- 
2.5.5



[PATCH v6 5/7] perf annotate: Show raw form for jump instruction with indirect target

2016-08-19 Thread Ravi Bangoria
For jump instructions that do not include the target address as a direct
operand, use the raw value instead. This is needed for certain powerpc
jump instructions that take the target address from a register (such as
bctr, btar, ...).

Suggested-by: Michael Ellerman <m...@ellerman.id.au>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v6:
  - No changes

 tools/perf/util/annotate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index a05423b..7ecb1b8 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -131,6 +131,9 @@ static int jump__parse(struct ins_operands *ops,
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
   struct ins_operands *ops)
 {
+   if (!ops->target.addr)
+   return ins__raw_scnprintf(ins, bf, size, ops);
+
return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, 
ops->target.offset);
 }
 
-- 
2.5.5



[PATCH v6 2/7] perf annotate: Add cross arch annotate support

2016-08-19 Thread Ravi Bangoria
Change the current data structures and functions to enable cross arch
annotate.

The current perf implementation does not support cross arch annotate.
To make it truly cross arch, the instruction tables of all architectures
should be present in the perf binary, and the appropriate table should be
used based on the arch where perf.data was recorded.
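
As a rough sketch of the idea (simplified for illustration; the lazy
sorting of the tables and the error handling in the real ins__find()
are omitted), the per-arch table selection ends up looking like this:

  static struct ins *ins__find(const char *name, const char *norm_arch)
  {
          struct ins *instructions;
          size_t nmemb;

          if (!strcmp(norm_arch, NORM_X86)) {
                  instructions = instructions_x86;
                  nmemb = ARRAY_SIZE(instructions_x86);
          } else if (!strcmp(norm_arch, NORM_ARM)) {
                  instructions = instructions_arm;
                  nmemb = ARRAY_SIZE(instructions_arm);
          } else {
                  return NULL;
          }

          /* each table is sorted by name, so bsearch() by name suffices */
          return bsearch(name, instructions, nmemb, sizeof(struct ins),
                         ins__key_cmp);
  }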

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v6:
  - Instead of adding only those instructions defined in #ifdef __arm__,
add all instructions from default table to arm table.

 tools/perf/builtin-top.c  |   2 +-
 tools/perf/ui/browsers/annotate.c |   3 +-
 tools/perf/ui/gtk/annotate.c  |   2 +-
 tools/perf/util/annotate.c| 187 ++
 tools/perf/util/annotate.h|   5 +-
 5 files changed, 157 insertions(+), 42 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a3223aa..fdd4203 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -129,7 +129,7 @@ static int perf_top__parse_source(struct perf_top *top, 
struct hist_entry *he)
return err;
}
 
-   err = symbol__disassemble(sym, map, 0);
+   err = symbol__disassemble(sym, map, 0, NULL);
if (err == 0) {
 out_assign:
top->sym_filter_entry = he;
diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index 2e2d100..21c5e10 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1050,7 +1050,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map 
*map,
  (nr_pcnt - 1);
}
 
-   err = symbol__disassemble(sym, map, sizeof_bdl);
+   err = symbol__disassemble(sym, map, sizeof_bdl,
+ perf_evsel__env_arch(evsel));
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 42d3199..c127aba 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -167,7 +167,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct 
map *map,
if (map->dso->annotate_warned)
return -1;
 
-   err = symbol__disassemble(sym, map, 0);
+   err = symbol__disassemble(sym, map, 0, perf_evsel__env_arch(evsel));
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 25a9259..14a8808 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -20,12 +20,14 @@
 #include 
 #include 
 #include 
+#include 
+#include "../arch/common.h"
 
 const char *disassembler_style;
 const char *objdump_path;
 static regex_t  file_lineno;
 
-static struct ins *ins__find(const char *name);
+static struct ins *ins__find(const char *name, const char *norm_arch);
 static int disasm_line__parse(char *line, char **namep, char **rawp);
 
 static void ins__delete(struct ins_operands *ops)
@@ -53,7 +55,7 @@ int ins__scnprintf(struct ins *ins, char *bf, size_t size,
return ins__raw_scnprintf(ins, bf, size, ops);
 }
 
-static int call__parse(struct ins_operands *ops)
+static int call__parse(struct ins_operands *ops, const char *norm_arch)
 {
char *endptr, *tok, *name;
 
@@ -65,10 +67,8 @@ static int call__parse(struct ins_operands *ops)
 
name++;
 
-#ifdef __arm__
-   if (strchr(name, '+'))
+   if (!strcmp(norm_arch, NORM_ARM) && strchr(name, '+'))
return -1;
-#endif
 
tok = strchr(name, '>');
if (tok == NULL)
@@ -117,7 +117,8 @@ bool ins__is_call(const struct ins *ins)
return ins->ops == _ops;
 }
 
-static int jump__parse(struct ins_operands *ops)
+static int jump__parse(struct ins_operands *ops,
+  const char *norm_arch __maybe_unused)
 {
const char *s = strchr(ops->raw, '+');
 
@@ -172,7 +173,7 @@ static int comment__symbol(char *raw, char *comment, u64 
*addrp, char **namep)
return 0;
 }
 
-static int lock__parse(struct ins_operands *ops)
+static int lock__parse(struct ins_operands *ops, const char *norm_arch)
 {
char *name;
 
@@ -183,7 +184,7 @@ static int lock__parse(struct ins_operands *ops)
if (disasm_line__parse(ops->raw, , >locked.ops->raw) < 0)
goto out_free_ops;
 
-   ops->locked.ins = ins__find(name);
+   ops->locked.ins = ins__find(name, norm_arch);
free(name);
 
if (ops->locked.ins == NULL)
@@ -193,7 +194,7 @@ static int lock__parse(struct ins_operands *ops)
return 0;
 
if (ops->locked.ins->ops->parse &&
-   ops->locked.ins->ops->parse(ops->locked.ops) < 0)
+   ops->locked.ins->ops->parse(ops->lo

[PATCH v6 3/7] perf annotate: Add support for powerpc

2016-08-19 Thread Ravi Bangoria
From: "Naveen N. Rao" <naveen.n@linux.vnet.ibm.com>

Current perf can disassemble an annotated function, but it does not have
parsing logic for powerpc instructions, so none of the navigation options
are available for powerpc.

Apart from that, powerpc has a long list of branch instructions and
hardcoding them in a table appears to be error-prone. So, add a function
to find the instruction instead of creating a static table. This function
dynamically creates a table (a list of 'struct ins') and, instead of
creating a new object every time, first checks whether the list already
contains an object for that instruction.

Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v6:
  - No change

 tools/perf/util/annotate.c | 116 +
 1 file changed, 116 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 14a8808..ea07588 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -535,6 +535,11 @@ static struct ins instructions_arm[] = {
{ .name = "retq",  .ops  = _ops, },
 };
 
+struct instructions_powerpc {
+   struct ins *ins;
+   struct list_head list;
+};
+
 static int ins__key_cmp(const void *name, const void *insp)
 {
const struct ins *ins = insp;
@@ -550,6 +555,115 @@ static int ins__cmp(const void *a, const void *b)
return strcmp(ia->name, ib->name);
 }
 
+static struct ins *list_add__ins_powerpc(struct instructions_powerpc *head,
+const char *name, struct ins_ops *ops)
+{
+   struct instructions_powerpc *ins_powerpc;
+   struct ins *ins;
+
+   ins = zalloc(sizeof(struct ins));
+   if (!ins)
+   return NULL;
+
+   ins_powerpc = zalloc(sizeof(struct instructions_powerpc));
+   if (!ins_powerpc)
+   goto out_free_ins;
+
+   ins->name = strdup(name);
+   if (!ins->name)
+   goto out_free_ins_power;
+
+   ins->ops = ops;
+   ins_powerpc->ins = ins;
+   list_add_tail(&(ins_powerpc->list), &(head->list));
+
+   return ins;
+
+out_free_ins_power:
+   zfree(_powerpc);
+out_free_ins:
+   zfree();
+   return NULL;
+}
+
+static struct ins *list_search__ins_powerpc(struct instructions_powerpc *head,
+   const char *name)
+{
+   struct instructions_powerpc *pos;
+
+   list_for_each_entry(pos, >list, list) {
+   if (!strcmp(pos->ins->name, name))
+   return pos->ins;
+   }
+   return NULL;
+}
+
+static struct ins *ins__find_powerpc(const char *name)
+{
+   int i;
+   struct ins *ins;
+   struct ins_ops *ops;
+   static struct instructions_powerpc head;
+   static bool list_initialized;
+
+   /*
+* - Interested only if instruction starts with 'b'.
+* - Few start with 'b', but aren't branch instructions.
+*/
+   if (name[0] != 'b' ||
+   !strncmp(name, "bcd", 3)   ||
+   !strncmp(name, "brinc", 5) ||
+   !strncmp(name, "bper", 4))
+   return NULL;
+
+   if (!list_initialized) {
+   INIT_LIST_HEAD();
+   list_initialized = true;
+   }
+
+   /*
+* Return if we already have object of 'struct ins' for this instruction
+*/
+   ins = list_search__ins_powerpc(, name);
+   if (ins)
+   return ins;
+
+   ops = &jump_ops;
+
+   i = strlen(name) - 1;
+   if (i < 0)
+   return NULL;
+
+   /* ignore optional hints at the end of the instructions */
+   if (name[i] == '+' || name[i] == '-')
+   i--;
+
+   if (name[i] == 'l' || (name[i] == 'a' && name[i-1] == 'l')) {
+   /*
+* if the instruction ends up with 'l' or 'la', then
+* those are considered 'calls' since they update LR.
+* ... except for 'bnl' which is branch if not less than
+* and the absolute form of the same.
+*/
+   if (strcmp(name, "bnl") && strcmp(name, "bnl+") &&
+   strcmp(name, "bnl-") && strcmp(name, "bnla") &&
+   strcmp(name, "bnla+") && strcmp(name, "bnla-"))
+   ops = &call_ops;
+   }
+   if (name[i] == 'r' && name[i-1] == 'l')
+   /*
+* instructions ending with 'lr' are considered to be
+* return instructions
+*/
+   ops = &ret_ops;
+
+   /*
+* Add instruction to list so next time no need to
+* allocate memory for it.
+*/
+   return list_add__ins_powerpc(, name, ops);
+}
+
 st

[PATCH v6 4/7] perf annotate: Do not ignore call instruction with indirect target

2016-08-19 Thread Ravi Bangoria
Do not ignore a call instruction with an indirect target when it is
already identified as a call. This is an extension of commit e8ea1561952b
("perf annotate: Use raw form for register indirect call instructions")
to generalize annotation for all instructions with indirect calls.

This is needed for certain powerpc call instructions that take the
address from a register (such as bctrl, btarl, ...).

Apart from that, when kcore is used to disassemble a function, all call
instructions were being ignored. This patch fixes that as a side effect
by not ignoring them. For example,

Before (with kcore):
     mov    %r13,%rdi
     callq  0x811a7e70
   ^ jmpq   64
     mov    %gs:0x7ef41a6e(%rip),%al

After (with kcore):
     mov    %r13,%rdi
   > callq  0x811a7e70
   ^ jmpq   64
     mov    %gs:0x7ef41a6e(%rip),%al

Suggested-by: Michael Ellerman <m...@ellerman.id.au>
[Suggested about 'bctrl' instruction]
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v6:
  - No change

 tools/perf/util/annotate.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index ea07588..a05423b 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -81,16 +81,12 @@ static int call__parse(struct ins_operands *ops, const char 
*norm_arch)
return ops->target.name == NULL ? -1 : 0;
 
 indirect_call:
-   tok = strchr(endptr, '(');
-   if (tok != NULL) {
+   tok = strchr(endptr, '*');
+   if (tok == NULL) {
ops->target.addr = 0;
return 0;
}
 
-   tok = strchr(endptr, '*');
-   if (tok == NULL)
-   return -1;
-
ops->target.addr = strtoull(tok + 1, NULL, 16);
return 0;
 }
-- 
2.5.5



Re: [PATCH v5 2/7] perf annotate: Add cross arch annotate support

2016-08-19 Thread Ravi Bangoria
Thanks Russell for reviewing.

On Friday 19 August 2016 01:20 PM, Russell King - ARM Linux wrote:
> On Fri, Aug 19, 2016 at 10:59:01AM +0530, Ravi Bangoria wrote:
>> -static struct ins instructions[] = {
>> +static struct ins instructions_x86[] = {
>>  { .name = "add",   .ops  = _ops, },
>>  { .name = "addl",  .ops  = _ops, },
>>  { .name = "addq",  .ops  = _ops, },
>>  { .name = "addw",  .ops  = _ops, },
>>  { .name = "and",   .ops  = _ops, },
>> -#ifdef __arm__
>> -{ .name = "b", .ops  = _ops, }, // might also be a call
>> -{ .name = "bcc",   .ops  = _ops, },
>> -{ .name = "bcs",   .ops  = _ops, },
>> -{ .name = "beq",   .ops  = _ops, },
>> -{ .name = "bge",   .ops  = _ops, },
>> -{ .name = "bgt",   .ops  = _ops, },
>> -{ .name = "bhi",   .ops  = _ops, },
>> -{ .name = "bl",.ops  = _ops, },
>> -{ .name = "bls",   .ops  = _ops, },
>> -{ .name = "blt",   .ops  = _ops, },
>> -{ .name = "blx",   .ops  = _ops, },
>> -{ .name = "bne",   .ops  = _ops, },
>> -#endif
> Notice that ARM includes a lot of other instructions from this table,
> not just those above.
>
>>  { .name = "bts",   .ops  = _ops, },
>>  { .name = "call",  .ops  = _ops, },
>>  { .name = "callq", .ops  = _ops, },
>> @@ -456,6 +444,21 @@ static struct ins instructions[] = {
>>  { .name = "retq",  .ops  = _ops, },
>>  };
>>  
>> +static struct ins instructions_arm[] = {
>> +{ .name = "b", .ops  = _ops, }, /* might also be a call */
>> +{ .name = "bcc",   .ops  = _ops, },
>> +{ .name = "bcs",   .ops  = _ops, },
>> +{ .name = "beq",   .ops  = _ops, },
>> +{ .name = "bge",   .ops  = _ops, },
>> +{ .name = "bgt",   .ops  = _ops, },
>> +{ .name = "bhi",   .ops  = _ops, },
>> +{ .name = "bl",.ops  = _ops, },
>> +{ .name = "bls",   .ops  = _ops, },
>> +{ .name = "blt",   .ops  = _ops, },
>> +{ .name = "blx",   .ops  = _ops, },
>> +{ .name = "bne",   .ops  = _ops, },
>> +};
>> +
> ...
>> +if (!strcmp(norm_arch, NORM_X86)) {
>> +instructions = instructions_x86;
>> +nmemb = ARRAY_SIZE(instructions_x86);
>> +} else if (!strcmp(norm_arch, NORM_ARM)) {
>> +instructions = instructions_arm;
>> +nmemb = ARRAY_SIZE(instructions_arm);
> But these changes result in _only_ the ones that were in the #if __arm__
> being matched.  This is wrong.
>
> If we want to go that way, we need to add _all_ arm instructions to
> instructions_arm, not just those within the #if.

Yes, I've mentioned same in cover letter as well.

Can I add all x86 instructions for arm as well? If not, can you please provide
a list of arm instructions that needs to be added here.

-Ravi




Re: [PATCH v6 0/7] perf: Cross arch annotate + few miscellaneous fixes

2016-09-07 Thread Ravi Bangoria
Hello,

Any update on this?

-Ravi

On Friday 19 August 2016 06:29 PM, Ravi Bangoria wrote:
> Currently Perf annotate support code navigation (branches and calls)
> only when run on the same architecture where perf.data was recorded.
> But, for example, record on powerpc server and annotate on client's
> x86 desktop is not supported.
>
> This patchset enables cross arch annotate. Currently I've used x86
> and arm instructions which are already available and added support
> for powerpc.
>
> Additionally this patch series also contains few other related fixes.
>
> Patches are prepared on top of acme/perf/core and tested it with x86
> and powerpc only.
>
> Note for arm:
> I don't have an arm test machine. As suggested by Russell in one of the
> review comments, I've copied all instructions from the default table to
> the arm table. This way it won't break the tool on arm, but cleanup is
> needed for the x86-specific instructions added to the arm table.
>
> Example:
>
>   Record on powerpc:
>   $ ./perf record -a
>
>   Report -> Annotate on x86:
>   $ ./perf report -i perf.data.powerpc --vmlinux vmlinux.powerpc
>
> Changes in v6:
>   - Instead of adding only those instructions defined in #ifdef __arm__,
> add all instructions from default table to arm table.
>
> v5 link:
>   https://lkml.org/lkml/2016/8/19/35
>
> Naveen N. Rao (1):
>   perf annotate: Add support for powerpc
>
> Ravi Bangoria (6):
>   perf: Define macro for normalized arch names
>   perf annotate: Add cross arch annotate support
>   perf annotate: Do not ignore call instruction with indirect target
>   perf annotate: Show raw form for jump instruction with indirect target
>   perf annotate: Support jump instruction with target as second operand
>   perf annotate: Fix jump target outside of function address range
>
>  tools/perf/arch/common.c   |  36 ++--
>  tools/perf/arch/common.h   |  11 ++
>  tools/perf/builtin-top.c   |   2 +-
>  tools/perf/ui/browsers/annotate.c  |   8 +-
>  tools/perf/ui/gtk/annotate.c   |   2 +-
>  tools/perf/util/annotate.c | 330 
> +++--
>  tools/perf/util/annotate.h |  10 +-
>  tools/perf/util/unwind-libunwind.c |   4 +-
>  8 files changed, 327 insertions(+), 76 deletions(-)
>



Re: [PATCH v7 1/6] perf annotate: Add cross arch annotate support

2016-10-10 Thread Ravi Bangoria
Hi Arnaldo,

Sorry for little late replies, I was off last week.

Please find my comments.

On Wednesday 05 October 2016 04:49 PM, Arnaldo Carvalho de Melo wrote:
> Em Wed, Sep 21, 2016 at 09:17:51PM +0530, Ravi Bangoria escreveu:
>> Change current data structures and function to enable cross arch
>> annotate.
>>
>> Current perf implementation does not support cross arch annotate.
>> To make it truly cross arch, instruction table of all arch should
>> be present in perf binary. And use appropriate table based on arch
>> where perf.data was recorded.
...
>>  tok = strchr(name, '>');
>>  if (tok == NULL)
>> @@ -252,16 +253,12 @@ static int mov__parse(struct ins_operands *ops, struct 
>> map *map __maybe_unused)
>>  return -1;
>>  
>>  target = ++s;
>> -#ifdef __arm__
>> +
>>  comment = strchr(s, ';');
>> -#else
>> -comment = strchr(s, '#');
>> -#endif
>> +if (comment == NULL)
>> +comment = strchr(s, '#');
>>  
>> -if (comment != NULL)
>> -s = comment - 1;
>> -else
>> -s = strchr(s, '\0') - 1;
>> +s = (comment != NULL) ? comment - 1 : strchr(s, '\0') - 1;
> Why have you touched the above 4 lines? The code you provided is
> equivalent, i.e. has no value for this patch you're working on, just a
> distraction for reviewers, please don't do that.

Sorry about that. I did this change to make code more compact but
yes, you are right, that should be done as separate patch.

-Ravi



Re: [PATCH v7 3/6] perf annotate: Show raw form for jump instruction with indirect target

2016-10-10 Thread Ravi Bangoria


On Wednesday 05 October 2016 04:57 PM, Arnaldo Carvalho de Melo wrote:
> Em Wed, Sep 21, 2016 at 09:17:53PM +0530, Ravi Bangoria escreveu:
>> For jump instructions that do not include target address as direct
>> operand, use raw value for that. This is needed for certain powerpc
>   "use raw value" looks vague, as the example below makes is go from
> using a value (ca2c) to no value at all, i.e. the output
> looks backwards from what you describe, can you instead show the
> original disassembled line from objdump, which I think is what you're
> calling "raw value" in this case?

Correct, I'm showing that only -- "original disassembled line from objdump".

There is no direct operand with bctr. It uses the content of the 'ctr'
register as the target address.

For example, objdump output:

   100b8fd8:   add    r10,r9,r10
   100b8fdc:   mtctr  r10
   100b8fe0:   bctr

> - Arnaldo
>
>> jump instructions that use target address in a register (such as bctr,
>> btar, ...).
>>
>> Before:
>>  ld r12,32088(r12)
>>  mtctr  r12
>>   v  bctr   ca2c
>>  std    r2,24(r1)
>>  addis  r12,r2,-1
>>
>> After:
>>  ld r12,32088(r12)
>>  mtctr  r12
>>   v  bctr
>>  std    r2,24(r1)
>>  addis  r12,r2,-1
>>
>> Suggested-by: Michael Ellerman <m...@ellerman.id.au>
>> Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
>> ---
>> Changes in v7:
>>   - Added example in description
>>
>>  tools/perf/util/annotate.c | 3 +++
>>  1 file changed, 3 insertions(+)
>>
>> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
>> index 5aa72d9..1ccf26a 100644
>> --- a/tools/perf/util/annotate.c
>> +++ b/tools/perf/util/annotate.c
>> @@ -136,6 +136,9 @@ static int jump__parse(struct ins_operands *ops, struct 
>> map *map __maybe_unused)
>>  static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
>> struct ins_operands *ops)
>>  {
>> +if (!ops->target.addr)
>> +return ins__raw_scnprintf(ins, bf, size, ops);
>> +
>>  return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, 
>> ops->target.offset);
>>  }
>>  
>> -- 
>> 2.5.5



Re: [PATCH v7 4/6] perf annotate: Support jump instruction with target as second operand

2016-10-10 Thread Ravi Bangoria


On Wednesday 05 October 2016 04:58 PM, Arnaldo Carvalho de Melo wrote:
> Em Wed, Sep 21, 2016 at 09:17:54PM +0530, Ravi Bangoria escreveu:
>> Current perf is not able to parse jump instruction when second operand
>> contains target address. Arch like powerpc has such instructions. For
>> example, 'bne  cr7,0xc00f6154'.
>>
>> objdump o/p:
>>   c00f6140:   ld r9,1032(r31)
>>   c00f6144:   cmpdi  cr7,r9,0
>>   c00f6148:   bne    cr7,0xc00f6154
>>   c00f614c:   ld r9,2312(r30)
>>   c00f6150:   std    r9,1032(r31)
>>   c00f6154:   ld r9,88(r31)
> So the above is what is parsed to generate the following? Or these
> aren't related?

Yes, following is the perf annotate o/p from above objdump o/p.

-Ravi

>
>> Before patch:
>>  ld r9,1032(r31)
>>  cmpdi  cr7,r9,0
>>   v  bne    3ff09f2c
>>  ld r9,2312(r30)
>>  std    r9,1032(r31)
>>   74:   ld r9,88(r31)
>>
>> After patch:
>>  ld r9,1032(r31)
>>  cmpdi  cr7,r9,0
>>   v  bne    74
>>  ld r9,2312(r30)
>>  std    r9,1032(r31)
>>   74:   ld r9,88(r31)
>>
>> Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
>> ---
>> Changes in v7:
>>   - Added example in description
>>
>>  tools/perf/util/annotate.c | 6 +-
>>  1 file changed, 5 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
>> index 1ccf26a..a9dbac1 100644
>> --- a/tools/perf/util/annotate.c
>> +++ b/tools/perf/util/annotate.c
>> @@ -122,8 +122,12 @@ bool ins__is_call(const struct ins *ins)
>>  static int jump__parse(struct ins_operands *ops, struct map *map 
>> __maybe_unused)
>>  {
>>  const char *s = strchr(ops->raw, '+');
>> +const char *c = strchr(ops->raw, ',');
>>  
>> -ops->target.addr = strtoull(ops->raw, NULL, 16);
>> +if (c++ != NULL)
>> +ops->target.addr = strtoull(c, NULL, 16);
>> +else
>> +ops->target.addr = strtoull(ops->raw, NULL, 16);
>>  
>>  if (s++ != NULL)
>>  ops->target.offset = strtoull(s, NULL, 16);
>> -- 
>> 2.5.5



Re: [PATCH v7 5/6] perf annotate: Fix jump target outside of function address range

2016-10-10 Thread Ravi Bangoria


On Wednesday 05 October 2016 05:01 PM, Arnaldo Carvalho de Melo wrote:
> Em Wed, Sep 21, 2016 at 09:17:55PM +0530, Ravi Bangoria escreveu:
>> If the jump target is outside of the function range, perf is not handling it
>> correctly. Especially when the target address is less than the function start
>> address, the target offset will be negative. But the target offset is declared
>> unsigned, so the negative number is converted into its 2's complement. See the
>> example below. Here the target of the 'jmpq' instruction at 34cf8 is 34ac0,
>> which is less than the function start address (34cf0).
>>
>> 34ac0 - 34cf0 = -0x230 = 0xfffffffffffffdd0
> This one looks ok, but isn't applying.

This is applying fine for me on perf/core. Which branch are you trying?

-Ravi

>
> - Arnaldo
>
>> Objdump output:
>>
>>   00034cf0 <__sigaction>:
>>   __GI___sigaction():
>> 34cf0: lea    -0x20(%rdi),%eax
>> 34cf3: cmp    $0x1,%eax
>> 34cf6: jbe    34d00 <__sigaction+0x10>
>> 34cf8: jmpq   34ac0 <__GI___libc_sigaction>
>> 34cfd: nopl   (%rax)
>> 34d00: mov    0x386161(%rip),%rax        # 3bae68 <_DYNAMIC+0x2e8>
>> 34d07: movl   $0x16,%fs:(%rax)
>> 34d0e: mov    $0xffffffff,%eax
>> 34d13: retq
>>
>> perf annotate before applying patch:
>>
>>   __GI___sigaction  /usr/lib64/libc-2.22.so
>>    lea    -0x20(%rdi),%eax
>>    cmp    $0x1,%eax
>> v  jbe    10
>> v  jmpq   fffffffffffffdd0
>>    nop
>> 10:   mov    _DYNAMIC+0x2e8,%rax
>>    movl   $0x16,%fs:(%rax)
>>    mov    $0xffffffff,%eax
>>    retq
>>
>> perf annotate after applying patch:
>>
>>   __GI___sigaction  /usr/lib64/libc-2.22.so
>>    lea    -0x20(%rdi),%eax
>>    cmp    $0x1,%eax
>> v  jbe    10
>> ^  jmpq   34ac0 <__GI___libc_sigaction>
>>    nop
>> 10:   mov    _DYNAMIC+0x2e8,%rax
>>    movl   $0x16,%fs:(%rax)
>>    mov    $0xffffffff,%eax
>>    retq
>>
>> Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
>> ---
>> Changes in v7:
>>   - No changes
>>
>>  tools/perf/ui/browsers/annotate.c |  5 +++--
>>  tools/perf/util/annotate.c| 14 +-
>>  tools/perf/util/annotate.h|  5 +++--
>>  3 files changed, 15 insertions(+), 9 deletions(-)
>>
>> diff --git a/tools/perf/ui/browsers/annotate.c 
>> b/tools/perf/ui/browsers/annotate.c
>> index 214a14a..2d04bdf 100644
>> --- a/tools/perf/ui/browsers/annotate.c
>> +++ b/tools/perf/ui/browsers/annotate.c
>> @@ -215,7 +215,7 @@ static void annotate_browser__write(struct ui_browser 
>> *browser, void *entry, int
>>  ui_browser__set_color(browser, color);
>>  if (dl->ins && dl->ins->ops->scnprintf) {
>>  if (ins__is_jump(dl->ins)) {
>> -bool fwd = dl->ops.target.offset > 
>> (u64)dl->offset;
>> +bool fwd = dl->ops.target.offset > dl->offset;
>>  
>>  ui_browser__write_graph(browser, fwd ? 
>> SLSMG_DARROW_CHAR :
>>  
>> SLSMG_UARROW_CHAR);
>> @@ -245,7 +245,8 @@ static bool disasm_line__is_valid_jump(struct 
>> disasm_line *dl, struct symbol *sy
>>  {
>>  if (!dl || !dl->ins || !ins__is_jump(dl->ins)
>>  || !disasm_line__has_offset(dl)
>> -|| dl->ops.target.offset >= symbol__size(sym))
>> +|| dl->ops.target.offset < 0
>> +|| dl->ops.target.offset >= (s64)symbol__size(sym))
>>  return false;
>>  
>>  return true;
>> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
>> index a9dbac1..fc44dd1 100644
>> --- a/tools/perf/util/annotate.c
>> +++ b/tools/perf/util/annotate.c
>> @@ -129,10 +129,12 @@ static int jump__parse(struct ins_operands *ops, 
>> struct map *map __maybe_unused)
>>  else
>>  ops->target.addr = strtoull(ops->raw, NULL, 16);
>>  
>> -if (s++ != NULL)
>> +if (s++ != NULL) {
>>  ops->target.offset = strtoull(s, NULL, 16);
>> -else
>> -ops->target.offset = UINT64_MAX;
>> +ops->target.offset_avail = true;
>> +} else {
>> +ops->ta

[PATCH v7 3/6] perf annotate: Show raw form for jump instruction with indirect target

2016-09-21 Thread Ravi Bangoria
For jump instructions that do not include target address as direct
operand, use raw value for that. This is needed for certain powerpc
jump instructions that use target address in a register (such as bctr,
btar, ...).

Before:
 ld r12,32088(r12)
 mtctr  r12
  v  bctr   ca2c
 std    r2,24(r1)
 addis  r12,r2,-1

After:
 ld r12,32088(r12)
 mtctr  r12
  v  bctr
 std    r2,24(r1)
 addis  r12,r2,-1

Suggested-by: Michael Ellerman <m...@ellerman.id.au>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v7:
  - Added example in description

 tools/perf/util/annotate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 5aa72d9..1ccf26a 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -136,6 +136,9 @@ static int jump__parse(struct ins_operands *ops, struct map 
*map __maybe_unused)
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
   struct ins_operands *ops)
 {
+   if (!ops->target.addr)
+   return ins__raw_scnprintf(ins, bf, size, ops);
+
return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, 
ops->target.offset);
 }
 
-- 
2.5.5



[PATCH v7 5/6] perf annotate: Fix jump target outside of function address range

2016-09-21 Thread Ravi Bangoria
If the jump target is outside of the function range, perf is not handling it
correctly. Especially when the target address is less than the function start
address, the target offset will be negative. But the target offset is declared
unsigned, so the negative number is converted into its 2's complement. See the
example below. Here the target of the 'jmpq' instruction at 34cf8 is 34ac0,
which is less than the function start address (34cf0).

34ac0 - 34cf0 = -0x230 = 0xfffffffffffffdd0
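
The wrap-around is easy to reproduce with a tiny standalone snippet
(illustrative only, reusing the addresses from the example above):

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint64_t start = 0x34cf0, target = 0x34ac0;

		uint64_t uoff = target - start;            /* wraps to 0xfffffffffffffdd0 */
		int64_t  soff = (int64_t)(target - start); /* keeps the sign: -560 == -0x230 */

		printf("unsigned offset: %#llx\n", (unsigned long long)uoff);
		printf("signed offset:   %lld\n", (long long)soff);
		return 0;
	}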

Objdump output:

  00034cf0 <__sigaction>:
  __GI___sigaction():
34cf0: lea    -0x20(%rdi),%eax
34cf3: cmp    $0x1,%eax
34cf6: jbe    34d00 <__sigaction+0x10>
34cf8: jmpq   34ac0 <__GI___libc_sigaction>
34cfd: nopl   (%rax)
34d00: mov    0x386161(%rip),%rax        # 3bae68 <_DYNAMIC+0x2e8>
34d07: movl   $0x16,%fs:(%rax)
34d0e: mov    $0xffffffff,%eax
34d13: retq

perf annotate before applying patch:

  __GI___sigaction  /usr/lib64/libc-2.22.so
   lea    -0x20(%rdi),%eax
   cmp    $0x1,%eax
 v  jbe    10
 v  jmpq   fffffffffffffdd0
   nop
 10:   mov    _DYNAMIC+0x2e8,%rax
   movl   $0x16,%fs:(%rax)
   mov    $0xffffffff,%eax
   retq

perf annotate after applying patch:

  __GI___sigaction  /usr/lib64/libc-2.22.so
   lea    -0x20(%rdi),%eax
   cmp    $0x1,%eax
 v  jbe    10
 ^  jmpq   34ac0 <__GI___libc_sigaction>
   nop
 10:   mov    _DYNAMIC+0x2e8,%rax
   movl   $0x16,%fs:(%rax)
   mov    $0xffffffff,%eax
   retq

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v7:
  - No changes

 tools/perf/ui/browsers/annotate.c |  5 +++--
 tools/perf/util/annotate.c| 14 +-
 tools/perf/util/annotate.h|  5 +++--
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index 214a14a..2d04bdf 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -215,7 +215,7 @@ static void annotate_browser__write(struct ui_browser 
*browser, void *entry, int
ui_browser__set_color(browser, color);
if (dl->ins && dl->ins->ops->scnprintf) {
if (ins__is_jump(dl->ins)) {
-   bool fwd = dl->ops.target.offset > 
(u64)dl->offset;
+   bool fwd = dl->ops.target.offset > dl->offset;
 
ui_browser__write_graph(browser, fwd ? 
SLSMG_DARROW_CHAR :

SLSMG_UARROW_CHAR);
@@ -245,7 +245,8 @@ static bool disasm_line__is_valid_jump(struct disasm_line 
*dl, struct symbol *sy
 {
if (!dl || !dl->ins || !ins__is_jump(dl->ins)
|| !disasm_line__has_offset(dl)
-   || dl->ops.target.offset >= symbol__size(sym))
+   || dl->ops.target.offset < 0
+   || dl->ops.target.offset >= (s64)symbol__size(sym))
return false;
 
return true;
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index a9dbac1..fc44dd1 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -129,10 +129,12 @@ static int jump__parse(struct ins_operands *ops, struct 
map *map __maybe_unused)
else
ops->target.addr = strtoull(ops->raw, NULL, 16);
 
-   if (s++ != NULL)
+   if (s++ != NULL) {
ops->target.offset = strtoull(s, NULL, 16);
-   else
-   ops->target.offset = UINT64_MAX;
+   ops->target.offset_avail = true;
+   } else {
+   ops->target.offset_avail = false;
+   }
 
return 0;
 }
@@ -140,7 +142,7 @@ static int jump__parse(struct ins_operands *ops, struct map 
*map __maybe_unused)
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
   struct ins_operands *ops)
 {
-   if (!ops->target.addr)
+   if (!ops->target.addr || ops->target.offset < 0)
return ins__raw_scnprintf(ins, bf, size, ops);
 
return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, 
ops->target.offset);
@@ -1373,9 +1375,11 @@ static int symbol__parse_objdump_line(struct symbol 
*sym, struct map *map,
if (dl == NULL)
return -1;
 
-   if (dl->ops.target.offset == UINT64_MAX)
+   if (!disasm_line__has_offset(dl)) {
dl->ops.target.offset = dl->ops.target.addr -
map__rip_2objdump(map, sym->start);
+   dl->ops.target.offset_avail = true;
+   }
 
/* kcore has no symbols, so add the call target name */
if (dl->ins && ins__is_call(dl->ins) &

[PATCH v7 2/6] perf annotate: Add support for powerpc

2016-09-21 Thread Ravi Bangoria
From: "Naveen N. Rao" <naveen.n@linux.vnet.ibm.com>

Current perf can disassemble an annotated function, but it does not have
parsing logic for powerpc instructions, so none of the navigation options
are available for powerpc.

Apart from that, powerpc has a long list of branch instructions and
hardcoding them in a table appears to be error-prone. So, add a function
to find the instruction instead of creating a table. This function
dynamically creates the table (a list of 'struct ins') and, instead of
creating a new object every time, first checks whether the list already
contains an object for that instruction.

Signed-off-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v7:
  - Little bit change in initializing instruction list.

 tools/perf/util/annotate.c | 112 +
 1 file changed, 112 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 816aa2c..5aa72d9 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -531,6 +531,11 @@ static struct ins instructions_arm[] = {
{ .name = "retq",  .ops  = _ops, },
 };
 
+struct instructions_powerpc {
+   struct ins *ins;
+   struct list_head list;
+};
+
 static int ins__key_cmp(const void *name, const void *insp)
 {
const struct ins *ins = insp;
@@ -546,6 +551,111 @@ static int ins__cmp(const void *a, const void *b)
return strcmp(ia->name, ib->name);
 }
 
+static struct ins *list_add__ins_powerpc(struct instructions_powerpc *head,
+const char *name, struct ins_ops *ops)
+{
+   struct instructions_powerpc *ins_powerpc;
+   struct ins *ins;
+
+   ins = zalloc(sizeof(struct ins));
+   if (!ins)
+   return NULL;
+
+   ins_powerpc = zalloc(sizeof(struct instructions_powerpc));
+   if (!ins_powerpc)
+   goto out_free_ins;
+
+   ins->name = strdup(name);
+   if (!ins->name)
+   goto out_free_ins_power;
+
+   ins->ops = ops;
+   ins_powerpc->ins = ins;
+   list_add_tail(&(ins_powerpc->list), &(head->list));
+
+   return ins;
+
+out_free_ins_power:
+   zfree(&ins_powerpc);
+out_free_ins:
+   zfree(&ins);
+   return NULL;
+}
+
+static struct ins *list_search__ins_powerpc(struct instructions_powerpc *head,
+   const char *name)
+{
+   struct instructions_powerpc *pos;
+
+   list_for_each_entry(pos, &head->list, list) {
+   if (!strcmp(pos->ins->name, name))
+   return pos->ins;
+   }
+   return NULL;
+}
+
+static struct ins *ins__find_powerpc(const char *name)
+{
+   int i;
+   struct ins *ins;
+   struct ins_ops *ops;
+   static struct instructions_powerpc head = {
+   .list = LIST_HEAD_INIT(head.list),
+   };
+
+   /*
+* - Interested only if instruction starts with 'b'.
+* - Few start with 'b', but aren't branch instructions.
+*/
+   if (name[0] != 'b' ||
+   !strncmp(name, "bcd", 3)   ||
+   !strncmp(name, "brinc", 5) ||
+   !strncmp(name, "bper", 4))
+   return NULL;
+
+   /*
+* Return if we already have object of 'struct ins' for this instruction
+*/
+   ins = list_search__ins_powerpc(&head, name);
+   if (ins)
+   return ins;
+
+   ops = &jump_ops;
+
+   i = strlen(name) - 1;
+   if (i < 0)
+   return NULL;
+
+   /* ignore optional hints at the end of the instructions */
+   if (name[i] == '+' || name[i] == '-')
+   i--;
+
+   if (name[i] == 'l' || (name[i] == 'a' && name[i-1] == 'l')) {
+   /*
+* if the instruction ends up with 'l' or 'la', then
+* those are considered 'calls' since they update LR.
+* ... except for 'bnl' which is branch if not less than
+* and the absolute form of the same.
+*/
+   if (strcmp(name, "bnl") && strcmp(name, "bnl+") &&
+   strcmp(name, "bnl-") && strcmp(name, "bnla") &&
+   strcmp(name, "bnla+") && strcmp(name, "bnla-"))
+   ops = &call_ops;
+   }
+   if (name[i] == 'r' && name[i-1] == 'l')
+   /*
+* instructions ending with 'lr' are considered to be
+* return instructions
+*/
+   ops = &ret_ops;
+
+   /*
+* Add instruction to list so next time no need to
+* allocate memory for it.
+*/
+   return list_add__ins_powerpc(&head, name, ops);
+}
+
 static void ins__sort(struct ins *instructions, int nmemb)

[PATCH v7 6/6] perf annotate: cross arch annotate support fixes for ARM

2016-09-21 Thread Ravi Bangoria
From: Kim Phillips <kim.phill...@arm.com>

For ARM we remove the list that contains non-arm insns, and
instead add more maintainable branch instruction regex logic.
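
The general idea is to classify ARM branch mnemonics with a couple of
compiled regexes instead of a hand-maintained table. A minimal standalone
illustration of the approach (the condition list and patterns here are
assumptions for the sketch, not necessarily the exact ones used in the
patch):

	#include <regex.h>
	#include <stdio.h>

	static regex_t arm_call_insn, arm_jump_insn;

	static void arm_insn_regex_init(void)
	{
		/* optional condition-code suffix on ARM branches */
		const char *conds = "(cc|cs|eq|ge|gt|hi|le|ls|lt|mi|ne|pl|vc|vs)?";
		char pat[64];

		/* calls: bl / blx, optionally conditional (blne, blxeq, ...) */
		snprintf(pat, sizeof(pat), "^blx?%s$", conds);
		regcomp(&arm_call_insn, pat, REG_EXTENDED | REG_NOSUB);

		/* jumps: b / bx, optionally conditional (beq, bne, ...) */
		snprintf(pat, sizeof(pat), "^bx?%s$", conds);
		regcomp(&arm_jump_insn, pat, REG_EXTENDED | REG_NOSUB);
	}

	static int arm_insn_is_call(const char *name)
	{
		return regexec(&arm_call_insn, name, 0, NULL, 0) == 0;
	}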

Signed-off-by: Kim Phillips <kim.phill...@arm.com>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v7:
  - Little bit change in initializing instruction list.

 tools/perf/util/annotate.c | 177 +
 1 file changed, 65 insertions(+), 112 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index fc44dd1..83d5ac8 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -28,6 +28,7 @@ const char*disassembler_style;
 const char *objdump_path;
 static regex_t  file_lineno;
 static char*norm_arch;
+static regex_t arm_call_insn, arm_jump_insn;
 
 static struct ins *ins__find(const char *name);
 static int disasm_line__parse(char *line, char **namep, char **rawp);
@@ -449,98 +450,7 @@ static struct ins instructions_x86[] = {
{ .name = "retq",  .ops  = _ops, },
 };
 
-static struct ins instructions_arm[] = {
-   { .name = "add",   .ops  = _ops, },
-   { .name = "addl",  .ops  = _ops, },
-   { .name = "addq",  .ops  = _ops, },
-   { .name = "addw",  .ops  = _ops, },
-   { .name = "and",   .ops  = _ops, },
-   { .name = "b", .ops  = _ops, }, /* might also be a call */
-   { .name = "bcc",   .ops  = _ops, },
-   { .name = "bcs",   .ops  = _ops, },
-   { .name = "beq",   .ops  = _ops, },
-   { .name = "bge",   .ops  = _ops, },
-   { .name = "bgt",   .ops  = _ops, },
-   { .name = "bhi",   .ops  = _ops, },
-   { .name = "bl",.ops  = _ops, },
-   { .name = "bls",   .ops  = _ops, },
-   { .name = "blt",   .ops  = _ops, },
-   { .name = "blx",   .ops  = _ops, },
-   { .name = "bne",   .ops  = _ops, },
-   { .name = "bts",   .ops  = _ops, },
-   { .name = "call",  .ops  = _ops, },
-   { .name = "callq", .ops  = _ops, },
-   { .name = "cmp",   .ops  = _ops, },
-   { .name = "cmpb",  .ops  = _ops, },
-   { .name = "cmpl",  .ops  = _ops, },
-   { .name = "cmpq",  .ops  = _ops, },
-   { .name = "cmpw",  .ops  = _ops, },
-   { .name = "cmpxch", .ops  = _ops, },
-   { .name = "dec",   .ops  = _ops, },
-   { .name = "decl",  .ops  = _ops, },
-   { .name = "imul",  .ops  = _ops, },
-   { .name = "inc",   .ops  = _ops, },
-   { .name = "incl",  .ops  = _ops, },
-   { .name = "ja",.ops  = _ops, },
-   { .name = "jae",   .ops  = _ops, },
-   { .name = "jb",.ops  = _ops, },
-   { .name = "jbe",   .ops  = _ops, },
-   { .name = "jc",.ops  = _ops, },
-   { .name = "jcxz",  .ops  = _ops, },
-   { .name = "je",.ops  = _ops, },
-   { .name = "jecxz", .ops  = _ops, },
-   { .name = "jg",.ops  = _ops, },
-   { .name = "jge",   .ops  = _ops, },
-   { .name = "jl",.ops  = _ops, },
-   { .name = "jle",   .ops  = _ops, },
-   { .name = "jmp",   .ops  = _ops, },
-   { .name = "jmpq",  .ops  = _ops, },
-   { .name = "jna",   .ops  = _ops, },
-   { .name = "jnae",  .ops  = _ops, },
-   { .name = "jnb",   .ops  = _ops, },
-   { .name = "jnbe",  .ops  = _ops, },
-   { .name = "jnc",   .ops  = _ops, },
-   { .name = "jne",   .ops  = _ops, },
-   { .name = "jng",   .ops  = _ops, },
-   { .name = "jnge",  .ops  = _ops, },
-   { .name = "jnl",   .ops  = _ops, },
-   { .name = "jnle",  .ops  = _ops, },
-   { .name = "jno",   .ops  = _ops, },
-   { .name = "jnp",   .ops  = _ops, },
-   { .name = "jns",   .ops  = _ops, },
-   { .name = "jnz",   .ops  = _ops, },
-   { .name = "jo",.ops  = _ops, },
-   { .name = "jp",.ops  = _ops, },
-   { .name = "jpe",   .ops  = _ops, },
-   { .name = "jpo",   .ops  = _ops, },
-   { .name = "jrcxz", .ops  = _ops, },
-   { .name = "js",.ops  = _ops, },
-   { .name = "jz",.ops  = _ops, },
-   { .name = "lea",   .ops  = _ops, },
-   { .name = "lock",  .ops  = _ops, },
-   { .name = "mov",   .ops  = _ops, },
-   { .name = "movb",  .ops  = _ops, },
-   { .name = "movdqa",.ops 

[PATCH v7 0/6] perf annotate: Cross arch support + few fixes

2016-09-21 Thread Ravi Bangoria
Currently Perf annotate support code navigation (branches and calls)
only when run on the same architecture where perf.data was recorded.
But, for example, record on powerpc server and annotate on client's
x86 desktop is not supported.

This patchset adds supports for that.

Example:

  Record on powerpc:
  $ ./perf record -a

  Report -> Annotate on x86:
  $ ./perf report -i perf.data.powerpc --vmlinux vmlinux.powerpc

Changes in v7:
  - Using string for normalized arch names instead of macros (i.e.
removed patch 1/7 of v6)
  - In patch 1/6, make norm_arch as global var instead of passing them
to each parser.
  - In patch 1/6 and 6/6, little bit change in initializing instruction
list.
  - patch 4/7 of v6 is already accepted. Removed that in v7.
  - Address other review comments.
  - Added more examples in patch descriptions.

v6 link:
  https://lkml.org/lkml/2016/8/19/411

Kim, I don't have arm test machine. Can you please help me to test
this on arm.


Kim Phillips (1):
  perf annotate: cross arch annotate support fixes for ARM

Naveen N. Rao (1):
  perf annotate: Add support for powerpc

Ravi Bangoria (4):
  perf annotate: Add cross arch annotate support
  perf annotate: Show raw form for jump instruction with indirect target
  perf annotate: Support jump instruction with target as second operand
  perf annotate: Fix jump target outside of function address range

 tools/perf/builtin-top.c  |   2 +-
 tools/perf/ui/browsers/annotate.c |   8 +-
 tools/perf/ui/gtk/annotate.c  |   2 +-
 tools/perf/util/annotate.c| 259 --
 tools/perf/util/annotate.h|   8 +-
 5 files changed, 232 insertions(+), 47 deletions(-)

-- 
2.5.5



[PATCH v7 1/6] perf annotate: Add cross arch annotate support

2016-09-21 Thread Ravi Bangoria
Change current data structures and function to enable cross arch
annotate.

Current perf implementation does not support cross arch annotate.
To make it truly cross arch, instruction table of all arch should
be present in perf binary. And use appropriate table based on arch
where perf.data was recorded.

Record on arm:
  $ ./perf record -a

Report -> Annotate on x86:
  $ ./perf report -i perf.data.arm --vmlinux vmlinux.arm
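
The crux of the change is just selecting the right table once the
normalized arch of the perf.data file is known; roughly (simplified
sketch of the idea, not the exact hunk):

	/* pick the instruction table for the arch that recorded perf.data,
	 * not the arch perf was built on */
	static struct ins *instructions;
	static int nmemb;

	static void ins__init(const char *norm_arch)
	{
		if (!strcmp(norm_arch, "x86")) {
			instructions = instructions_x86;
			nmemb = ARRAY_SIZE(instructions_x86);
		} else if (!strcmp(norm_arch, "arm")) {
			instructions = instructions_arm;
			nmemb = ARRAY_SIZE(instructions_arm);
		}
	}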

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v7:
  - Make norm_arch as global var instead of passing them to each parser.
  - Address other review comments.

 tools/perf/builtin-top.c  |   2 +-
 tools/perf/ui/browsers/annotate.c |   3 +-
 tools/perf/ui/gtk/annotate.c  |   2 +-
 tools/perf/util/annotate.c| 151 --
 tools/perf/util/annotate.h|   3 +-
 5 files changed, 134 insertions(+), 27 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 4007857..41ecdd6 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -129,7 +129,7 @@ static int perf_top__parse_source(struct perf_top *top, 
struct hist_entry *he)
return err;
}
 
-   err = symbol__disassemble(sym, map, 0);
+   err = symbol__disassemble(sym, map, 0, NULL);
if (err == 0) {
 out_assign:
top->sym_filter_entry = he;
diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index 4c18271..214a14a 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -1050,7 +1050,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map 
*map,
  (nr_pcnt - 1);
}
 
-   err = symbol__disassemble(sym, map, sizeof_bdl);
+   err = symbol__disassemble(sym, map, sizeof_bdl,
+ perf_evsel__env_arch(evsel));
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 42d3199..c127aba 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -167,7 +167,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct 
map *map,
if (map->dso->annotate_warned)
return -1;
 
-   err = symbol__disassemble(sym, map, 0);
+   err = symbol__disassemble(sym, map, 0, perf_evsel__env_arch(evsel));
if (err) {
char msg[BUFSIZ];
symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg));
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index aeb5a44..816aa2c 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -21,10 +21,13 @@
 #include 
 #include 
 #include 
+#include 
+#include "../arch/common.h"
 
 const char *disassembler_style;
 const char *objdump_path;
 static regex_t  file_lineno;
+static char*norm_arch;
 
 static struct ins *ins__find(const char *name);
 static int disasm_line__parse(char *line, char **namep, char **rawp);
@@ -66,10 +69,8 @@ static int call__parse(struct ins_operands *ops, struct map 
*map)
 
name++;
 
-#ifdef __arm__
-   if (strchr(name, '+'))
+   if (!strcmp(norm_arch, "arm") && strchr(name, '+'))
return -1;
-#endif
 
tok = strchr(name, '>');
if (tok == NULL)
@@ -252,16 +253,12 @@ static int mov__parse(struct ins_operands *ops, struct 
map *map __maybe_unused)
return -1;
 
target = ++s;
-#ifdef __arm__
+
comment = strchr(s, ';');
-#else
-   comment = strchr(s, '#');
-#endif
+   if (comment == NULL)
+   comment = strchr(s, '#');
 
-   if (comment != NULL)
-   s = comment - 1;
-   else
-   s = strchr(s, '\0') - 1;
+   s = (comment != NULL) ? comment - 1 : strchr(s, '\0') - 1;
 
while (s > target && isspace(s[0]))
--s;
@@ -364,14 +361,92 @@ bool ins__is_ret(const struct ins *ins)
return ins->ops == _ops;
 }
 
-static struct ins instructions[] = {
+static struct ins instructions_x86[] = {
{ .name = "add",   .ops  = _ops, },
{ .name = "addl",  .ops  = _ops, },
{ .name = "addq",  .ops  = _ops, },
{ .name = "addw",  .ops  = _ops, },
{ .name = "and",   .ops  = _ops, },
-#ifdef __arm__
-   { .name = "b", .ops  = _ops, }, // might also be a call
+   { .name = "bts",   .ops  = _ops, },
+   { .name = "call",  .ops  = _ops, },
+   { .name = "callq", .ops  = _ops, },
+   { .name = "cmp",   .ops  = _ops, },
+   { .name = "cmpb",  .ops  = _ops, },
+   { .name = "cmpl",  .ops  = _ops, },
+   { .name = "cmpq"

[PATCH v7 4/6] perf annotate: Support jump instruction with target as second operand

2016-09-21 Thread Ravi Bangoria
Current perf is not able to parse jump instruction when second operand
contains target address. Arch like powerpc has such instructions. For
example, 'bne  cr7,0xc00f6154'.

objdump o/p:
  c00f6140:   ld r9,1032(r31)
  c00f6144:   cmpdi  cr7,r9,0
  c00f6148:   bne    cr7,0xc00f6154
  c00f614c:   ld r9,2312(r30)
  c00f6150:   std    r9,1032(r31)
  c00f6154:   ld r9,88(r31)

Before patch:
 ld r9,1032(r31)
 cmpdi  cr7,r9,0
  v  bne    3ff09f2c
 ld r9,2312(r30)
 std    r9,1032(r31)
  74:   ld r9,88(r31)

After patch:
 ld r9,1032(r31)
 cmpdi  cr7,r9,0
  v  bne    74
 ld r9,2312(r30)
 std    r9,1032(r31)
  74:   ld r9,88(r31)

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v7:
  - Added example in description

 tools/perf/util/annotate.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 1ccf26a..a9dbac1 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -122,8 +122,12 @@ bool ins__is_call(const struct ins *ins)
 static int jump__parse(struct ins_operands *ops, struct map *map 
__maybe_unused)
 {
const char *s = strchr(ops->raw, '+');
+   const char *c = strchr(ops->raw, ',');
 
-   ops->target.addr = strtoull(ops->raw, NULL, 16);
+   if (c++ != NULL)
+   ops->target.addr = strtoull(c, NULL, 16);
+   else
+   ops->target.addr = strtoull(ops->raw, NULL, 16);
 
if (s++ != NULL)
ops->target.offset = strtoull(s, NULL, 16);
-- 
2.5.5



[PATCH] perf tests: Add dwarf unwind test for powerpc

2016-09-19 Thread Ravi Bangoria
The user stack dump feature was recently added for powerpc, but there was no
test case available to exercise it. This test works the same way as on other
arches: it prepares a stack frame on the perf test thread and compares each
frame by unwinding it.

  $ ./perf test 50
50: Test dwarf unwind: Ok

User stack dump for powerpc: https://lkml.org/lkml/2016/4/28/482
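
If the test needs debugging, it can also be selected by name and run with
verbose output (usage note, not part of the patch):

  $ ./perf test -v dwarf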

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 tools/perf/arch/powerpc/Build|  1 +
 tools/perf/arch/powerpc/include/arch-tests.h | 13 
 tools/perf/arch/powerpc/include/perf_regs.h  |  2 +
 tools/perf/arch/powerpc/tests/Build  |  4 ++
 tools/perf/arch/powerpc/tests/arch-tests.c   | 15 +
 tools/perf/arch/powerpc/tests/dwarf-unwind.c | 62 ++
 tools/perf/arch/powerpc/tests/regs_load.S| 94 
 tools/perf/tests/Build   |  2 +-
 tools/perf/tests/dwarf-unwind.c  |  2 +-
 9 files changed, 193 insertions(+), 2 deletions(-)
 create mode 100644 tools/perf/arch/powerpc/include/arch-tests.h
 create mode 100644 tools/perf/arch/powerpc/tests/Build
 create mode 100644 tools/perf/arch/powerpc/tests/arch-tests.c
 create mode 100644 tools/perf/arch/powerpc/tests/dwarf-unwind.c
 create mode 100644 tools/perf/arch/powerpc/tests/regs_load.S

diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build
index 54afe4a..db52fa2 100644
--- a/tools/perf/arch/powerpc/Build
+++ b/tools/perf/arch/powerpc/Build
@@ -1 +1,2 @@
 libperf-y += util/
+libperf-y += tests/
diff --git a/tools/perf/arch/powerpc/include/arch-tests.h 
b/tools/perf/arch/powerpc/include/arch-tests.h
new file mode 100644
index 000..84d8ded
--- /dev/null
+++ b/tools/perf/arch/powerpc/include/arch-tests.h
@@ -0,0 +1,13 @@
+#ifndef ARCH_TESTS_H
+#define ARCH_TESTS_H
+
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+struct thread;
+struct perf_sample;
+int test__arch_unwind_sample(struct perf_sample *sample,
+struct thread *thread);
+#endif
+
+extern struct test arch_tests[];
+
+#endif
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
index 75de0e9..c12f4e8 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -5,6 +5,8 @@
 #include 
 #include 
 
+void perf_regs_load(u64 *regs);
+
 #define PERF_REGS_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
 #define PERF_REGS_MAX   PERF_REG_POWERPC_MAX
 #ifdef __powerpc64__
diff --git a/tools/perf/arch/powerpc/tests/Build 
b/tools/perf/arch/powerpc/tests/Build
new file mode 100644
index 000..d827ef3
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/Build
@@ -0,0 +1,4 @@
+libperf-$(CONFIG_DWARF_UNWIND) += regs_load.o
+libperf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
+
+libperf-y += arch-tests.o
diff --git a/tools/perf/arch/powerpc/tests/arch-tests.c 
b/tools/perf/arch/powerpc/tests/arch-tests.c
new file mode 100644
index 000..e24f462
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/arch-tests.c
@@ -0,0 +1,15 @@
+#include 
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+struct test arch_tests[] = {
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+   {
+   .desc = "Test dwarf unwind",
+   .func = test__dwarf_unwind,
+   },
+#endif
+   {
+   .func = NULL,
+   },
+};
diff --git a/tools/perf/arch/powerpc/tests/dwarf-unwind.c 
b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
new file mode 100644
index 000..0bac313
--- /dev/null
+++ b/tools/perf/arch/powerpc/tests/dwarf-unwind.c
@@ -0,0 +1,62 @@
+#include 
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+struct thread *thread, u64 *regs)
+{
+   struct stack_dump *stack = &sample->user_stack;
+   struct map *map;
+   unsigned long sp;
+   u64 stack_size, *buf;
+
+   buf = malloc(STACK_SIZE);
+   if (!buf) {
+   pr_debug("failed to allocate sample uregs data\n");
+   return -1;
+   }
+
+   sp = (unsigned long) regs[PERF_REG_POWERPC_R1];
+
+   map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+   if (!map) {
+   pr_debug("failed to get stack map\n");
+   free(buf);
+   return -1;
+   }
+
+   stack_size = map->end - sp;
+   stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+   memcpy(buf, (void *) sp, stack_size);
+   stack->data = (char *) buf;
+   stack->size = stack_size;
+   return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+struct thread *thread)
+{
+   struct regs_d

Re: [PATCH v7 0/6] perf annotate: Cross arch support + few fixes

2016-09-21 Thread Ravi Bangoria


On Thursday 22 September 2016 01:04 AM, Kim Phillips wrote:
> On Wed, 21 Sep 2016 21:17:50 +0530
> Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com> wrote:
>
>> Kim, I don't have arm test machine. Can you please help me to test
>> this on arm.
> This works for me:  hitting return on return instructions yields
> "Invalid jump offset", but I'll get that later.

Thanks Kim.

Hmm.. so, ins__find_arm does not contain logic for return instructions.
Navigation with return instructions works fine on x86 and powerpc.
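
If someone wants to wire that up, one possible direction (untested sketch;
the mnemonics and operand checks below are assumptions, not existing perf
helpers) would be to classify the usual ARM return forms as ret_ops:

	#include <string.h>

	/* ARM returns are typically "bx lr" or a pop that loads pc, so both
	 * the mnemonic and its operands need a look. */
	static int arm__insn_is_ret(const char *name, const char *operands)
	{
		if (!strcmp(name, "bx") && operands && !strcmp(operands, "lr"))
			return 1;
		if (!strcmp(name, "pop") && operands && strstr(operands, "pc"))
			return 1;
		return 0;
	}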

-Ravi

> Thanks,
>
> Kim
>



Re: [PATCH v6 4/7] perf annotate: Do not ignore call instruction with indirect target

2016-09-20 Thread Ravi Bangoria
Hi Arnaldo,

On Monday 19 September 2016 09:14 PM, Arnaldo Carvalho de Melo wrote:
> Em Fri, Aug 19, 2016 at 06:29:35PM +0530, Ravi Bangoria escreveu:
>> Do not ignore call instruction with indirect target when it's already
>> identified as a call. This is an extension of commit e8ea1561952b
>> ("perf annotate: Use raw form for register indirect call instructions")
>> to generalize annotation for all instructions with indirect calls.
>>
>> This is needed for certain powerpc call instructions that use address
>> in a register (such as bctrl, btarl, ...).
>>
>> Apart from that, when kcore is used to disassemble function, all call
>> instructions were ignored. This patch will fix it as a side effect by
>> not ignoring them. For example,
>>
>> Before (with kcore):
>>mov%r13,%rdi
>>callq  0x811a7e70
>>  ^ jmpq   64
>>mov%gs:0x7ef41a6e(%rip),%al
>>
>> After (with kcore):
>>mov%r13,%rdi
>>  > callq  0x811a7e70
>>  ^ jmpq   64
>>mov%gs:0x7ef41a6e(%rip),%al
> Ok, makes sense, but then now I have the -> and can't press enter to go
> to that function, in fact for the case I'm using as a test, the
> vsnprintf kernel function, I get:
>
>│ 56:   test   %al,%al 
>   
>  ▒
>│ ↓ je 81  
>   
>  ▒
>│   lea-0x38(%rbp),%rsi
>   
>  ▒
>│   mov%r15,%rdi   
>   
>  ▒
>│ → callq  0x993e3230 
>
> That 0x993e3230 should've been resolved to:
>
> [root@jouet ~]# grep 993e3230 /proc/kallsyms 
> 993e3230 t format_decode
>
> Trying to investigate why it doesn't...

I investigated this.

If this example is with kcore, then it's expected, because perf annotate does
not inspect kallsyms when it can't find the symbol name in the disassembly
itself.

For example, disassembly of  finish_task_switch,

with kcore:

810cf1b0:   mov$0x1,%esi
810cf1b5:   mov$0x4,%edi
810cf1ba:   callq  0x811aced0
810cf1bf:   andb   $0xfb,0x4c4(%rbx)
810cf1c6:   jmpq   0x810cf0e9
810cf1cb:   mov%rbx,%rsi
810cf1ce:   mov%r13,%rdi
810cf1d1:   callq  0x811a7e70
810cf1d6:   jmpq   0x810cf0e4

with debuginfo:

810cf1b0:   mov$0x1,%esi
810cf1b5:   mov$0x4,%edi
810cf1ba:   callq  811aced0 <___perf_sw_event>
810cf1bf:   andb   $0xfb,0x4c4(%rbx)
810cf1c6:   jmpq   810cf0e9 <finish_task_switch+0x69>
810cf1cb:   mov%rbx,%rsi
810cf1ce:   mov%r13,%rdi
810cf1d1:   callq  811a7e70 <__perf_event_task_sched_in>
810cf1d6:   jmpq   810cf0e4 <finish_task_switch+0x64>

call__parse tries to find the symbol name between the angle brackets, which
are not present in the kcore disassembly.
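
For reference, the relevant part of call__parse() is roughly this
(simplified paraphrase, not the exact code):

	/* "callq 811aced0 <___perf_sw_event>"
	 *         ^ address  ^ symbol, only present with debuginfo */
	ops->target.addr = strtoull(ops->raw, &endptr, 16);

	name = strchr(endptr, '<');
	if (name == NULL)
		goto indirect_call;	/* kcore: only the address, no <sym> */

	tok = strchr(++name, '>');
	if (tok == NULL)
		return -1;

	*tok = '\0';
	ops->target.name = strdup(name);	/* what the annotate view shows */
	*tok = '>';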

-Ravi


> - Arnaldo
>
>> Suggested-by: Michael Ellerman <m...@ellerman.id.au>
>> [Suggested about 'bctrl' instruction]
>> Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
>> ---
>> Changes in v6:
>>   - No change
>>
>>  tools/perf/util/annotate.c | 8 ++--
>>  1 file changed, 2 insertions(+), 6 deletions(-)
>>
>> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
>> index ea07588..a05423b 100644
>> --- a/tools/perf/util/annotate.c
>> +++ b/tools/perf/util/annotate.c
>> @@ -81,16 +81,12 @@ static int call__parse(struct ins_operands *ops, const 
>> char *norm_arch)
>>  return ops->target.name == NULL ? -1 : 0;
>>  
>>  indirect_call:
>> -tok = strchr(endptr, '(');
>> -if (tok != NULL) {
>> +tok = strchr(endptr, '*');
>> +if (tok == NULL) {
>>  ops->target.addr = 0;
>>  return 0;
>>  }
>>  
>> -tok = strchr(endptr, '*');
>> -if (tok == NULL)
>> -return -1;
>> -
>>  ops->target.addr = strtoull(tok + 1, NULL, 16);
>>  return 0;
>>  }
>> -- 
>> 2.5.5



Re: [PATCH v6 2/7] perf annotate: Add cross arch annotate support

2016-08-26 Thread Ravi Bangoria
Hi Kim,

I've tested your patch on x86 and powerpc and it looks fine to me. Can you
please add your Signed-off-by?

Please add Acked-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com> as well.

Regards,
-Ravi

On Wednesday 24 August 2016 02:06 AM, Kim Phillips wrote:
> On Tue, 23 Aug 2016 11:17:16 +0900
> Namhyung Kim <namhy...@kernel.org> wrote:
>
>> On Tue, Aug 23, 2016 at 8:01 AM, Kim Phillips <kim.phill...@arm.com> wrote:
>>> On Fri, 19 Aug 2016 18:29:33 +0530
>>> Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com> wrote:
>>>
>>>> Changes in v6:
>>>>   - Instead of adding only those instructions defined in #ifdef __arm__,
>>>> add all instructions from default table to arm table.
>>> Thanks, I've gone through the list and removed all not-ARM
>>> instructions, and added some missing ARM branch instructions:
>> Can we use regex patterns instead?
> Yes, that helps prevent mistakes updating instruction lists - how does
> this look?:
>
> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> index b2c6cf3..52316f3 100644
> --- a/tools/perf/util/annotate.c
> +++ b/tools/perf/util/annotate.c
> @@ -26,6 +26,7 @@
>  const char   *disassembler_style;
>  const char   *objdump_path;
>  static regex_tfile_lineno;
> +static regex_tarm_call_insn, arm_jump_insn;
>
>  static struct ins *ins__find(const char *name, const char *norm_arch);
>  static int disasm_line__parse(char *line, char **namep, char **rawp);
> @@ -449,98 +450,7 @@ static struct ins instructions_x86[] = {
>   { .name = "retq",  .ops  = _ops, },
>  };
>
> -static struct ins instructions_arm[] = {
> - { .name = "add",   .ops  = _ops, },
> - { .name = "addl",  .ops  = _ops, },
> - { .name = "addq",  .ops  = _ops, },
> - { .name = "addw",  .ops  = _ops, },
> - { .name = "and",   .ops  = _ops, },
> - { .name = "b", .ops  = _ops, }, /* might also be a call */
> - { .name = "bcc",   .ops  = _ops, },
> - { .name = "bcs",   .ops  = _ops, },
> - { .name = "beq",   .ops  = _ops, },
> - { .name = "bge",   .ops  = _ops, },
> - { .name = "bgt",   .ops  = _ops, },
> - { .name = "bhi",   .ops  = _ops, },
> - { .name = "bl",.ops  = _ops, },
> - { .name = "bls",   .ops  = _ops, },
> - { .name = "blt",   .ops  = _ops, },
> - { .name = "blx",   .ops  = _ops, },
> - { .name = "bne",   .ops  = _ops, },
> - { .name = "bts",   .ops  = _ops, },
> - { .name = "call",  .ops  = _ops, },
> - { .name = "callq", .ops  = _ops, },
> - { .name = "cmp",   .ops  = _ops, },
> - { .name = "cmpb",  .ops  = _ops, },
> - { .name = "cmpl",  .ops  = _ops, },
> - { .name = "cmpq",  .ops  = _ops, },
> - { .name = "cmpw",  .ops  = _ops, },
> - { .name = "cmpxch", .ops  = _ops, },
> - { .name = "dec",   .ops  = _ops, },
> - { .name = "decl",  .ops  = _ops, },
> - { .name = "imul",  .ops  = _ops, },
> - { .name = "inc",   .ops  = _ops, },
> - { .name = "incl",  .ops  = _ops, },
> - { .name = "ja",.ops  = _ops, },
> - { .name = "jae",   .ops  = _ops, },
> - { .name = "jb",.ops  = _ops, },
> - { .name = "jbe",   .ops  = _ops, },
> - { .name = "jc",.ops  = _ops, },
> - { .name = "jcxz",  .ops  = _ops, },
> - { .name = "je",.ops  = _ops, },
> - { .name = "jecxz", .ops  = _ops, },
> - { .name = "jg",.ops  = _ops, },
> - { .name = "jge",   .ops  = _ops, },
> - { .name = "jl",.ops  = _ops, },
> - { .name = "jle",   .ops  = _ops, },
> - { .name = "jmp",   .ops  = _ops, },
> - { .name = "jmpq",  .ops  = _ops, },
> - { .name = "jna",   .ops  = _ops, },
> - { .name = "jnae",  .ops  = _ops, },
> - { .name = "jnb",   .ops  = _ops, },
> - { .name = "jnbe",  .ops  = _ops, },
> - { .name = "jnc",   .ops  = _ops, },
> - { .name = "jne",   .ops  = _ops, },
> - { .name = "jng",   .ops  = _ops, },
> - { .name = "jnge",  .ops  = _ops, },
> - { .name = "jnl",   .ops  = _ops, },
> - { .name = "jnle",  .ops  = _ops, },
> - 

Re: [PATCH v7 0/6] perf annotate: Cross arch support + few fixes

2016-09-27 Thread Ravi Bangoria
Hello,

Any updates?

Arnaldo, if patches looks good to you, can you please pickup them.

-Ravi

On Wednesday 21 September 2016 09:17 PM, Ravi Bangoria wrote:
> Currently Perf annotate support code navigation (branches and calls)
> only when run on the same architecture where perf.data was recorded.
> But, for example, record on powerpc server and annotate on client's
> x86 desktop is not supported.
>
> This patchset adds supports for that.
>
> Example:
>
>   Record on powerpc:
>   $ ./perf record -a
>
>   Report -> Annotate on x86:
>   $ ./perf report -i perf.data.powerpc --vmlinux vmlinux.powerpc
>
> Changes in v7:
>   - Using string for normalized arch names instead of macros (i.e.
> removed patch 1/7 of v6)
>   - In patch 1/6, make norm_arch as global var instead of passing them
> to each parser.
>   - In patch 1/6 and 6/6, little bit change in initializing instruction
> list.
>   - patch 4/7 of v6 is already accepted. Removed that in v7.
>   - Address other review comments.
>   - Added more examples in patch descriptions.
>
> v6 link:
>   https://lkml.org/lkml/2016/8/19/411
>
> Kim, I don't have arm test machine. Can you please help me to test
> this on arm.
>
>
> Kim Phillips (1):
>   perf annotate: cross arch annotate support fixes for ARM
>
> Naveen N. Rao (1):
>   perf annotate: Add support for powerpc
>
> Ravi Bangoria (4):
>   perf annotate: Add cross arch annotate support
>   perf annotate: Show raw form for jump instruction with indirect target
>   perf annotate: Support jump instruction with target as second operand
>   perf annotate: Fix jump target outside of function address range
>
>  tools/perf/builtin-top.c  |   2 +-
>  tools/perf/ui/browsers/annotate.c |   8 +-
>  tools/perf/ui/gtk/annotate.c  |   2 +-
>  tools/perf/util/annotate.c| 259 
> --
>  tools/perf/util/annotate.h|   8 +-
>  5 files changed, 232 insertions(+), 47 deletions(-)
>



Re: [PATCH 1/3] powerpc: Emulation support for load/store instructions on LE

2016-11-06 Thread Ravi Bangoria


On Sunday 06 November 2016 01:01 AM, Anton Blanchard wrote:
> Hi,
>
>> kprobe, uprobe, hw-breakpoint and xmon are the only users of
>> emulate_step.
>>
>> Kprobes / uprobes single-step the instruction if they can't emulate it, so
>> there is no problem with them. As I mentioned, hw-breakpoint is broken.
>> However, I'm not sure about xmon; I need to check that.
> I was mostly concerned that it would impact kprobes. Sounds like we are
> ok there.
>
>> So yes, there is no user-visible feature that depends on this.
> Aren't hardware breakpoints exposed via perf? I'd call perf
> user-visible.


Thanks Anton, That's a good catch. I tried this on ppc64le:

  $ sudo cat /proc/kallsyms  | grep pid_max
c116998c D pid_max

  $ sudo ./perf record -a --event=mem:0xc116998c sleep 10


Before patch:
  It does not record any data and throws below warning.

  $ dmesg
[  817.895573] Unable to handle hardware breakpoint. Breakpoint at 
0xc116998c will be disabled.
[  817.895581] [ cut here ]
[  817.895588] WARNING: CPU: 24 PID: 2032 at 
arch/powerpc/kernel/hw_breakpoint.c:277 hw_breakpoint_handler+0x124/0x230
...

After patch:
  It records data properly.

  $ sudo ./perf report --stdio
...
# Samples: 36  of event 'mem:0xc116998c'
# Event count (approx.): 36
#
# Overhead  CommandShared Object Symbol  
#   .    .
#
63.89%  kdumpctl   [kernel.vmlinux]  [k] alloc_pid
27.78%  opal_errd  [kernel.vmlinux]  [k] alloc_pid
 5.56%  kworker/u97:4  [kernel.vmlinux]  [k] alloc_pid
 2.78%  systemd[kernel.vmlinux]  [k] alloc_pid


-Ravi



[RFC] ppc64le: Enable emulation support for simple Load/Store instructions

2016-10-20 Thread Ravi Bangoria
emulate_step() uses a number of underlying kernel functions that were
initially not enabled for LE. This has been rectified since. So, fix
emulate_step() for LE for the corresponding instructions.

Reported-by: Anton Blanchard <an...@samba.org>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Note: This patch only enables LOAD, STORE, LARX and STCX instructions.
  I'll send a subsequent patch for other types like LOAD_FP,
  LOAD_VMX etc.

 arch/powerpc/lib/sstep.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 3362299..82323ef 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1807,8 +1807,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto instr_done;
 
case LARX:
-   if (regs->msr & MSR_LE)
-   return 0;
if (op.ea & (size - 1))
break;  /* can't handle misaligned */
err = -EFAULT;
@@ -1832,8 +1830,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto ldst_done;
 
case STCX:
-   if (regs->msr & MSR_LE)
-   return 0;
if (op.ea & (size - 1))
break;  /* can't handle misaligned */
err = -EFAULT;
@@ -1859,8 +1855,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto ldst_done;
 
case LOAD:
-   if (regs->msr & MSR_LE)
-   return 0;
err = read_mem(&regs->gpr[op.reg], op.ea, size, regs);
if (!err) {
if (op.type & SIGNEXT)
@@ -1913,8 +1907,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto instr_done;
 
case STORE:
-   if (regs->msr & MSR_LE)
-   return 0;
if ((op.type & UPDATE) && size == sizeof(long) &&
op.reg == 1 && op.update_reg == 1 &&
!(regs->msr & MSR_PR) &&
-- 
1.8.3.1



[PATCH] powerpc/xmon: Fix data-breakpoint

2016-11-22 Thread Ravi Bangoria
The xmon data-breakpoint feature is broken.

Whenever a watchpoint match occurs, hw_breakpoint_handler will be called
by do_break via the notifier chain mechanism. If the watchpoint was
registered by xmon, hw_breakpoint_handler won't find any associated
perf_event and returns immediately with NOTIFY_STOP, so do_break also
returns without notifying xmon.

Solve this by returning NOTIFY_DONE when hw_breakpoint_handler does not
find any perf_event associated with the matched watchpoint.
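
For reference, this is how a handler's return value steers the notifier
chain (illustrative sketch only, not part of the patch; the two helpers
are made-up stand-ins):

	#include <linux/notifier.h>

	static int example_owns_breakpoint(void *data) { return 0; }
	static void example_handle_breakpoint(void *data) { }

	static int example_break_handler(struct notifier_block *nb,
					 unsigned long action, void *data)
	{
		if (!example_owns_breakpoint(data))
			return NOTIFY_DONE;	/* keep walking the chain, e.g. into xmon */

		example_handle_breakpoint(data);
		return NOTIFY_STOP;		/* consumed: later handlers are skipped */
	}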

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/hw_breakpoint.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c 
b/arch/powerpc/kernel/hw_breakpoint.c
index 03d089b..469d86d 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -228,8 +228,10 @@ int hw_breakpoint_handler(struct die_args *args)
rcu_read_lock();
 
bp = __this_cpu_read(bp_per_reg);
-   if (!bp)
+   if (!bp) {
+   rc = NOTIFY_DONE;
goto out;
+   }
info = counter_arch_bp(bp);
 
/*
-- 
1.8.3.1



Re: [PATCH] powerpc/xmon: Fix data-breakpoint

2016-11-22 Thread Ravi Bangoria
Thanks Michael,

On Tuesday 22 November 2016 05:03 PM, Michael Ellerman wrote:
> Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com> writes:
>
>> The xmon data-breakpoint feature is broken.
>>
>> Whenever a watchpoint match occurs, hw_breakpoint_handler will be called
>> by do_break via the notifier chain mechanism. If the watchpoint was
>> registered by xmon, hw_breakpoint_handler won't find any associated
>> perf_event and returns immediately with NOTIFY_STOP, so do_break also
>> returns without notifying xmon.
>>
>> Solve this by returning NOTIFY_DONE when hw_breakpoint_handler does not
>> find any perf_event associated with the matched watchpoint.
> .. rather than NOTIFY_STOP, which tells the core code to continue
> calling the other breakpoint handlers including the xmon one.
>
> Right?

Yes.

> Also any idea when we broke this?

Hmm, not sure exactly. The code has been the same since it was merged in
2010, when hw_breakpoint support was added for server processors.

-Ravi

> cheers
>



Re: [PATCH 1/3] powerpc: Emulation support for load/store instructions on LE

2016-11-03 Thread Ravi Bangoria


On Thursday 03 November 2016 03:18 PM, Michael Ellerman wrote:
> Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com> writes:
>
>> On Thursday 03 November 2016 02:34 AM, Anton Blanchard wrote:
>>> Hi Ravi,
>>>
>>>> emulate_step() uses a number of underlying kernel functions that were
>>>> initially not enabled for LE. This has been rectified since. So, fix
>>>> emulate_step() for LE for the corresponding instructions.
>>> Thanks. Should this be queued up for stable?
>> Thanks Anton. Yes, this should go in stable.
> It's fairly big for stable. Does it fix an actual bug? If so what, and
> how bad is it, what's the user impact.

Hi Michael,

Yes, kernel-space hw-breakpoint feature is broken on LE without this.

Actually, there is no specific commit that introduced this. Back
in 2010, Paul Mackerras added emulation support for load/store
instructions for BE. hw-breakpoint support was also developed by
K.Prasad in the same timeframe.

Kernel-space hw-breakpoint emulates the causative instruction before
notifying the user. As emulate_step was never enabled for LE, kernel-
space hw-breakpoint has always been broken on LE.

-Ravi

> Can you also pinpoint which commit it "fixes"?
>
> cheers
>



Re: [PATCH 1/3] powerpc: Emulation support for load/store instructions on LE

2016-11-02 Thread Ravi Bangoria


On Thursday 03 November 2016 02:34 AM, Anton Blanchard wrote:
> Hi Ravi,
>
>> emulate_step() uses a number of underlying kernel functions that were
>> initially not enabled for LE. This has been rectified since. So, fix
>> emulate_step() for LE for the corresponding instructions.
> Thanks. Should this be queued up for stable?

Thanks Anton. Yes, this should go in stable.

-Ravi



Re: [PATCH 1/3] powerpc: Emulation support for load/store instructions on LE

2016-11-03 Thread Ravi Bangoria


On Friday 04 November 2016 07:37 AM, Andrew Donnellan wrote:
> On 03/11/16 21:27, Ravi Bangoria wrote:
>> Yes, kernel-space hw-breakpoint feature is broken on LE without this.
>
> Is there any actual user-visible feature that depends on this, or is this 
> solely for debugging and development purposes?
>
> It would of course be *nice* to have it in stable trees (particularly so we 
> pick it up in distros) but I'm not convinced that enabling HW breakpoints on 
> a platform where it has *never* worked qualifies as an "actual bug".
>
> (BTW many thanks for fixing this - I had a shot at it late last year but 
> never quite got there!)

Thanks Andrew,

kprobes, uprobes, hw-breakpoint and xmon are the only users of emulate_step.

Kprobes / uprobes single-step the instruction if they can't emulate it, so
there is no problem with them. As I mentioned, hw-breakpoint is broken.
I'm not sure about xmon, though; I need to check that.

So yes, there is no user-visible feature that depends on this.

-Ravi



[PATCH 0/3] powerpc: Emulation support for load/store instructions on LE

2016-11-02 Thread Ravi Bangoria
emulate_step() is basic infrastructure used by a number of other kernel
facilities such as kprobes and hw-breakpoints (data breakpoints).
For kprobes, enabling emulation of load/store instructions speeds up the
execution of the probed instruction. For kernel-space breakpoints, the
causative instruction is first emulated before the user-registered handler
is executed; if emulation fails, the hw-breakpoint is disabled with an
error. As emulate_step() does not support load/store instructions on LE,
the kernel-space hw-breakpoint infrastructure is broken on LE.

emulate_step() uses a number of underlying kernel functions that were
initially not enabled for LE. This has been rectified since. So, fix
emulate_step() for LE for the corresponding instructions.

Also add a selftest which will run at boot if CONFIG_KPROBES_SANITY_TEST
and CONFIG_PPC64 are set.
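
For context, callers consume emulate_step() roughly like this (sketch only,
modelled on the kprobes usage; return-value details are from the comment in
sstep.c):

    unsigned int instr = *(unsigned int *)regs->nip;    /* probed instruction */
    int ret = emulate_step(regs, instr);

    if (ret > 0) {
            /* emulated: NIP, GPRs and any target memory are already updated */
    } else if (ret == 0) {
            /*
             * Not emulated. kprobes falls back to hardware single-stepping;
             * a kernel-space hw-breakpoint has no such fallback and has to
             * disable itself.
             */
    } else {
            /* negative: instruction that must not be stepped (e.g. rfid) */
    }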

Changes w.r.t. RFC:
  - Enable emulation support for all types of (Normal, Floating Point,
Vector and Vector Scalar) load/store instructions.
  - Introduce selftest to test emulate_step for load/store instructions.

Ravi Bangoria (3):
  powerpc: Emulation support for load/store instructions on LE
  powerpc: Add encoding for couple of load/store instructions
  powerpc: emulate_step test for load/store instructions

 arch/powerpc/include/asm/ppc-opcode.h |   7 +
 arch/powerpc/include/asm/sstep.h  |   8 +
 arch/powerpc/kernel/kprobes.c |   2 +
 arch/powerpc/lib/Makefile |   4 +
 arch/powerpc/lib/sstep.c  |  20 --
 arch/powerpc/lib/test_emulate_step.c  | 439 ++
 6 files changed, 460 insertions(+), 20 deletions(-)
 create mode 100644 arch/powerpc/lib/test_emulate_step.c

-- 
1.8.3.1



[PATCH 1/3] powerpc: Emulation support for load/store instructions on LE

2016-11-02 Thread Ravi Bangoria
emulate_step() uses a number of underlying kernel functions that were
initially not enabled for LE. This has been rectified since. So, fix
emulate_step() for LE for the corresponding instructions.

Reported-by: Anton Blanchard <an...@samba.org>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 arch/powerpc/lib/sstep.c | 20 
 1 file changed, 20 deletions(-)

diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 3362299..6ca3b90 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -1807,8 +1807,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto instr_done;
 
case LARX:
-   if (regs->msr & MSR_LE)
-   return 0;
if (op.ea & (size - 1))
break;  /* can't handle misaligned */
err = -EFAULT;
@@ -1832,8 +1830,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto ldst_done;
 
case STCX:
-   if (regs->msr & MSR_LE)
-   return 0;
if (op.ea & (size - 1))
break;  /* can't handle misaligned */
err = -EFAULT;
@@ -1859,8 +1855,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto ldst_done;
 
case LOAD:
-   if (regs->msr & MSR_LE)
-   return 0;
err = read_mem(&regs->gpr[op.reg], op.ea, size, regs);
if (!err) {
if (op.type & SIGNEXT)
@@ -1872,8 +1866,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
 
 #ifdef CONFIG_PPC_FPU
case LOAD_FP:
-   if (regs->msr & MSR_LE)
-   return 0;
if (size == 4)
err = do_fp_load(op.reg, do_lfs, op.ea, size, regs);
else
@@ -1882,15 +1874,11 @@ int __kprobes emulate_step(struct pt_regs *regs, 
unsigned int instr)
 #endif
 #ifdef CONFIG_ALTIVEC
case LOAD_VMX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs);
goto ldst_done;
 #endif
 #ifdef CONFIG_VSX
case LOAD_VSX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs);
goto ldst_done;
 #endif
@@ -1913,8 +1901,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
goto instr_done;
 
case STORE:
-   if (regs->msr & MSR_LE)
-   return 0;
if ((op.type & UPDATE) && size == sizeof(long) &&
op.reg == 1 && op.update_reg == 1 &&
!(regs->msr & MSR_PR) &&
@@ -1927,8 +1913,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned 
int instr)
 
 #ifdef CONFIG_PPC_FPU
case STORE_FP:
-   if (regs->msr & MSR_LE)
-   return 0;
if (size == 4)
err = do_fp_store(op.reg, do_stfs, op.ea, size, regs);
else
@@ -1937,15 +1921,11 @@ int __kprobes emulate_step(struct pt_regs *regs, 
unsigned int instr)
 #endif
 #ifdef CONFIG_ALTIVEC
case STORE_VMX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs);
goto ldst_done;
 #endif
 #ifdef CONFIG_VSX
case STORE_VSX:
-   if (regs->msr & MSR_LE)
-   return 0;
err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs);
goto ldst_done;
 #endif
-- 
1.8.3.1



[PATCH 2/3] powerpc: Add encoding for couple of load/store instructions

2016-11-02 Thread Ravi Bangoria
These encodings will be used in a subsequent patch that tests
emulate_step for load/store instructions.

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/ppc-opcode.h | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index 0132831..a17a09a 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -284,6 +284,13 @@
 #define PPC_INST_BRANCH_COND   0x4080
 #define PPC_INST_LBZCIX0x7c0006aa
 #define PPC_INST_STBCIX0x7c0007aa
+#define PPC_INST_LWZX  0x7c2e
+#define PPC_INST_LFSX  0x7c00042e
+#define PPC_INST_STFSX 0x7c00052e
+#define PPC_INST_LFDX  0x7c0004ae
+#define PPC_INST_STFDX 0x7c0005ae
+#define PPC_INST_LVX   0x7cce
+#define PPC_INST_STVX  0x7c0001ce
 
 /* macros to insert fields into opcodes */
 #define ___PPC_RA(a)   (((a) & 0x1f) << 16)
-- 
1.8.3.1



[PATCH 3/3] powerpc: emulate_step test for load/store instructions

2016-11-02 Thread Ravi Bangoria
Add a new selftest that tests emulate_step for normal, floating point,
vector and vector scalar load/store instructions. The test runs at boot
time if CONFIG_KPROBES_SANITY_TEST and CONFIG_PPC64 are set.

Sample log:

  [0.762063] emulate_step smoke test: start.
  [0.762219] emulate_step smoke test: ld : PASS
  [0.762434] emulate_step smoke test: lwz: PASS
  [0.762653] emulate_step smoke test: lwzx   : PASS
  [0.762867] emulate_step smoke test: std: PASS
  [0.763082] emulate_step smoke test: ldarx / stdcx. : PASS
  [0.763302] emulate_step smoke test: lfsx   : PASS
  [0.763514] emulate_step smoke test: stfsx  : PASS
  [0.763727] emulate_step smoke test: lfdx   : PASS
  [0.763942] emulate_step smoke test: stfdx  : PASS
  [0.764134] emulate_step smoke test: lvx: PASS
  [0.764349] emulate_step smoke test: stvx   : PASS
  [0.764575] emulate_step smoke test: lxvd2x : PASS
  [0.764788] emulate_step smoke test: stxvd2x: PASS
  [0.764997] emulate_step smoke test: complete.
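
Each test in the new file follows the same pattern, roughly like this
(a sketch; init_pt_regs() and show_result() are small helpers assumed to be
defined in the full patch):

    static void __init test_ld(void)
    {
            struct pt_regs regs;
            unsigned long a = 0x23;
            int stepped;

            init_pt_regs(&regs);
            regs.gpr[3] = (unsigned long)&a;

            /* ld r5, 0(r3) */
            stepped = emulate_step(&regs, TEST_LD(5, 3, 0));

            if (stepped == 1 && regs.gpr[5] == a)
                    show_result("ld", "PASS");
            else
                    show_result("ld", "FAIL");
    }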

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
 arch/powerpc/include/asm/sstep.h |   8 +
 arch/powerpc/kernel/kprobes.c|   2 +
 arch/powerpc/lib/Makefile|   4 +
 arch/powerpc/lib/test_emulate_step.c | 439 +++
 4 files changed, 453 insertions(+)
 create mode 100644 arch/powerpc/lib/test_emulate_step.c

diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index d3a42cc..d6d3630 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -87,3 +87,11 @@ struct instruction_op {
 
 extern int analyse_instr(struct instruction_op *op, struct pt_regs *regs,
 unsigned int instr);
+
+#if defined(CONFIG_KPROBES_SANITY_TEST) && defined(CONFIG_PPC64)
+void test_emulate_step(void);
+#else
+static inline void test_emulate_step(void)
+{
+}
+#endif
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index e785cc9..01d8002 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -544,6 +544,8 @@ int __kprobes longjmp_break_handler(struct kprobe *p, 
struct pt_regs *regs)
 
 int __init arch_init_kprobes(void)
 {
+   test_emulate_step();
+
return register_kprobe(&trampoline_p);
 }
 
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 309361e8..7d046ca 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -35,3 +35,7 @@ obj-$(CONFIG_ALTIVEC) += xor_vmx.o
 CFLAGS_xor_vmx.o += -maltivec $(call cc-option,-mabi=altivec)
 
 obj-$(CONFIG_PPC64) += $(obj64-y)
+
+ifeq ($(CONFIG_PPC64), y)
+obj-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o
+endif
diff --git a/arch/powerpc/lib/test_emulate_step.c 
b/arch/powerpc/lib/test_emulate_step.c
new file mode 100644
index 000..887d1db
--- /dev/null
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -0,0 +1,439 @@
+/*
+ * test_emulate_step.c - simple sanity test for emulate_step load/store
+ *  instructions
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
+ * the GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "emulate_step smoke test: " fmt
+
+#include <linux/ptrace.h>
+#include <asm/sstep.h>
+#include <asm/ppc-opcode.h>
+
+#define IMM_L(i)   ((uintptr_t)(i) & 0xffff)
+
+/*
+ * Defined with TEST_ prefix so it does not conflict with other
+ * definitions.
+ */
+#define TEST_LD(r, base, i)(PPC_INST_LD | ___PPC_RT(r) |   \
+   ___PPC_RA(base) | IMM_L(i))
+#define TEST_LWZ(r, base, i)   (PPC_INST_LWZ | ___PPC_RT(r) |  \
+   ___PPC_RA(base) | IMM_L(i))
+#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \
+   ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_STD(r, base, i)   (PPC_INST_STD | ___PPC_RS(r) |  \
+   ___PPC_RA(base) | ((i) & 0xfffc))
+#define TEST_LDARX(t, a, b, eh)(PPC_INST_LDARX | ___PPC_RT(t) |
\
+   ___PPC_RA(a) | ___PPC_RB(b) |   \
+   __PPC_EH(eh))
+#define TEST_STDCX(s, a, b)(PPC_INST_STDCX | ___PPC_RS(s) |\
+   ___PPC_RA(a) | ___PPC_RB(b))
+#define TEST_LFSX(t, a, b) 

Re: [PATCH v7 6/6] perf annotate: cross arch annotate support fixes for ARM

2016-10-10 Thread Ravi Bangoria


On Wednesday 05 October 2016 05:04 PM, Arnaldo Carvalho de Melo wrote:
> Em Wed, Sep 21, 2016 at 09:17:56PM +0530, Ravi Bangoria escreveu:
>> From: Kim Phillips <kim.phill...@arm.com>
>>
>> For ARM we remove the list that contains non-arm insns, and
>> instead add more maintainable branch instruction regex logic.
> This one looks ok and actually is in the direction of having facilities
> for all arches, should've come as infrastructure that then gets used by
> ARM and powerpc.

This one was authored by Kim and I didn't want to change it, so I kept it
at the end.

I'm sending a cleanup patch that applies on top of this series. That patch
moves most of the arch-specific stuff from util/annotate.c to
util/annotate/.c. Please review it.

Please pull this series if you are ok with that patch. Otherwise I'll respin
the entire series.

Thanks
-Ravi



Re: [PATCH v8 1/3] perf annotate: Show raw form for jump instruction with indirect target

2016-12-13 Thread Ravi Bangoria
Hi Arnaldo,

Can you please review the 2nd and 3rd patches?

-Ravi

On Monday 05 December 2016 09:26 PM, Ravi Bangoria wrote:
> For jump instructions that does not include target address as direct
> operand, show the original disassembled line for them. This is needed
> for certain powerpc jump instructions that use target address in a
> register (such as bctr, btar, ...).
>
> Before:
>  ld r12,32088(r12)
>  mtctr  r12
>   v  bctr   ca2c
>  stdr2,24(r1)
>  addis  r12,r2,-1
>
> After:
>  ld r12,32088(r12)
>  mtctr  r12
>   v  bctr
>  stdr2,24(r1)
>  addis  r12,r2,-1
>
> Suggested-by: Michael Ellerman <m...@ellerman.id.au>
> Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
> ---
> Changes in v8:
>   - v7: https://lkml.org/lkml/2016/9/21/436
>   - Rebase to acme/perf/core
>   - No logical changes. (Cross arch annotate patches are in. This patch
> is for hardening annotate for powerpc.)
>
>  tools/perf/util/annotate.c | 3 +++
>  1 file changed, 3 insertions(+)
>
> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> index 4012b1d..ea7e0de 100644
> --- a/tools/perf/util/annotate.c
> +++ b/tools/perf/util/annotate.c
> @@ -237,6 +237,9 @@ static int jump__parse(struct arch *arch __maybe_unused, 
> struct ins_operands *op
>  static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
>  struct ins_operands *ops)
>  {
> + if (!ops->target.addr)
> + return ins__raw_scnprintf(ins, bf, size, ops);
> +
>   return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, 
> ops->target.offset);
>  }
>



Re: [PATCH v8 2/3] perf annotate: Support jump instruction with target as second operand

2016-12-05 Thread Ravi Bangoria
Hi Arnaldo,

Hmm, so it's difficult to find an example of this when we use debuginfo.
Here is why...

jump__parse() looks for two things: the 'offset' and the 'target address'.

objdump with debuginfo includes the offset in the assembly, e.g. annotating
'smp_call_function_single' with the perf.data and vmlinux I shared:

   │c00000000016d6ac:   cmpwi  cr7,r9,0
   │c00000000016d6b0: ↑ bne    cr7,c00000000016d59c <.smp_call_function_single+0x8c>
   │c00000000016d6b4:   addis  r10,r2,-15

objdump of the same function with kcore:

   │c00000000016d6ac:   cmpwi  cr7,r9,0
   │c00000000016d6b0: ↓ bne    cr7,0xc00000000016d59c
   │c00000000016d6b4:   addis  r10,r2,-15

Annotating the first case won't show any issue because we get the offset
directly. But even in that case we are parsing a wrong target address
into ops->target.addr.

In the second case we don't have the offset, so we use the target address
to find it, and thus annotate shows wrong output, something like:

   │   cmpwi  cr7,r9,0
   │ ↓ bne    3fe92afc
   │   addis  r10,r2,-15

BTW, we have a lot of such instructions in the kernel.
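
i.e. with the kcore-style operand the target has to be taken from the second
operand, roughly like this (just a sketch of the idea; the real change is in
the v8 2/3 patch):

    /* ops->raw is e.g. "cr7,0xc00000000016d59c", with no "+0x.." offset */
    const char *c = strchr(ops->raw, ',');

    if (c != NULL)
            ops->target.addr = strtoull(c + 1, NULL, 16);   /* skip "cr7," */
    else
            ops->target.addr = strtoull(ops->raw, NULL, 16);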

Thanks,
-Ravi


On Monday 05 December 2016 09:26 PM, Ravi Bangoria wrote:
> Arch like powerpc has jump instructions that includes target address
> as second operand. For example, 'bne  cr7,0xc00f6154'. Add
> support for such instruction in perf annotate.
>
> objdump o/p:
>   c00f6140:   ld r9,1032(r31)
>   c00f6144:   cmpdi  cr7,r9,0
>   c00f6148:   bnecr7,0xc00f6154
>   c00f614c:   ld r9,2312(r30)
>   c00f6150:   stdr9,1032(r31)
>   c00f6154:   ld r9,88(r31)
>
> Corresponding perf annotate o/p:
>
> Before patch:
>  ld r9,1032(r31)
>  cmpdi  cr7,r9,0
>   v  bne3ff09f2c
>  ld r9,2312(r30)
>  stdr9,1032(r31)
>   74:ld r9,88(r31)
>
> After patch:
>  ld r9,1032(r31)
>  cmpdi  cr7,r9,0
>   v  bne74
>  ld r9,2312(r30)
>  stdr9,1032(r31)
>   74:ld r9,88(r31)
>
> Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
> ---
> Changes in v8:
>   - v7: https://lkml.org/lkml/2016/9/21/436
>   - Rebase to acme/perf/core
>   - Little change in patch description.
>   - No logical changes. (Cross arch annotate patches are in. This patch
> is for hardening annotate for powerpc.)
>
>  tools/perf/util/annotate.c | 6 +-
>  1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> index ea7e0de..590244e 100644
> --- a/tools/perf/util/annotate.c
> +++ b/tools/perf/util/annotate.c
> @@ -223,8 +223,12 @@ bool ins__is_call(const struct ins *ins)
>  static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands 
> *ops, struct map *map __maybe_unused)
>  {
>   const char *s = strchr(ops->raw, '+');
> + const char *c = strchr(ops->raw, ',');
>
> - ops->target.addr = strtoull(ops->raw, NULL, 16);
> + if (c++ != NULL)
> + ops->target.addr = strtoull(c, NULL, 16);
> + else
> + ops->target.addr = strtoull(ops->raw, NULL, 16);
>
>   if (s++ != NULL)
>   ops->target.offset = strtoull(s, NULL, 16);



[PATCH v8 2/3] perf annotate: Support jump instruction with target as second operand

2016-12-05 Thread Ravi Bangoria
Arches like powerpc have jump instructions that include the target address
as the second operand, for example 'bne  cr7,0xc0000000000f6154'. Add
support for such instructions in perf annotate.

objdump o/p:
  c0000000000f6140:   ld     r9,1032(r31)
  c0000000000f6144:   cmpdi  cr7,r9,0
  c0000000000f6148:   bne    cr7,0xc0000000000f6154
  c0000000000f614c:   ld     r9,2312(r30)
  c0000000000f6150:   std    r9,1032(r31)
  c0000000000f6154:   ld     r9,88(r31)

Corresponding perf annotate o/p:

Before patch:
         ld     r9,1032(r31)
         cmpdi  cr7,r9,0
      v  bne    3ff09f2c
         ld     r9,2312(r30)
         std    r9,1032(r31)
  74:    ld     r9,88(r31)

After patch:
         ld     r9,1032(r31)
         cmpdi  cr7,r9,0
      v  bne    74
         ld     r9,2312(r30)
         std    r9,1032(r31)
  74:    ld     r9,88(r31)

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v8:
  - v7: https://lkml.org/lkml/2016/9/21/436
  - Rebase to acme/perf/core
  - Little change in patch description.
  - No logical changes. (Cross arch annotate patches are in. This patch
is for hardening annotate for powerpc.)

 tools/perf/util/annotate.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index ea7e0de..590244e 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -223,8 +223,12 @@ bool ins__is_call(const struct ins *ins)
 static int jump__parse(struct arch *arch __maybe_unused, struct ins_operands 
*ops, struct map *map __maybe_unused)
 {
const char *s = strchr(ops->raw, '+');
+   const char *c = strchr(ops->raw, ',');
 
-   ops->target.addr = strtoull(ops->raw, NULL, 16);
+   if (c++ != NULL)
+   ops->target.addr = strtoull(c, NULL, 16);
+   else
+   ops->target.addr = strtoull(ops->raw, NULL, 16);
 
if (s++ != NULL)
ops->target.offset = strtoull(s, NULL, 16);
-- 
2.4.11



[PATCH v8 3/3] perf annotate: Fix jump target outside of function address range

2016-12-05 Thread Ravi Bangoria
If the jump target is outside of the function range, perf is not handling
it correctly. Especially when the target address is less than the function
start address, the target offset will be negative. But the target offset is
declared as unsigned, which converts the negative number into its 2's
complement. See the example below: the target of the 'jmpq' instruction at
34cf8 is 34ac0, which is less than the function start address (34cf0).

34ac0 - 34cf0 = -0x230 = 0xfffffffffffffdd0
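
A tiny userspace illustration of that wraparound (standalone example, not
perf code):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t start = 0x34cf0, target = 0x34ac0;

            /* unsigned subtraction wraps around: prints fffffffffffffdd0 */
            printf("%llx\n", (unsigned long long)(target - start));
            /* keeping the offset signed preserves -0x230 (i.e. -560) */
            printf("%lld\n", (long long)(target - start));
            return 0;
    }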

Objdump output:

  00034cf0 <__sigaction>:
  __GI___sigaction():
    34cf0: lea    -0x20(%rdi),%eax
    34cf3: cmp    $0x1,%eax
    34cf6: jbe    34d00 <__sigaction+0x10>
    34cf8: jmpq   34ac0 <__GI___libc_sigaction>
    34cfd: nopl   (%rax)
    34d00: mov    0x386161(%rip),%rax        # 3bae68 <_DYNAMIC+0x2e8>
    34d07: movl   $0x16,%fs:(%rax)
    34d0e: mov    $0xffffffff,%eax
    34d13: retq

perf annotate before applying patch:

  __GI___sigaction  /usr/lib64/libc-2.22.so
       lea    -0x20(%rdi),%eax
       cmp    $0x1,%eax
    v  jbe    10
    v  jmpq   fffffffffffffdd0
       nop
  10:  mov    _DYNAMIC+0x2e8,%rax
       movl   $0x16,%fs:(%rax)
       mov    $0xffffffff,%eax
       retq

perf annotate after applying patch:

  __GI___sigaction  /usr/lib64/libc-2.22.so
       lea    -0x20(%rdi),%eax
       cmp    $0x1,%eax
    v  jbe    10
    ^  jmpq   34ac0 <__GI___libc_sigaction>
       nop
  10:  mov    _DYNAMIC+0x2e8,%rax
       movl   $0x16,%fs:(%rax)
       mov    $0xffffffff,%eax
       retq

Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v8:
  - v7: https://lkml.org/lkml/2016/9/21/436
  - Rebased to acme/perf/core.
  - No logical changes. (Cross arch annotate patches are in. This patch
is for hardening annotate.)

tools/perf/ui/browsers/annotate.c |  5 +++--
 tools/perf/util/annotate.c| 14 +-
 tools/perf/util/annotate.h|  5 +++--
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/tools/perf/ui/browsers/annotate.c 
b/tools/perf/ui/browsers/annotate.c
index ec7a30f..ba36aac 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -215,7 +215,7 @@ static void annotate_browser__write(struct ui_browser 
*browser, void *entry, int
ui_browser__set_color(browser, color);
if (dl->ins.ops && dl->ins.ops->scnprintf) {
if (ins__is_jump(&dl->ins)) {
-   bool fwd = dl->ops.target.offset > 
(u64)dl->offset;
+   bool fwd = dl->ops.target.offset > dl->offset;
 
ui_browser__write_graph(browser, fwd ? 
SLSMG_DARROW_CHAR :

SLSMG_UARROW_CHAR);
@@ -245,7 +245,8 @@ static bool disasm_line__is_valid_jump(struct disasm_line 
*dl, struct symbol *sy
 {
if (!dl || !dl->ins.ops || !ins__is_jump(&dl->ins)
|| !disasm_line__has_offset(dl)
-   || dl->ops.target.offset >= symbol__size(sym))
+   || dl->ops.target.offset < 0
+   || dl->ops.target.offset >= (s64)symbol__size(sym))
return false;
 
return true;
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 590244e..c81a395 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -230,10 +230,12 @@ static int jump__parse(struct arch *arch __maybe_unused, 
struct ins_operands *op
else
ops->target.addr = strtoull(ops->raw, NULL, 16);
 
-   if (s++ != NULL)
+   if (s++ != NULL) {
ops->target.offset = strtoull(s, NULL, 16);
-   else
-   ops->target.offset = UINT64_MAX;
+   ops->target.offset_avail = true;
+   } else {
+   ops->target.offset_avail = false;
+   }
 
return 0;
 }
@@ -241,7 +243,7 @@ static int jump__parse(struct arch *arch __maybe_unused, 
struct ins_operands *op
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
   struct ins_operands *ops)
 {
-   if (!ops->target.addr)
+   if (!ops->target.addr || ops->target.offset < 0)
return ins__raw_scnprintf(ins, bf, size, ops);
 
return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, 
ops->target.offset);
@@ -1209,9 +1211,11 @@ static int symbol__parse_objdump_line(struct symbol 
*sym, struct map *map,
if (dl == NULL)
return -1;
 
-   if (dl->ops.target.offset == UINT64_MAX)
+   if (!disasm_line__has_offset(dl)) {
dl->ops.target.offset = dl->ops.target.addr -
map__rip_2objdump(map, sym->start);
+   dl->o

[PATCH v8 1/3] perf annotate: Show raw form for jump instruction with indirect target

2016-12-05 Thread Ravi Bangoria
For jump instructions that do not include the target address as a direct
operand, show the original disassembled line for them. This is needed
for certain powerpc jump instructions that take the target address from a
register (such as bctr, btar, ...).

Before:
         ld     r12,32088(r12)
         mtctr  r12
      v  bctr   ffffffffffffca2c
         std    r2,24(r1)
         addis  r12,r2,-1

After:
         ld     r12,32088(r12)
         mtctr  r12
      v  bctr
         std    r2,24(r1)
         addis  r12,r2,-1

Suggested-by: Michael Ellerman <m...@ellerman.id.au>
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
Changes in v8:
  - v7: https://lkml.org/lkml/2016/9/21/436
  - Rebase to acme/perf/core
  - No logical changes. (Cross arch annotate patches are in. This patch
is for hardening annotate for powerpc.)

 tools/perf/util/annotate.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 4012b1d..ea7e0de 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -237,6 +237,9 @@ static int jump__parse(struct arch *arch __maybe_unused, 
struct ins_operands *op
 static int jump__scnprintf(struct ins *ins, char *bf, size_t size,
   struct ins_operands *ops)
 {
+   if (!ops->target.addr)
+   return ins__raw_scnprintf(ins, bf, size, ops);
+
return scnprintf(bf, size, "%-6.6s %" PRIx64, ins->name, 
ops->target.offset);
 }
 
-- 
2.4.11



Re: [PATCH] perf TUI: Don't throw error for zero length symbols

2016-12-16 Thread Ravi Bangoria
Hi Arnaldo,

Can you please pick this up if it looks good?

-Ravi

On Tuesday 22 November 2016 02:10 PM, Ravi Bangoria wrote:
> perf report (with TUI) exits with error when it finds a sample of zero
> length symbol(i.e. addr == sym->start == sym->end). Actually these are
> valid samples. Don't exit TUI and show report with such symbols.
>
> Link: https://lkml.org/lkml/2016/10/8/189
>
> Reported-by: Anton Blanchard <an...@samba.org>
> Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
> ---
>  tools/perf/util/annotate.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
> index aeb5a44..430d039 100644
> --- a/tools/perf/util/annotate.c
> +++ b/tools/perf/util/annotate.c
> @@ -593,7 +593,8 @@ static int __symbol__inc_addr_samples(struct symbol *sym, 
> struct map *map,
>
>   pr_debug3("%s: addr=%#" PRIx64 "\n", __func__, map->unmap_ip(map, 
> addr));
>
> - if (addr < sym->start || addr >= sym->end) {
> + if ((addr < sym->start || addr >= sym->end) &&
> + (addr != sym->end || sym->start != sym->end)) {
>   pr_debug("%s(%d): ERANGE! sym->name=%s, start=%#" PRIx64 ", 
> addr=%#" PRIx64 ", end=%#" PRIx64 "\n",
>  __func__, __LINE__, sym->name, sym->start, addr, 
> sym->end);
>   return -ERANGE;



[PATCH] ppc64/kprobe: Fix oops when kprobed on 'stdu' instruction

2017-04-10 Thread Ravi Bangoria
If we set a kprobe on a 'stdu' instruction on powerpc64, we see a kernel 
OOPS:

  [ 1275.165932] Bad kernel stack pointer cd93c840 at c0009868
  [ 1275.166378] Oops: Bad kernel stack pointer, sig: 6 [#1]
  ...
  GPR00: c01fcd93cb30 cd93c840 c15c5e00 cd93c840
  ...
  [ 1275.178305] NIP [c0009868] resume_kernel+0x2c/0x58
  [ 1275.178594] LR [c0006208] program_check_common+0x108/0x180

Basically, on a 64-bit system, when a user probes on an 'stdu' instruction,
the kernel does not emulate the actual store in emulate_step() itself
because it may corrupt the exception frame. Instead, the kernel does the
actual store operation in the exception return code, i.e. resume_kernel().

resume_kernel() loads the saved stack pointer from memory using lwz,
effectively loading a corrupt (32-bit) address and causing the kernel crash.

Fix this by loading the 64-bit value instead.
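
To see why the lwz produces such a bogus value, here is a small userspace
sketch (the stack pointer value is made up for illustration; on LE a 32-bit
load of the doubleword returns only the low word):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t r1 = 0xc000001fcd93c840ULL;    /* illustrative ppc64 SP */

            uint64_t ld_view  = r1;                 /* ld  r3,GPR1(r1): all 64 bits */
            uint64_t lwz_view = (uint32_t)r1;       /* lwz r3,GPR1(r1) on LE: low 32 bits */

            printf("ld : %016llx\n", (unsigned long long)ld_view);
            printf("lwz: %016llx\n", (unsigned long long)lwz_view);
            return 0;
    }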

Fixes: 8e9f69371536 ("powerpc/kprobe: Don't emulate store when kprobe stwu r1")
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
---
History:
  Commit 8e9f69371536 ("powerpc/kprobe: Don't emulate store when kprobe
  stwu r1") fixed exception frame corruption for 32-bit systems, which use
  the 'stwu' instruction for stack frame allocation. That commit also added
  code for 64-bit systems but did not enable it for the 'stdu' instruction.
  So the 'stdu' instruction on 64-bit machines kept emulating the actual
  store in emulate_step() itself until...

  Commit be96f63375a1 ("powerpc: Split out instruction analysis part of
  emulate_step()"), enabled it for 'stdu' instruction on 64 bit machine.

  Since then it's broken. So this should also go into stable.

 arch/powerpc/kernel/entry_64.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6432d4b..530f6e9 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -689,7 +689,7 @@ resume_kernel:
 
	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
 
-   lwz r3,GPR1(r1)
+   ld  r3,GPR1(r1)
	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
mr  r4,r1   /* src:  current exception frame */
mr  r1,r3   /* Reroute the trampoline frame to r1 */
@@ -704,7 +704,7 @@ resume_kernel:
	bdnz	2b
 
/* Do real store operation to complete stwu */
-   lwz r5,GPR1(r1)
+   ld  r5,GPR1(r1)
std r8,0(r5)
 
/* Clear _TIF_EMULATE_STACK_STORE flag */
-- 
1.9.3



[PATCH v2] ppc64/kprobe: Fix oops when kprobed on 'stdu' instruction

2017-04-10 Thread Ravi Bangoria
If we set a kprobe on a 'stdu' instruction on powerpc64, we see a kernel 
OOPS:

  [ 1275.165932] Bad kernel stack pointer cd93c840 at c0009868
  [ 1275.166378] Oops: Bad kernel stack pointer, sig: 6 [#1]
  ...
  GPR00: c01fcd93cb30 cd93c840 c15c5e00 cd93c840
  ...
  [ 1275.178305] NIP [c0009868] resume_kernel+0x2c/0x58
  [ 1275.178594] LR [c0006208] program_check_common+0x108/0x180

Basically, on a 64-bit system, when a user probes on an 'stdu' instruction,
the kernel does not emulate the actual store in emulate_step() itself
because it may corrupt the exception frame. Instead, the kernel does the
actual store operation in the exception return code, i.e. resume_kernel().

resume_kernel() loads the saved stack pointer from memory using lwz,
effectively loading a corrupt (32-bit) address and causing the kernel crash.

Fix this by loading the 64-bit value instead.

Fixes: be96f63375a1 ("powerpc: Split out instruction analysis part of 
emulate_step()") 
Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com> 
---
History:
  Commit 8e9f69371536 ("powerpc/kprobe: Don't emulate store when kprobe
  stwu r1") fixed exception frame corruption for 32-bit systems, which use
  the 'stwu' instruction for stack frame allocation. That commit also added
  code for 64-bit systems but did not enable it for the 'stdu' instruction.
  So the 'stdu' instruction on 64-bit machines kept emulating the actual
  store in emulate_step() itself until...

  Commit be96f63375a1 ("powerpc: Split out instruction analysis part of
  emulate_step()"), enabled it for 'stdu' instruction on 64 bit machine.

  So kprobe on 'stdu' has always been broken on powerpc64.  We haven't
  noticed since most stdu operations were probably landing in the red
  zone so the exception frame never got corrupted. In that sense, this
  fix is needed for BE ever since load/store emulation was added.

  For LE, this is only getting exposed now due to my recent patch to
  enable load/store emulation on LE, which got merged as commit
  e148bd17f48b ("powerpc: Emulation support for load/store instructions
  on LE").

  Please mark this for stable as well.

Changes in v2:
  - Replace 'stwu' with 'stdu' in the comment.

 arch/powerpc/kernel/entry_64.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 6432d4b..767ef6d 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -689,7 +689,7 @@ resume_kernel:
 
	addi	r8,r1,INT_FRAME_SIZE	/* Get the kprobed function entry */
 
-   lwz r3,GPR1(r1)
+   ld  r3,GPR1(r1)
	subi	r3,r3,INT_FRAME_SIZE	/* dst: Allocate a trampoline exception frame */
mr  r4,r1   /* src:  current exception frame */
mr  r1,r3   /* Reroute the trampoline frame to r1 */
@@ -703,8 +703,8 @@ resume_kernel:
	addi	r6,r6,8
	bdnz	2b
 
-   /* Do real store operation to complete stwu */
-   lwz r5,GPR1(r1)
+   /* Do real store operation to complete stdu */
+   ld  r5,GPR1(r1)
std r8,0(r5)
 
/* Clear _TIF_EMULATE_STACK_STORE flag */
-- 
1.9.3



Re: [PATCH v2] ppc64/kprobe: Fix oops when kprobed on 'stdu' instruction

2017-04-11 Thread Ravi Bangoria
Thanks Balbir for the review,

On Tuesday 11 April 2017 02:25 PM, Balbir Singh wrote:
> On Tue, 2017-04-11 at 10:38 +0530, Ravi Bangoria wrote:
>> If we set a kprobe on a 'stdu' instruction on powerpc64, we see a kernel 
>> OOPS:
>>
>>   [ 1275.165932] Bad kernel stack pointer cd93c840 at c0009868
>>   [ 1275.166378] Oops: Bad kernel stack pointer, sig: 6 [#1]
>>   ...
>>   GPR00: c01fcd93cb30 cd93c840 c15c5e00 cd93c840
>>   ...
>>   [ 1275.178305] NIP [c0009868] resume_kernel+0x2c/0x58
>>   [ 1275.178594] LR [c0006208] program_check_common+0x108/0x180
>>
>> Basically, on 64 bit system, when user probes on 'stdu' instruction,
>> kernel does not emulate actual store in emulate_step itself because it
>> may corrupt exception frame. So kernel does actual store operation in
>> exception return code i.e. resume_kernel().
>>
>> resume_kernel() loads the saved stack pointer from memory using lwz,
>> effectively loading a corrupt (32bit) address, causing the kernel crash.
>>
>> Fix this by loading the 64bit value instead.
>>
>> Fixes: be96f63375a1 ("powerpc: Split out instruction analysis part of 
>> emulate_step()") 
>> Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com>
>> Reviewed-by: Naveen N. Rao <naveen.n@linux.vnet.ibm.com> 
>> ---
> The patch looks correct to me from the description and code. I have not
> validated that the write to GPR1(r1) via store of r8 to 0(r5) is indeed 
> correct.
> I would assume r8 should contain regs->gpr[r1] with the updated ea that
> is written down to the GPR1(r1) which will be what we restore when we return
> from the exception.

emulate_step() updates regs->gpr[1] with the new value. So,
regs->gpr[1] and GPR1(r1) are both the same at resume_kernel.

At resume_kernel, r1 points to the exception frame. The address of the
frame preceding the exception frame gets loaded into r8 with:

	addi	r8,r1,INT_FRAME_SIZE

Let me know if you need more details.

Ravi


