Re: [PATCH 05/61] perf tools: Introduce c2c_decode_stats function

2016-09-21 Thread Jiri Olsa
On Wed, Sep 21, 2016 at 11:16:26AM -0400, Don Zickus wrote:
> On Wed, Sep 21, 2016 at 11:18:29AM +0200, Jiri Olsa wrote:
> > On Wed, Sep 21, 2016 at 09:08:40AM +0000, Stanislav Ievlev wrote:
> > > Hi, Jiri!
> > > 
> > > Why are you not using unsigned integer for counters in c2c_stats 
> > > structure?
> > 
> > hi,
> > never really thought of that, because that's one of the original
> > patches I could take almost untouched.. so no real reason ;-)
> 
> Hi Jirka,
> 
> I can't recall the reason Dick and myself started that way.  I think it
> makes sense to use u32 here.  So I am fine with it. :-)

ok, will change

thanks,
jirka


Re: [PATCH 05/61] perf tools: Introduce c2c_decode_stats function

2016-09-21 Thread Don Zickus
On Wed, Sep 21, 2016 at 11:18:29AM +0200, Jiri Olsa wrote:
> On Wed, Sep 21, 2016 at 09:08:40AM +0000, Stanislav Ievlev wrote:
> > Hi, Jiri!
> > 
> > Why are you not using unsigned integer for counters in c2c_stats structure?
> 
> hi,
> never really thought of that, because that's one of the original
> patches I could take almost untouched.. so no real reason ;-)

Hi Jirka,

I can't recall the reason Dick and myself started that way.  I think it
makes sense to use u32 here.  So I am fine with it. :-)

Cheers,
Don

> 
> jirka
> 
> > 
> > On Mon, Sep 19, 2016 at 4:27 PM Jiri Olsa  wrote:
> > 
> > > Introducing c2c_decode_stats function, which decodes
> > > data_src data into new struct c2c_stats.
> > >
> > > +struct c2c_stats {
> > > +   int nr_entries;
> > > +
> > > +   int locks;   /* count of 'lock' transactions */
> > > +   int store;   /* count of all stores in trace */
> > > +   int st_uncache;  /* stores to uncacheable address */
> > > +   int st_noadrs;   /* cacheable store with no address */
> > > +   int st_l1hit;/* count of stores that hit L1D */
> > > +   int st_l1miss;   /* count of stores that miss L1D */
> > > +   int load;/* count of all loads in trace */
> > > +   int ld_excl; /* exclusive loads, rmt/lcl DRAM -
> > > snp none/miss */
> > > +   int ld_shared;   /* shared loads, rmt/lcl DRAM - snp
> > > hit */
> > > +   int ld_uncache;  /* loads to uncacheable address */
> > > +   int ld_io;   /* loads to io address */
> > > +   int ld_miss; /* loads miss */
> > > +   int ld_noadrs;   /* cacheable load with no address */
> > > +   int ld_fbhit;/* count of loads hitting Fill Buffer
> > > */
> > > +   int ld_l1hit;/* count of loads that hit L1D */
> > > +   int ld_l2hit;/* count of loads that hit L2D */
> > > +   int ld_llchit;   /* count of loads that hit LLC */
> > > +   int lcl_hitm;/* count of loads with local HITM  */
> > > +   int rmt_hitm;/* count of loads with remote HITM */
> > > +   int rmt_hit; /* count of loads with remote hit
> > > clean; */
> > > +   int lcl_dram;/* count of loads miss to local DRAM
> > > */
> > > +   int rmt_dram;/* count of loads miss to remote DRAM
> > > */
> > > +   int nomap;   /* count of load/stores with no phys
> > > adrs */
> > > +   int noparse; /* count of unparsable data sources 
> > > */
> > > +};
> > >
> > >


Re: [PATCH 05/61] perf tools: Introduce c2c_decode_stats function

2016-09-21 Thread Jiri Olsa
On Wed, Sep 21, 2016 at 09:08:40AM +0000, Stanislav Ievlev wrote:
> Hi, Jiri!
> 
> Why are you not using unsigned integer for counters in c2c_stats structure?

hi,
never really thought of that, because that's one of the original
patches I could take almost untouched.. so no real reason ;-)

jirka

> 
> On Mon, Sep 19, 2016 at 4:27 PM Jiri Olsa  wrote:
> 
> > Introducing c2c_decode_stats function, which decodes
> > data_src data into new struct c2c_stats.
> >
> > +struct c2c_stats {
> > +   int nr_entries;
> > +
> > +   int locks;   /* count of 'lock' transactions */
> > +   int store;   /* count of all stores in trace */
> > +   int st_uncache;  /* stores to uncacheable address */
> > +   int st_noadrs;   /* cacheable store with no address */
> > +   int st_l1hit;/* count of stores that hit L1D */
> > +   int st_l1miss;   /* count of stores that miss L1D */
> > +   int load;/* count of all loads in trace */
> > +   int ld_excl; /* exclusive loads, rmt/lcl DRAM -
> > snp none/miss */
> > +   int ld_shared;   /* shared loads, rmt/lcl DRAM - snp
> > hit */
> > +   int ld_uncache;  /* loads to uncacheable address */
> > +   int ld_io;   /* loads to io address */
> > +   int ld_miss; /* loads miss */
> > +   int ld_noadrs;   /* cacheable load with no address */
> > +   int ld_fbhit;/* count of loads hitting Fill Buffer
> > */
> > +   int ld_l1hit;/* count of loads that hit L1D */
> > +   int ld_l2hit;/* count of loads that hit L2D */
> > +   int ld_llchit;   /* count of loads that hit LLC */
> > +   int lcl_hitm;/* count of loads with local HITM  */
> > +   int rmt_hitm;/* count of loads with remote HITM */
> > +   int rmt_hit; /* count of loads with remote hit
> > clean; */
> > +   int lcl_dram;/* count of loads miss to local DRAM
> > */
> > +   int rmt_dram;/* count of loads miss to remote DRAM
> > */
> > +   int nomap;   /* count of load/stores with no phys
> > adrs */
> > +   int noparse; /* count of unparsable data sources */
> > +};
> >
> >


Re: [PATCH 05/61] perf tools: Introduce c2c_decode_stats function

2016-09-19 Thread Joe Mario

On 09/19/2016 01:15 PM, Nilay Vaish wrote:

On 19 September 2016 at 08:09, Jiri Olsa  wrote:

diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 7f69bf9d789d..27c6bb5abafb 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -2,6 +2,10 @@
 #define __PERF_MEM_EVENTS_H

 #include 
+#include 
+#include 
+#include 
+#include "stat.h"

 struct perf_mem_event {
boolrecord;
@@ -33,4 +37,36 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct 
mem_info *mem_info);

 int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info 
*mem_info);

+struct c2c_stats {
+   int nr_entries;
+
+   int locks;   /* count of 'lock' transactions */
+   int store;   /* count of all stores in trace */
+   int st_uncache;  /* stores to uncacheable address */
+   int st_noadrs;   /* cacheable store with no address */


No address! Why would that happen?


[Resending without the html]

There are a small number of instructions that will trigger a perf mem event and
will have no address associated with them. Three of them include mfence,
wrmsr, and rdtsc. I believe there are at least two more.




--
Nilay





Re: [PATCH 05/61] perf tools: Introduce c2c_decode_stats function

2016-09-19 Thread Nilay Vaish
On 19 September 2016 at 08:09, Jiri Olsa  wrote:
> diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
> index 7f69bf9d789d..27c6bb5abafb 100644
> --- a/tools/perf/util/mem-events.h
> +++ b/tools/perf/util/mem-events.h
> @@ -2,6 +2,10 @@
>  #define __PERF_MEM_EVENTS_H
>
>  #include 
> +#include 
> +#include 
> +#include 
> +#include "stat.h"
>
>  struct perf_mem_event {
> boolrecord;
> @@ -33,4 +37,36 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct 
> mem_info *mem_info);
>
>  int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info 
> *mem_info);
>
> +struct c2c_stats {
> +   int nr_entries;
> +
> +   int locks;   /* count of 'lock' transactions */
> +   int store;   /* count of all stores in trace */
> +   int st_uncache;  /* stores to uncacheable address */
> +   int st_noadrs;   /* cacheable store with no address */

No address! Why would that happen?


--
Nilay


[PATCH 05/61] perf tools: Introduce c2c_decode_stats function

2016-09-19 Thread Jiri Olsa
Introducing c2c_decode_stats function, which decodes
data_src data into new struct c2c_stats.

Original-patch-by: Dick Fowles 
Original-patch-by: Don Zickus 
Link: http://lkml.kernel.org/n/tip-7garqfmx5izaqysde9jik...@git.kernel.org
Signed-off-by: Jiri Olsa 
---
 tools/perf/util/mem-events.c | 98 
 tools/perf/util/mem-events.h | 36 
 2 files changed, 134 insertions(+)

diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index bbc368e7d1e4..502fcee91973 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -9,6 +9,7 @@
 #include "mem-events.h"
 #include "debug.h"
 #include "symbol.h"
+#include "sort.h"
 
 unsigned int perf_mem_events__loads_ldlat = 30;
 
@@ -268,3 +269,100 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, 
struct mem_info *mem_in
 
return i;
 }
+
+int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi)
+{
+   union perf_mem_data_src *data_src = &mi->data_src;
+   u64 daddr  = mi->daddr.addr;
+   u64 op = data_src->mem_op;
+   u64 lvl= data_src->mem_lvl;
+   u64 snoop  = data_src->mem_snoop;
+   u64 lock   = data_src->mem_lock;
+   int err = 0;
+
+#define P(a, b) PERF_MEM_##a##_##b
+
+   stats->nr_entries++;
+
+   if (lock & P(LOCK, LOCKED)) stats->locks++;
+
+   if (op & P(OP, LOAD)) {
+   /* load */
+   stats->load++;
+
+   if (!daddr) {
+   stats->ld_noadrs++;
+   return -1;
+   }
+
+   if (lvl & P(LVL, HIT)) {
+   if (lvl & P(LVL, UNC)) stats->ld_uncache++;
+   if (lvl & P(LVL, IO))  stats->ld_io++;
+   if (lvl & P(LVL, LFB)) stats->ld_fbhit++;
+   if (lvl & P(LVL, L1 )) stats->ld_l1hit++;
+   if (lvl & P(LVL, L2 )) stats->ld_l2hit++;
+   if (lvl & P(LVL, L3 )) {
+   if (snoop & P(SNOOP, HITM))
+   stats->lcl_hitm++;
+   else
+   stats->ld_llchit++;
+   }
+
+   if (lvl & P(LVL, LOC_RAM)) {
+   stats->lcl_dram++;
+   if (snoop & P(SNOOP, HIT))
+   stats->ld_shared++;
+   else
+   stats->ld_excl++;
+   }
+
+   if ((lvl & P(LVL, REM_RAM1)) ||
+   (lvl & P(LVL, REM_RAM2))) {
+   stats->rmt_dram++;
+   if (snoop & P(SNOOP, HIT))
+   stats->ld_shared++;
+   else
+   stats->ld_excl++;
+   }
+   }
+
+   if ((lvl & P(LVL, REM_CCE1)) ||
+   (lvl & P(LVL, REM_CCE2))) {
+   if (snoop & P(SNOOP, HIT))
+   stats->rmt_hit++;
+   else if (snoop & P(SNOOP, HITM))
+   stats->rmt_hitm++;
+   }
+
+   if ((lvl & P(LVL, MISS)))
+   stats->ld_miss++;
+
+   } else if (op & P(OP, STORE)) {
+   /* store */
+   stats->store++;
+
+   if (!daddr) {
+   stats->st_noadrs++;
+   return -1;
+   }
+
+   if (lvl & P(LVL, HIT)) {
+   if (lvl & P(LVL, UNC)) stats->st_uncache++;
+   if (lvl & P(LVL, L1 )) stats->st_l1hit++;
+   }
+   if (lvl & P(LVL, MISS))
+   if (lvl & P(LVL, L1)) stats->st_l1miss++;
+   } else {
+   /* unparsable data_src? */
+   stats->noparse++;
+   return -1;
+   }
+
+   if (!mi->daddr.map || !mi->iaddr.map) {
+   stats->nomap++;
+   return -1;
+   }
+
+#undef P
+   return err;
+}
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index 7f69bf9d789d..27c6bb5abafb 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -2,6 +2,10 @@
 #define __PERF_MEM_EVENTS_H
 
 #include 
+#include 
+#include 
+#include 
+#include "stat.h"
 
 struct perf_mem_event {
boolrecord;
@@ -33,4 +37,36 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct 
mem_info *mem_info);
 
 int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info 
*mem_info);
 
+struct c2c_stats {
+   int nr_entries;
+
+   int locks;   /* count of 'lock' transactions */
+   int store;   /* count of all stores in trace */
+   in