[PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-26 Thread Andi Kleen
From: Andi Kleen 

Enable metrics printing in --per-core / --per-socket mode. We need
to save the shadow metrics in a unique place. Always use the first
CPU in the aggregation. Then use the same CPU to retrieve the
shadow value later.

Example output:

% perf stat --per-core -a ./BC1s

 Performance counter stats for 'system wide':

S0-C0           2       2966.020381      task-clock (msec)         #    2.004 CPUs utilized            (100.00%)
S0-C0           2                49      context-switches          #    0.017 K/sec                    (100.00%)
S0-C0           2                 4      cpu-migrations            #    0.001 K/sec                    (100.00%)
S0-C0           2               467      page-faults               #    0.157 K/sec
S0-C0           2     4,599,061,773      cycles                    #    1.551 GHz                      (100.00%)
S0-C0           2     9,755,886,883      instructions              #    2.12  insn per cycle           (100.00%)
S0-C0           2     1,906,272,125      branches                  #  642.704 M/sec                    (100.00%)
S0-C0           2        81,180,867      branch-misses             #    4.26% of all branches
S0-C1           2       2965.995373      task-clock (msec)         #    2.003 CPUs utilized            (100.00%)
S0-C1           2                62      context-switches          #    0.021 K/sec                    (100.00%)
S0-C1           2                 8      cpu-migrations            #    0.003 K/sec                    (100.00%)
S0-C1           2               281      page-faults               #    0.095 K/sec
S0-C1           2         6,347,290      cycles                    #    0.002 GHz                      (100.00%)
S0-C1           2         4,654,156      instructions              #    0.73  insn per cycle           (100.00%)
S0-C1           2           947,121      branches                  #    0.319 M/sec                    (100.00%)
S0-C1           2            37,322      branch-misses             #    3.94% of all branches

   1.480409747 seconds time elapsed

v2: Rebase to older patches
v3: Document shadow cpus. Fix aggr_get_id argument. Fix -A shadows (Jiri)
Signed-off-by: Andi Kleen 
---
 tools/perf/builtin-stat.c | 61 +--
 tools/perf/util/stat-shadow.c |  7 +
 2 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2ffb822..c79e571 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -740,6 +740,8 @@ struct outstate {
bool newline;
const char *prefix;
int  nfields;
+   int  id, nr;
+   struct perf_evsel *evsel;
 };
 
 #define METRIC_LEN  35
@@ -755,12 +757,9 @@ static void do_new_line_std(struct outstate *os)
 {
fputc('\n', os->fh);
fputs(os->prefix, os->fh);
+   aggr_printout(os->evsel, os->id, os->nr);
if (stat_config.aggr_mode == AGGR_NONE)
fprintf(os->fh, "");
-   if (stat_config.aggr_mode == AGGR_CORE)
-   fprintf(os->fh, "  ");
-   if (stat_config.aggr_mode == AGGR_SOCKET)
-   fprintf(os->fh, "");
fprintf(os->fh, " ");
 }
 
@@ -798,6 +797,7 @@ static void new_line_csv(void *ctx)
fputc('\n', os->fh);
if (os->prefix)
fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+   aggr_printout(os->evsel, os->id, os->nr);
for (i = 0; i < os->nfields; i++)
fputs(csv_sep, os->fh);
 }
@@ -855,6 +855,25 @@ static void nsec_printout(int id, int nr, struct 
perf_evsel *evsel, double avg)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+   int i;
+
+   if (stat_config.aggr_mode == AGGR_NONE)
+   return id;
+
+   if (stat_config.aggr_mode == AGGR_GLOBAL)
+   return 0;
+
+   for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+   int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+   if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+   return cpu2;
+   }
+   return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
FILE *output = stat_config.output;
@@ -891,7 +910,10 @@ static void printout(int id, int nr, struct perf_evsel 
*counter, double uval,
struct perf_stat_output_ctx out;
struct outstate os = {
.fh = stat_config.output,
-   .prefix = prefix ? prefix : ""
+   .prefix = prefix ? prefix : "",
+   .id = id,
+   .nr = nr,
+   .evsel = counter,
};
print_metric_t pm = print_metric_std;
void (*nl)(void *);
@@ -958,16 +980,37 @@ 

[PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-26 Thread Andi Kleen
From: Andi Kleen 

Enable metrics printing in --per-core / --per-socket mode. We need
to save the shadow metrics in a unique place. Always use the first
CPU in the aggregation. Then use the same CPU to retrieve the
shadow value later.

Example output:

% perf stat --per-core -a ./BC1s

 Performance counter stats for 'system wide':

S0-C0           2       2966.020381      task-clock (msec)         #    2.004 CPUs utilized            (100.00%)
S0-C0           2                49      context-switches          #    0.017 K/sec                    (100.00%)
S0-C0           2                 4      cpu-migrations            #    0.001 K/sec                    (100.00%)
S0-C0           2               467      page-faults               #    0.157 K/sec
S0-C0           2     4,599,061,773      cycles                    #    1.551 GHz                      (100.00%)
S0-C0           2     9,755,886,883      instructions              #    2.12  insn per cycle           (100.00%)
S0-C0           2     1,906,272,125      branches                  #  642.704 M/sec                    (100.00%)
S0-C0           2        81,180,867      branch-misses             #    4.26% of all branches
S0-C1           2       2965.995373      task-clock (msec)         #    2.003 CPUs utilized            (100.00%)
S0-C1           2                62      context-switches          #    0.021 K/sec                    (100.00%)
S0-C1           2                 8      cpu-migrations            #    0.003 K/sec                    (100.00%)
S0-C1           2               281      page-faults               #    0.095 K/sec
S0-C1           2         6,347,290      cycles                    #    0.002 GHz                      (100.00%)
S0-C1           2         4,654,156      instructions              #    0.73  insn per cycle           (100.00%)
S0-C1           2           947,121      branches                  #    0.319 M/sec                    (100.00%)
S0-C1           2            37,322      branch-misses             #    3.94% of all branches

   1.480409747 seconds time elapsed

v2: Rebase to older patches
v3: Document shadow cpus. Fix aggr_get_id argument. Fix -A shadows (Jiri)
Signed-off-by: Andi Kleen 
---
 tools/perf/builtin-stat.c | 61 +--
 tools/perf/util/stat-shadow.c |  7 +
 2 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2ffb822..c79e571 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -740,6 +740,8 @@ struct outstate {
bool newline;
const char *prefix;
int  nfields;
+   int  id, nr;
+   struct perf_evsel *evsel;
 };
 
 #define METRIC_LEN  35
@@ -755,12 +757,9 @@ static void do_new_line_std(struct outstate *os)
 {
fputc('\n', os->fh);
fputs(os->prefix, os->fh);
+   aggr_printout(os->evsel, os->id, os->nr);
if (stat_config.aggr_mode == AGGR_NONE)
fprintf(os->fh, "");
-   if (stat_config.aggr_mode == AGGR_CORE)
-   fprintf(os->fh, "  ");
-   if (stat_config.aggr_mode == AGGR_SOCKET)
-   fprintf(os->fh, "");
fprintf(os->fh, " ");
 }
 
@@ -798,6 +797,7 @@ static void new_line_csv(void *ctx)
fputc('\n', os->fh);
if (os->prefix)
fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+   aggr_printout(os->evsel, os->id, os->nr);
for (i = 0; i < os->nfields; i++)
fputs(csv_sep, os->fh);
 }
@@ -855,6 +855,25 @@ static void nsec_printout(int id, int nr, struct 
perf_evsel *evsel, double avg)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+   int i;
+
+   if (stat_config.aggr_mode == AGGR_NONE)
+   return id;
+
+   if (stat_config.aggr_mode == AGGR_GLOBAL)
+   return 0;
+
+   for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+   int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+   if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+   return cpu2;
+   }
+   return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
FILE *output = stat_config.output;
@@ -891,7 +910,10 @@ static void printout(int id, int nr, struct perf_evsel 
*counter, double uval,
struct perf_stat_output_ctx out;
struct outstate os = {
.fh = stat_config.output,
-   .prefix = prefix ? prefix : ""
+   .prefix = prefix ? prefix : "",
+   .id = id,
+   .nr = nr,
+   .evsel = counter,
};
print_metric_t pm = print_metric_std;
void (*nl)(void *);
@@ -958,16 +980,37 @@ static void printout(int id, int nr, struct 

Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-26 Thread Andi Kleen
> > +
> >  static void print_aggr(char *prefix)
> >  {
> > FILE *output = stat_config.output;
> > @@ -982,6 +1024,8 @@ static void print_aggr(char *prefix)
> > if (!(aggr_map || aggr_get_id))
> > return;
> >  
> > +   aggr_update_shadow();
> 
> this should be called from perf_stat_process_counter,
> not from display function

I tried it, but the function needs a lot of stuff (aggr_map,
evsel_list) that only exists in builtin-stat. Passing all
that around is quite complicated and intrusive.

I left it alone for now.

-Andi


Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-26 Thread Andi Kleen
> > +
> >  static void print_aggr(char *prefix)
> >  {
> > FILE *output = stat_config.output;
> > @@ -982,6 +1024,8 @@ static void print_aggr(char *prefix)
> > if (!(aggr_map || aggr_get_id))
> > return;
> >  
> > +   aggr_update_shadow();
> 
> this should be called from perf_stat_process_counter,
> not from display function

I tried it, but the function needs a lot of stuff (aggr_map,
evsel_list) that only exists in builtin-stat. Passing all
that around is quite complicated and intrusive.

I left it alone for now.

-Andi


Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-22 Thread Jiri Olsa
On Mon, Feb 22, 2016 at 05:52:02PM +0100, Andi Kleen wrote:
> On Sun, Feb 21, 2016 at 06:15:35PM +0100, Jiri Olsa wrote:
> > On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote:
> > 
> > SNIP
> > 
> > > @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct 
> > > perf_evsel *counter, double uval,
> > >   struct perf_stat_output_ctx out;
> > >   struct outstate os = {
> > >   .fh = stat_config.output,
> > > - .prefix = prefix ? prefix : ""
> > > + .prefix = prefix ? prefix : "",
> > > + .id = id,
> > > + .nr = nr,
> > > + .evsel = counter,
> > >   };
> > >   print_metric_t pm = print_metric_std;
> > >   void (*nl)(void *);
> > > @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct 
> > > perf_evsel *counter, double uval,
> > >  
> > >   perf_stat__print_shadow_stats(counter, uval,
> > >   stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
> > > - cpu_map__id_to_cpu(id),
> > > + first_shadow_cpu(counter, id),
> > 
> > hum, IIUC you need to handle AGGR_NONE in here as well?
> 
> AFAIK it works. aggr_get_id in first_shadow_cpu and cpu_map__id_to_cpu
> handle this case, right?

it does not look like.. however it'll be more clear once
there's the doc/comment about used cpus for aggr modes
I asked for in here:

  http://marc.info/?l=linux-kernel&m=145607533503803&w=2

thanks,
jirka


Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-22 Thread Jiri Olsa
On Mon, Feb 22, 2016 at 05:52:02PM +0100, Andi Kleen wrote:
> On Sun, Feb 21, 2016 at 06:15:35PM +0100, Jiri Olsa wrote:
> > On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote:
> > 
> > SNIP
> > 
> > > @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct 
> > > perf_evsel *counter, double uval,
> > >   struct perf_stat_output_ctx out;
> > >   struct outstate os = {
> > >   .fh = stat_config.output,
> > > - .prefix = prefix ? prefix : ""
> > > + .prefix = prefix ? prefix : "",
> > > + .id = id,
> > > + .nr = nr,
> > > + .evsel = counter,
> > >   };
> > >   print_metric_t pm = print_metric_std;
> > >   void (*nl)(void *);
> > > @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct 
> > > perf_evsel *counter, double uval,
> > >  
> > >   perf_stat__print_shadow_stats(counter, uval,
> > >   stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
> > > - cpu_map__id_to_cpu(id),
> > > + first_shadow_cpu(counter, id),
> > 
> > hum, IIUC you need to handle AGGR_NONE in here as well?
> 
> AFAIK it works. aggr_get_id in first_shadow_cpu and cpu_map__id_to_cpu
> handle this case, right?

it does not look like.. however it'll be more clear once
there's the doc/comment about used cpus for aggr modes
I asked for in here:

  http://marc.info/?l=linux-kernel&m=145607533503803&w=2

thanks,
jirka


Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-22 Thread Andi Kleen
On Sun, Feb 21, 2016 at 06:15:35PM +0100, Jiri Olsa wrote:
> On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote:
> 
> SNIP
> 
> > @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct perf_evsel 
> > *counter, double uval,
> > struct perf_stat_output_ctx out;
> > struct outstate os = {
> > .fh = stat_config.output,
> > -   .prefix = prefix ? prefix : ""
> > +   .prefix = prefix ? prefix : "",
> > +   .id = id,
> > +   .nr = nr,
> > +   .evsel = counter,
> > };
> > print_metric_t pm = print_metric_std;
> > void (*nl)(void *);
> > @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct 
> > perf_evsel *counter, double uval,
> >  
> > perf_stat__print_shadow_stats(counter, uval,
> > stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
> > -   cpu_map__id_to_cpu(id),
> > +   first_shadow_cpu(counter, id),
> 
> hum, IIUC you need to handle AGGR_NONE in here as well?

AFAIK it works. aggr_get_id in first_shadow_cpu and cpu_map__id_to_cpu
handle this case, right?

-Andi


Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-22 Thread Andi Kleen
On Sun, Feb 21, 2016 at 06:15:35PM +0100, Jiri Olsa wrote:
> On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote:
> 
> SNIP
> 
> > @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct perf_evsel 
> > *counter, double uval,
> > struct perf_stat_output_ctx out;
> > struct outstate os = {
> > .fh = stat_config.output,
> > -   .prefix = prefix ? prefix : ""
> > +   .prefix = prefix ? prefix : "",
> > +   .id = id,
> > +   .nr = nr,
> > +   .evsel = counter,
> > };
> > print_metric_t pm = print_metric_std;
> > void (*nl)(void *);
> > @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct 
> > perf_evsel *counter, double uval,
> >  
> > perf_stat__print_shadow_stats(counter, uval,
> > stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
> > -   cpu_map__id_to_cpu(id),
> > +   first_shadow_cpu(counter, id),
> 
> hum, IIUC you need to handle AGGR_NONE in here as well?

AFAIK it works. aggr_get_id in first_shadow_cpu and cpu_map__id_to_cpu
handle this case, right?

-Andi


Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-21 Thread Jiri Olsa
On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote:

SNIP

> @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct perf_evsel 
> *counter, double uval,
>   struct perf_stat_output_ctx out;
>   struct outstate os = {
>   .fh = stat_config.output,
> - .prefix = prefix ? prefix : ""
> + .prefix = prefix ? prefix : "",
> + .id = id,
> + .nr = nr,
> + .evsel = counter,
>   };
>   print_metric_t pm = print_metric_std;
>   void (*nl)(void *);
> @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct perf_evsel 
> *counter, double uval,
>  
>   perf_stat__print_shadow_stats(counter, uval,
>   stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
> - cpu_map__id_to_cpu(id),
> + first_shadow_cpu(counter, id),

hum, IIUC you need to handle AGGR_NONE in here as well?

thanks,
jirka


Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-21 Thread Jiri Olsa
On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote:

SNIP

> @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct perf_evsel 
> *counter, double uval,
>   struct perf_stat_output_ctx out;
>   struct outstate os = {
>   .fh = stat_config.output,
> - .prefix = prefix ? prefix : ""
> + .prefix = prefix ? prefix : "",
> + .id = id,
> + .nr = nr,
> + .evsel = counter,
>   };
>   print_metric_t pm = print_metric_std;
>   void (*nl)(void *);
> @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct perf_evsel 
> *counter, double uval,
>  
>   perf_stat__print_shadow_stats(counter, uval,
>   stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
> - cpu_map__id_to_cpu(id),
> + first_shadow_cpu(counter, id),

hum, IIUC you need to handle AGGR_NONE in here as well?

thanks,
jirka


Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-21 Thread Jiri Olsa
On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote:

SNIP

> +static void aggr_update_shadow(void)
> +{
> + int cpu, cpu2, s2, id, s;
> + u64 val;
> + struct perf_evsel *counter;
> +
> + for (s = 0; s < aggr_map->nr; s++) {
> + id = aggr_map->map[s];
> + evlist__for_each(evsel_list, counter) {
> + val = 0;
> + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); 
> cpu++) {
> + cpu2 = perf_evsel__cpus(counter)->map[cpu];
> + s2 = aggr_get_id(evsel_list->cpus, cpu2);
> + if (s2 != id)
> + continue;
> + val += perf_counts(counter->counts, cpu, 
> 0)->val;
> + }
> + val = val * counter->scale;
> + perf_stat__update_shadow_stats(counter, &val,
> + first_shadow_cpu(counter, id));
> + }
> + }
> +}


> +
>  static void print_aggr(char *prefix)
>  {
>   FILE *output = stat_config.output;
> @@ -982,6 +1024,8 @@ static void print_aggr(char *prefix)
>   if (!(aggr_map || aggr_get_id))
>   return;
>  
> + aggr_update_shadow();

this should be called from perf_stat_process_counter,
not from display function

also please document somewhere (best around shadow stats variables)
what cpus (array members) are used for given AGGR_*

thanks,
jirka


Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-21 Thread Jiri Olsa
On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote:

SNIP

> +static void aggr_update_shadow(void)
> +{
> + int cpu, cpu2, s2, id, s;
> + u64 val;
> + struct perf_evsel *counter;
> +
> + for (s = 0; s < aggr_map->nr; s++) {
> + id = aggr_map->map[s];
> + evlist__for_each(evsel_list, counter) {
> + val = 0;
> + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); 
> cpu++) {
> + cpu2 = perf_evsel__cpus(counter)->map[cpu];
> + s2 = aggr_get_id(evsel_list->cpus, cpu2);
> + if (s2 != id)
> + continue;
> + val += perf_counts(counter->counts, cpu, 
> 0)->val;
> + }
> + val = val * counter->scale;
> + perf_stat__update_shadow_stats(counter, &val,
> + first_shadow_cpu(counter, id));
> + }
> + }
> +}


> +
>  static void print_aggr(char *prefix)
>  {
>   FILE *output = stat_config.output;
> @@ -982,6 +1024,8 @@ static void print_aggr(char *prefix)
>   if (!(aggr_map || aggr_get_id))
>   return;
>  
> + aggr_update_shadow();

this should be called from perf_stat_process_counter,
not from display function

also please document somewhere (best around shadow stats variables)
what cpus (array members) are used for given AGGR_*

thanks,
jirka


Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-21 Thread Jiri Olsa
On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote:

SNIP

>  
>   perf_stat__print_shadow_stats(counter, uval,
>   stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
> - cpu_map__id_to_cpu(id),
> + first_shadow_cpu(counter, id),
>   &out);
> -
>   if (!csv_output) {
>   print_noise(counter, noise);
>   print_running(run, ena);
>   }
>  }
>  
> +static void aggr_update_shadow(void)
> +{
> + int cpu, cpu2, s2, id, s;
> + u64 val;
> + struct perf_evsel *counter;
> +
> + for (s = 0; s < aggr_map->nr; s++) {
> + id = aggr_map->map[s];
> + evlist__for_each(evsel_list, counter) {
> + val = 0;
> + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); 
> cpu++) {
> + cpu2 = perf_evsel__cpus(counter)->map[cpu];
> + s2 = aggr_get_id(evsel_list->cpus, cpu2);

I think you need to pass cpu's 'idx' into aggr_get_id,
because it will do evsel_list->cpus[cpu2] for you

jirka


Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-21 Thread Jiri Olsa
On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote:

SNIP

>  
>   perf_stat__print_shadow_stats(counter, uval,
>   stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
> - cpu_map__id_to_cpu(id),
> + first_shadow_cpu(counter, id),
>   &out);
> -
>   if (!csv_output) {
>   print_noise(counter, noise);
>   print_running(run, ena);
>   }
>  }
>  
> +static void aggr_update_shadow(void)
> +{
> + int cpu, cpu2, s2, id, s;
> + u64 val;
> + struct perf_evsel *counter;
> +
> + for (s = 0; s < aggr_map->nr; s++) {
> + id = aggr_map->map[s];
> + evlist__for_each(evsel_list, counter) {
> + val = 0;
> + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); 
> cpu++) {
> + cpu2 = perf_evsel__cpus(counter)->map[cpu];
> + s2 = aggr_get_id(evsel_list->cpus, cpu2);

I think you need to pass cpu's 'idx' into aggr_get_id,
because it will do evsel_list->cpus[cpu2] for you

jirka


[PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-17 Thread Andi Kleen
From: Andi Kleen 

Enable metrics printing in --per-core / --per-socket mode. We need
to save the shadow metrics in a unique place. Always use the first
CPU in the aggregation. Then use the same CPU to retrieve the
shadow value later.

Example output:

% perf stat --per-core -a ./BC1s

 Performance counter stats for 'system wide':

S0-C0           2       2966.020381      task-clock (msec)         #    2.004 CPUs utilized            (100.00%)
S0-C0           2                49      context-switches          #    0.017 K/sec                    (100.00%)
S0-C0           2                 4      cpu-migrations            #    0.001 K/sec                    (100.00%)
S0-C0           2               467      page-faults               #    0.157 K/sec
S0-C0           2     4,599,061,773      cycles                    #    1.551 GHz                      (100.00%)
S0-C0           2     9,755,886,883      instructions              #    2.12  insn per cycle           (100.00%)
S0-C0           2     1,906,272,125      branches                  #  642.704 M/sec                    (100.00%)
S0-C0           2        81,180,867      branch-misses             #    4.26% of all branches
S0-C1           2       2965.995373      task-clock (msec)         #    2.003 CPUs utilized            (100.00%)
S0-C1           2                62      context-switches          #    0.021 K/sec                    (100.00%)
S0-C1           2                 8      cpu-migrations            #    0.003 K/sec                    (100.00%)
S0-C1           2               281      page-faults               #    0.095 K/sec
S0-C1           2         6,347,290      cycles                    #    0.002 GHz                      (100.00%)
S0-C1           2         4,654,156      instructions              #    0.73  insn per cycle           (100.00%)
S0-C1           2           947,121      branches                  #    0.319 M/sec                    (100.00%)
S0-C1           2            37,322      branch-misses             #    3.94% of all branches

   1.480409747 seconds time elapsed

v2: Rebase to older patches
Signed-off-by: Andi Kleen 
---
 tools/perf/builtin-stat.c | 58 +--
 1 file changed, 51 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6c2c1d2..715e5b5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -741,6 +741,8 @@ struct outstate {
const char *prefix;
int  nfields;
u64  run, ena;
+   int  id, nr;
+   struct perf_evsel *evsel;
 };
 
 #define METRIC_LEN  35
@@ -756,12 +758,9 @@ static void do_new_line_std(struct outstate *os)
 {
fputc('\n', os->fh);
fputs(os->prefix, os->fh);
+   aggr_printout(os->evsel, os->id, os->nr);
if (stat_config.aggr_mode == AGGR_NONE)
fprintf(os->fh, "");
-   if (stat_config.aggr_mode == AGGR_CORE)
-   fprintf(os->fh, "  ");
-   if (stat_config.aggr_mode == AGGR_SOCKET)
-   fprintf(os->fh, "");
fprintf(os->fh, " ");
 }
 
@@ -799,6 +798,7 @@ static void new_line_csv(void *ctx)
fputc('\n', os->fh);
if (os->prefix)
fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+   aggr_printout(os->evsel, os->id, os->nr);
for (i = 0; i < os->nfields; i++)
fputs(csv_sep, os->fh);
 }
@@ -856,6 +856,22 @@ static void nsec_printout(int id, int nr, struct 
perf_evsel *evsel, double avg)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+   int i;
+
+   if (aggr_get_id == NULL)
+   return 0;
+
+   for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+   int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+   if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+   return cpu2;
+   }
+   return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
FILE *output = stat_config.output;
@@ -892,7 +908,10 @@ static void printout(int id, int nr, struct perf_evsel 
*counter, double uval,
struct perf_stat_output_ctx out;
struct outstate os = {
.fh = stat_config.output,
-   .prefix = prefix ? prefix : ""
+   .prefix = prefix ? prefix : "",
+   .id = id,
+   .nr = nr,
+   .evsel = counter,
};
print_metric_t pm = print_metric_std;
void (*nl)(void *);
@@ -962,15 +981,38 @@ static void printout(int id, int nr, struct perf_evsel 
*counter, double uval,
 
perf_stat__print_shadow_stats(counter, uval,
stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
- 

[PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode

2016-02-17 Thread Andi Kleen
From: Andi Kleen 

Enable metrics printing in --per-core / --per-socket mode. We need
to save the shadow metrics in a unique place. Always use the first
CPU in the aggregation. Then use the same CPU to retrieve the
shadow value later.

Example output:

% perf stat --per-core -a ./BC1s

 Performance counter stats for 'system wide':

S0-C0           2       2966.020381      task-clock (msec)         #    2.004 CPUs utilized            (100.00%)
S0-C0           2                49      context-switches          #    0.017 K/sec                    (100.00%)
S0-C0           2                 4      cpu-migrations            #    0.001 K/sec                    (100.00%)
S0-C0           2               467      page-faults               #    0.157 K/sec
S0-C0           2     4,599,061,773      cycles                    #    1.551 GHz                      (100.00%)
S0-C0           2     9,755,886,883      instructions              #    2.12  insn per cycle           (100.00%)
S0-C0           2     1,906,272,125      branches                  #  642.704 M/sec                    (100.00%)
S0-C0           2        81,180,867      branch-misses             #    4.26% of all branches
S0-C1           2       2965.995373      task-clock (msec)         #    2.003 CPUs utilized            (100.00%)
S0-C1           2                62      context-switches          #    0.021 K/sec                    (100.00%)
S0-C1           2                 8      cpu-migrations            #    0.003 K/sec                    (100.00%)
S0-C1           2               281      page-faults               #    0.095 K/sec
S0-C1           2         6,347,290      cycles                    #    0.002 GHz                      (100.00%)
S0-C1           2         4,654,156      instructions              #    0.73  insn per cycle           (100.00%)
S0-C1           2           947,121      branches                  #    0.319 M/sec                    (100.00%)
S0-C1           2            37,322      branch-misses             #    3.94% of all branches

   1.480409747 seconds time elapsed

v2: Rebase to older patches
Signed-off-by: Andi Kleen 
---
 tools/perf/builtin-stat.c | 58 +--
 1 file changed, 51 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6c2c1d2..715e5b5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -741,6 +741,8 @@ struct outstate {
const char *prefix;
int  nfields;
u64  run, ena;
+   int  id, nr;
+   struct perf_evsel *evsel;
 };
 
 #define METRIC_LEN  35
@@ -756,12 +758,9 @@ static void do_new_line_std(struct outstate *os)
 {
fputc('\n', os->fh);
fputs(os->prefix, os->fh);
+   aggr_printout(os->evsel, os->id, os->nr);
if (stat_config.aggr_mode == AGGR_NONE)
fprintf(os->fh, "");
-   if (stat_config.aggr_mode == AGGR_CORE)
-   fprintf(os->fh, "  ");
-   if (stat_config.aggr_mode == AGGR_SOCKET)
-   fprintf(os->fh, "");
fprintf(os->fh, " ");
 }
 
@@ -799,6 +798,7 @@ static void new_line_csv(void *ctx)
fputc('\n', os->fh);
if (os->prefix)
fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+   aggr_printout(os->evsel, os->id, os->nr);
for (i = 0; i < os->nfields; i++)
fputs(csv_sep, os->fh);
 }
@@ -856,6 +856,22 @@ static void nsec_printout(int id, int nr, struct 
perf_evsel *evsel, double avg)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+   int i;
+
+   if (aggr_get_id == NULL)
+   return 0;
+
+   for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+   int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+   if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+   return cpu2;
+   }
+   return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
FILE *output = stat_config.output;
@@ -892,7 +908,10 @@ static void printout(int id, int nr, struct perf_evsel 
*counter, double uval,
struct perf_stat_output_ctx out;
struct outstate os = {
.fh = stat_config.output,
-   .prefix = prefix ? prefix : ""
+   .prefix = prefix ? prefix : "",
+   .id = id,
+   .nr = nr,
+   .evsel = counter,
};
print_metric_t pm = print_metric_std;
void (*nl)(void *);
@@ -962,15 +981,38 @@ static void printout(int id, int nr, struct perf_evsel 
*counter, double uval,
 
perf_stat__print_shadow_stats(counter, uval,
stat_config.aggr_mode == AGGR_GLOBAL ? 0 :
-