[PATCH 7/7] perf, tools: Support metrics in --per-core/socket mode

2015-08-07 Thread Andi Kleen
From: Andi Kleen 

Enable metrics printing in --per-core / --per-socket mode. We need
to save the shadow metrics in a unique place. Always use the first
CPU in the aggregation. Then use the same CPU to retrieve the
shadow value later.

Example output:

% perf stat --per-core -a ./BC1s

 Performance counter stats for 'system wide':

S0-C0   2      2966.020381  task-clock (msec) #   2.004 CPUs utilized   (100.00%)
S0-C0   2               49  context-switches  #   0.017 K/sec           (100.00%)
S0-C0   2                4  cpu-migrations    #   0.001 K/sec           (100.00%)
S0-C0   2              467  page-faults       #   0.157 K/sec
S0-C0   2    4,599,061,773  cycles            #   1.551 GHz             (100.00%)
S0-C0   2    9,755,886,883  instructions      #    2.12 insn per cycle  (100.00%)
S0-C0   2    1,906,272,125  branches          # 642.704 M/sec           (100.00%)
S0-C0   2       81,180,867  branch-misses     #   4.26% of all branches
S0-C1   2      2965.995373  task-clock (msec) #   2.003 CPUs utilized   (100.00%)
S0-C1   2               62  context-switches  #   0.021 K/sec           (100.00%)
S0-C1   2                8  cpu-migrations    #   0.003 K/sec           (100.00%)
S0-C1   2              281  page-faults       #   0.095 K/sec
S0-C1   2        6,347,290  cycles            #   0.002 GHz             (100.00%)
S0-C1   2        4,654,156  instructions      #    0.73 insn per cycle  (100.00%)
S0-C1   2          947,121  branches          #   0.319 M/sec           (100.00%)
S0-C1   2           37,322  branch-misses     #   3.94% of all branches

   1.480409747 seconds time elapsed

Signed-off-by: Andi Kleen 
---
 tools/perf/builtin-stat.c | 62 ---
 1 file changed, 58 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d777bb6..ea5298a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -488,6 +488,8 @@ struct outstate {
const char *prefix;
int  nfields;
u64  run, ena;
+   int  id, nr;
+   struct perf_evsel *evsel;
 };
 
 #define BASE_INDENT 41
@@ -498,13 +500,19 @@ struct outstate {
 static void new_line_no_aggr_std(void *ctx)
 {
struct outstate *os = ctx;
-   fprintf(os->fh, "\n%s%-*s", os->prefix, BASE_INDENT + NA_INDENT, "");
+
+   fprintf(os->fh, "\n%s", os->prefix);
+   aggr_printout(os->evsel, os->id, os->nr);
+   fprintf(os->fh, "%-*s", BASE_INDENT + NA_INDENT, "");
 }
 
 static void new_line_std(void *ctx)
 {
struct outstate *os = ctx;
-   fprintf(os->fh, "\n%s%-*s", os->prefix, BASE_INDENT + AGGR_INDENT, "");
+
+   fprintf(os->fh, "\n%s", os->prefix);
+   aggr_printout(os->evsel, os->id, os->nr);
+   fprintf(os->fh, "%-*s", BASE_INDENT + AGGR_INDENT, "");
 }
 
 static void print_metric_std(void *ctx, const char *color, const char *fmt,
@@ -535,6 +543,7 @@ static void new_line_csv(void *ctx)
fputc('\n', os->fh);
if (os->prefix)
fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+   aggr_printout(os->evsel, os->id, os->nr);
for (i = 0; i < os->nfields; i++)
fputs(csv_sep, os->fh);
 }
@@ -598,6 +607,22 @@ static void nsec_printout(int id, int nr, struct 
perf_evsel *evsel, double avg)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+   int i;
+
+   if (aggr_get_id == NULL)
+   return 0;
+
+   for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+   int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+   if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+   return cpu2;
+   }
+   return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
FILE *output = stat_config.output;
@@ -633,7 +658,10 @@ static void printout(int id, int nr, struct perf_evsel 
*counter, double uval,
 {
struct outstate os = {
.fh = stat_config.output,
-   .prefix = prefix ? prefix : ""
+   .prefix = prefix ? prefix : "",
+   .id = id,
+   .nr = nr,
+   .evsel = counter,
};
print_metric_t pm = print_metric_std;
void (*nl)(void *);
@@ -693,7 +721,7 @@ static void printout(int id, int nr, struct perf_evsel 
*counter, double uval,
 
perf_stat__print_shadow_stats(counter, 

[PATCH 7/7] perf, tools: Support metrics in --per-core/socket mode

2015-08-07 Thread Andi Kleen
From: Andi Kleen a...@linux.intel.com

Enable metrics printing in --per-core / --per-socket mode. We need
to save the shadow metrics in a unique place. Always use the first
CPU in the aggregation. Then use the same CPU to retrieve the
shadow value later.

Example output:

% perf stat --per-core -a ./BC1s

 Performance counter stats for 'system wide':

S0-C0   2      2966.020381  task-clock (msec) #   2.004 CPUs utilized   (100.00%)
S0-C0   2               49  context-switches  #   0.017 K/sec           (100.00%)
S0-C0   2                4  cpu-migrations    #   0.001 K/sec           (100.00%)
S0-C0   2              467  page-faults       #   0.157 K/sec
S0-C0   2    4,599,061,773  cycles            #   1.551 GHz             (100.00%)
S0-C0   2    9,755,886,883  instructions      #    2.12 insn per cycle  (100.00%)
S0-C0   2    1,906,272,125  branches          # 642.704 M/sec           (100.00%)
S0-C0   2       81,180,867  branch-misses     #   4.26% of all branches
S0-C1   2      2965.995373  task-clock (msec) #   2.003 CPUs utilized   (100.00%)
S0-C1   2               62  context-switches  #   0.021 K/sec           (100.00%)
S0-C1   2                8  cpu-migrations    #   0.003 K/sec           (100.00%)
S0-C1   2              281  page-faults       #   0.095 K/sec
S0-C1   2        6,347,290  cycles            #   0.002 GHz             (100.00%)
S0-C1   2        4,654,156  instructions      #    0.73 insn per cycle  (100.00%)
S0-C1   2          947,121  branches          #   0.319 M/sec           (100.00%)
S0-C1   2           37,322  branch-misses     #   3.94% of all branches

   1.480409747 seconds time elapsed

Signed-off-by: Andi Kleen a...@linux.intel.com
---
 tools/perf/builtin-stat.c | 62 ---
 1 file changed, 58 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d777bb6..ea5298a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -488,6 +488,8 @@ struct outstate {
const char *prefix;
int  nfields;
u64  run, ena;
+   int  id, nr;
+   struct perf_evsel *evsel;
 };
 
 #define BASE_INDENT 41
@@ -498,13 +500,19 @@ struct outstate {
 static void new_line_no_aggr_std(void *ctx)
 {
struct outstate *os = ctx;
-   fprintf(os->fh, "\n%s%-*s", os->prefix, BASE_INDENT + NA_INDENT, "");
+
+   fprintf(os->fh, "\n%s", os->prefix);
+   aggr_printout(os->evsel, os->id, os->nr);
+   fprintf(os->fh, "%-*s", BASE_INDENT + NA_INDENT, "");
 }
 
 static void new_line_std(void *ctx)
 {
struct outstate *os = ctx;
-   fprintf(os->fh, "\n%s%-*s", os->prefix, BASE_INDENT + AGGR_INDENT, "");
+
+   fprintf(os->fh, "\n%s", os->prefix);
+   aggr_printout(os->evsel, os->id, os->nr);
+   fprintf(os->fh, "%-*s", BASE_INDENT + AGGR_INDENT, "");
 }
 
 static void print_metric_std(void *ctx, const char *color, const char *fmt,
@@ -535,6 +543,7 @@ static void new_line_csv(void *ctx)
fputc('\n', os->fh);
if (os->prefix)
fprintf(os->fh, "%s%s", os->prefix, csv_sep);
+   aggr_printout(os->evsel, os->id, os->nr);
for (i = 0; i < os->nfields; i++)
fputs(csv_sep, os->fh);
 }
@@ -598,6 +607,22 @@ static void nsec_printout(int id, int nr, struct 
perf_evsel *evsel, double avg)
fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
 }
 
+static int first_shadow_cpu(struct perf_evsel *evsel, int id)
+{
+   int i;
+
+   if (aggr_get_id == NULL)
+   return 0;
+
+   for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+   int cpu2 = perf_evsel__cpus(evsel)->map[i];
+
+   if (aggr_get_id(evsel_list->cpus, cpu2) == id)
+   return cpu2;
+   }
+   return 0;
+}
+
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
FILE *output = stat_config.output;
@@ -633,7 +658,10 @@ static void printout(int id, int nr, struct perf_evsel 
*counter, double uval,
 {
struct outstate os = {
.fh = stat_config.output,
-   .prefix = prefix ? prefix : ""
+   .prefix = prefix ? prefix : "",
+   .id = id,
+   .nr = nr,
+   .evsel = counter,
};
print_metric_t pm = print_metric_std;
void (*nl)(void *);
@@ -693,7 +721,7 @@ static void printout(int id, int nr, struct perf_evsel 
*counter, double uval,
 
perf_stat__print_shadow_stats(counter, uval,