[PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
From: Andi Kleen Enable metrics printing in --per-core / --per-socket mode. We need to save the shadow metrics in a unique place. Always use the first CPU in the aggregation. Then use the same CPU to retrieve the shadow value later. Example output: % perf stat --per-core -a ./BC1s Performance counter stats for 'system wide': S0-C0 22966.020381 task-clock (msec) #2.004 CPUs utilized(100.00%) S0-C0 2 49 context-switches #0.017 K/sec(100.00%) S0-C0 2 4 cpu-migrations#0.001 K/sec(100.00%) S0-C0 2467 page-faults #0.157 K/sec S0-C0 2 4,599,061,773 cycles#1.551 GHz (100.00%) S0-C0 2 9,755,886,883 instructions #2.12 insn per cycle (100.00%) S0-C0 2 1,906,272,125 branches # 642.704 M/sec(100.00%) S0-C0 2 81,180,867 branch-misses #4.26% of all branches S0-C1 22965.995373 task-clock (msec) #2.003 CPUs utilized(100.00%) S0-C1 2 62 context-switches #0.021 K/sec(100.00%) S0-C1 2 8 cpu-migrations#0.003 K/sec(100.00%) S0-C1 2281 page-faults #0.095 K/sec S0-C1 2 6,347,290 cycles#0.002 GHz (100.00%) S0-C1 2 4,654,156 instructions #0.73 insn per cycle (100.00%) S0-C1 2947,121 branches #0.319 M/sec(100.00%) S0-C1 2 37,322 branch-misses #3.94% of all branches 1.480409747 seconds time elapsed v2: Rebase to older patches v3: Document shadow cpus. Fix aggr_get_id argument. 
Fix -A shadows (Jiri) Signed-off-by: Andi Kleen --- tools/perf/builtin-stat.c | 61 +-- tools/perf/util/stat-shadow.c | 7 + 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2ffb822..c79e571 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -740,6 +740,8 @@ struct outstate { bool newline; const char *prefix; int nfields; + int id, nr; + struct perf_evsel *evsel; }; #define METRIC_LEN 35 @@ -755,12 +757,9 @@ static void do_new_line_std(struct outstate *os) { fputc('\n', os->fh); fputs(os->prefix, os->fh); + aggr_printout(os->evsel, os->id, os->nr); if (stat_config.aggr_mode == AGGR_NONE) fprintf(os->fh, ""); - if (stat_config.aggr_mode == AGGR_CORE) - fprintf(os->fh, " "); - if (stat_config.aggr_mode == AGGR_SOCKET) - fprintf(os->fh, ""); fprintf(os->fh, " "); } @@ -798,6 +797,7 @@ static void new_line_csv(void *ctx) fputc('\n', os->fh); if (os->prefix) fprintf(os->fh, "%s%s", os->prefix, csv_sep); + aggr_printout(os->evsel, os->id, os->nr); for (i = 0; i < os->nfields; i++) fputs(csv_sep, os->fh); } @@ -855,6 +855,25 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } +static int first_shadow_cpu(struct perf_evsel *evsel, int id) +{ + int i; + + if (stat_config.aggr_mode == AGGR_NONE) + return id; + + if (stat_config.aggr_mode == AGGR_GLOBAL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (aggr_get_id(evsel_list->cpus, cpu2) == id) + return cpu2; + } + return 0; +} + static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -891,7 +910,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, struct perf_stat_output_ctx out; struct outstate os = { .fh = stat_config.output, - .prefix = prefix ? prefix : "" + .prefix = prefix ? 
prefix : "", + .id = id, + .nr = nr, + .evsel = counter, }; print_metric_t pm = print_metric_std; void (*nl)(void *); @@ -958,16 +980,37 @@
[PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
From: Andi Kleen Enable metrics printing in --per-core / --per-socket mode. We need to save the shadow metrics in a unique place. Always use the first CPU in the aggregation. Then use the same CPU to retrieve the shadow value later. Example output: % perf stat --per-core -a ./BC1s Performance counter stats for 'system wide': S0-C0 22966.020381 task-clock (msec) #2.004 CPUs utilized(100.00%) S0-C0 2 49 context-switches #0.017 K/sec(100.00%) S0-C0 2 4 cpu-migrations#0.001 K/sec(100.00%) S0-C0 2467 page-faults #0.157 K/sec S0-C0 2 4,599,061,773 cycles#1.551 GHz (100.00%) S0-C0 2 9,755,886,883 instructions #2.12 insn per cycle (100.00%) S0-C0 2 1,906,272,125 branches # 642.704 M/sec(100.00%) S0-C0 2 81,180,867 branch-misses #4.26% of all branches S0-C1 22965.995373 task-clock (msec) #2.003 CPUs utilized(100.00%) S0-C1 2 62 context-switches #0.021 K/sec(100.00%) S0-C1 2 8 cpu-migrations#0.003 K/sec(100.00%) S0-C1 2281 page-faults #0.095 K/sec S0-C1 2 6,347,290 cycles#0.002 GHz (100.00%) S0-C1 2 4,654,156 instructions #0.73 insn per cycle (100.00%) S0-C1 2947,121 branches #0.319 M/sec(100.00%) S0-C1 2 37,322 branch-misses #3.94% of all branches 1.480409747 seconds time elapsed v2: Rebase to older patches v3: Document shadow cpus. Fix aggr_get_id argument. 
Fix -A shadows (Jiri) Signed-off-by: Andi Kleen --- tools/perf/builtin-stat.c | 61 +-- tools/perf/util/stat-shadow.c | 7 + 2 files changed, 60 insertions(+), 8 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 2ffb822..c79e571 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -740,6 +740,8 @@ struct outstate { bool newline; const char *prefix; int nfields; + int id, nr; + struct perf_evsel *evsel; }; #define METRIC_LEN 35 @@ -755,12 +757,9 @@ static void do_new_line_std(struct outstate *os) { fputc('\n', os->fh); fputs(os->prefix, os->fh); + aggr_printout(os->evsel, os->id, os->nr); if (stat_config.aggr_mode == AGGR_NONE) fprintf(os->fh, ""); - if (stat_config.aggr_mode == AGGR_CORE) - fprintf(os->fh, " "); - if (stat_config.aggr_mode == AGGR_SOCKET) - fprintf(os->fh, ""); fprintf(os->fh, " "); } @@ -798,6 +797,7 @@ static void new_line_csv(void *ctx) fputc('\n', os->fh); if (os->prefix) fprintf(os->fh, "%s%s", os->prefix, csv_sep); + aggr_printout(os->evsel, os->id, os->nr); for (i = 0; i < os->nfields; i++) fputs(csv_sep, os->fh); } @@ -855,6 +855,25 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } +static int first_shadow_cpu(struct perf_evsel *evsel, int id) +{ + int i; + + if (stat_config.aggr_mode == AGGR_NONE) + return id; + + if (stat_config.aggr_mode == AGGR_GLOBAL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (aggr_get_id(evsel_list->cpus, cpu2) == id) + return cpu2; + } + return 0; +} + static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -891,7 +910,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, struct perf_stat_output_ctx out; struct outstate os = { .fh = stat_config.output, - .prefix = prefix ? prefix : "" + .prefix = prefix ? 
prefix : "", + .id = id, + .nr = nr, + .evsel = counter, }; print_metric_t pm = print_metric_std; void (*nl)(void *); @@ -958,16 +980,37 @@ static void printout(int id, int nr, struct
Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
> > + > > static void print_aggr(char *prefix) > > { > > FILE *output = stat_config.output; > > @@ -982,6 +1024,8 @@ static void print_aggr(char *prefix) > > if (!(aggr_map || aggr_get_id)) > > return; > > > > + aggr_update_shadow(); > > this should be called from perf_stat_process_counter, > not from display function I tried it, but the function needs a lot of stuff (aggr_map, evsel_list) that only exists in builtin-stat. Passing all that around is quite complicated and intrusive. I left it alone for now. -Andi
Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
> > + > > static void print_aggr(char *prefix) > > { > > FILE *output = stat_config.output; > > @@ -982,6 +1024,8 @@ static void print_aggr(char *prefix) > > if (!(aggr_map || aggr_get_id)) > > return; > > > > + aggr_update_shadow(); > > this should be called from perf_stat_process_counter, > not from display function I tried it, but the function needs a lot of stuff (aggr_map, evsel_list) that only exists in builtin-stat. Passing all that around is quite complicated and intrusive. I left it alone for now. -Andi
Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
On Mon, Feb 22, 2016 at 05:52:02PM +0100, Andi Kleen wrote: > On Sun, Feb 21, 2016 at 06:15:35PM +0100, Jiri Olsa wrote: > > On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote: > > > > SNIP > > > > > @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct > > > perf_evsel *counter, double uval, > > > struct perf_stat_output_ctx out; > > > struct outstate os = { > > > .fh = stat_config.output, > > > - .prefix = prefix ? prefix : "" > > > + .prefix = prefix ? prefix : "", > > > + .id = id, > > > + .nr = nr, > > > + .evsel = counter, > > > }; > > > print_metric_t pm = print_metric_std; > > > void (*nl)(void *); > > > @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct > > > perf_evsel *counter, double uval, > > > > > > perf_stat__print_shadow_stats(counter, uval, > > > stat_config.aggr_mode == AGGR_GLOBAL ? 0 : > > > - cpu_map__id_to_cpu(id), > > > + first_shadow_cpu(counter, id), > > > > hum, IIUC you need to handle AGGR_NONE in here as well? > > AFAIK it works. aggr_get_id in first_shadow_cpu and cpu_map__id_to_cpu > handle this case, right? it does not look like.. however it'll be more clear once there's the doc/comment about used cpus for aggr modes I asked for in here: http://marc.info/?l=linux-kernel=145607533503803=2 thanks, jirka
Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
On Mon, Feb 22, 2016 at 05:52:02PM +0100, Andi Kleen wrote: > On Sun, Feb 21, 2016 at 06:15:35PM +0100, Jiri Olsa wrote: > > On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote: > > > > SNIP > > > > > @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct > > > perf_evsel *counter, double uval, > > > struct perf_stat_output_ctx out; > > > struct outstate os = { > > > .fh = stat_config.output, > > > - .prefix = prefix ? prefix : "" > > > + .prefix = prefix ? prefix : "", > > > + .id = id, > > > + .nr = nr, > > > + .evsel = counter, > > > }; > > > print_metric_t pm = print_metric_std; > > > void (*nl)(void *); > > > @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct > > > perf_evsel *counter, double uval, > > > > > > perf_stat__print_shadow_stats(counter, uval, > > > stat_config.aggr_mode == AGGR_GLOBAL ? 0 : > > > - cpu_map__id_to_cpu(id), > > > + first_shadow_cpu(counter, id), > > > > hum, IIUC you need to handle AGGR_NONE in here as well? > > AFAIK it works. aggr_get_id in first_shadow_cpu and cpu_map__id_to_cpu > handle this case, right? it does not look like.. however it'll be more clear once there's the doc/comment about used cpus for aggr modes I asked for in here: http://marc.info/?l=linux-kernel=145607533503803=2 thanks, jirka
Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
On Sun, Feb 21, 2016 at 06:15:35PM +0100, Jiri Olsa wrote: > On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote: > > SNIP > > > @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct perf_evsel > > *counter, double uval, > > struct perf_stat_output_ctx out; > > struct outstate os = { > > .fh = stat_config.output, > > - .prefix = prefix ? prefix : "" > > + .prefix = prefix ? prefix : "", > > + .id = id, > > + .nr = nr, > > + .evsel = counter, > > }; > > print_metric_t pm = print_metric_std; > > void (*nl)(void *); > > @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct > > perf_evsel *counter, double uval, > > > > perf_stat__print_shadow_stats(counter, uval, > > stat_config.aggr_mode == AGGR_GLOBAL ? 0 : > > - cpu_map__id_to_cpu(id), > > + first_shadow_cpu(counter, id), > > hum, IIUC you need to handle AGGR_NONE in here as well? AFAIK it works. aggr_get_id in first_shadow_cpu and cpu_map__id_to_cpu handle this case, right? -Andi
Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
On Sun, Feb 21, 2016 at 06:15:35PM +0100, Jiri Olsa wrote: > On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote: > > SNIP > > > @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct perf_evsel > > *counter, double uval, > > struct perf_stat_output_ctx out; > > struct outstate os = { > > .fh = stat_config.output, > > - .prefix = prefix ? prefix : "" > > + .prefix = prefix ? prefix : "", > > + .id = id, > > + .nr = nr, > > + .evsel = counter, > > }; > > print_metric_t pm = print_metric_std; > > void (*nl)(void *); > > @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct > > perf_evsel *counter, double uval, > > > > perf_stat__print_shadow_stats(counter, uval, > > stat_config.aggr_mode == AGGR_GLOBAL ? 0 : > > - cpu_map__id_to_cpu(id), > > + first_shadow_cpu(counter, id), > > hum, IIUC you need to handle AGGR_NONE in here as well? AFAIK it works. aggr_get_id in first_shadow_cpu and cpu_map__id_to_cpu handle this case, right? -Andi
Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote: SNIP > @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct perf_evsel > *counter, double uval, > struct perf_stat_output_ctx out; > struct outstate os = { > .fh = stat_config.output, > - .prefix = prefix ? prefix : "" > + .prefix = prefix ? prefix : "", > + .id = id, > + .nr = nr, > + .evsel = counter, > }; > print_metric_t pm = print_metric_std; > void (*nl)(void *); > @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct perf_evsel > *counter, double uval, > > perf_stat__print_shadow_stats(counter, uval, > stat_config.aggr_mode == AGGR_GLOBAL ? 0 : > - cpu_map__id_to_cpu(id), > + first_shadow_cpu(counter, id), hum, IIUC you need to handle AGGR_NONE in here as well? thanks, jirka
Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote: SNIP > @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct perf_evsel > *counter, double uval, > struct perf_stat_output_ctx out; > struct outstate os = { > .fh = stat_config.output, > - .prefix = prefix ? prefix : "" > + .prefix = prefix ? prefix : "", > + .id = id, > + .nr = nr, > + .evsel = counter, > }; > print_metric_t pm = print_metric_std; > void (*nl)(void *); > @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct perf_evsel > *counter, double uval, > > perf_stat__print_shadow_stats(counter, uval, > stat_config.aggr_mode == AGGR_GLOBAL ? 0 : > - cpu_map__id_to_cpu(id), > + first_shadow_cpu(counter, id), hum, IIUC you need to handle AGGR_NONE in here as well? thanks, jirka
Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote: SNIP > +static void aggr_update_shadow(void) > +{ > + int cpu, cpu2, s2, id, s; > + u64 val; > + struct perf_evsel *counter; > + > + for (s = 0; s < aggr_map->nr; s++) { > + id = aggr_map->map[s]; > + evlist__for_each(evsel_list, counter) { > + val = 0; > + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); > cpu++) { > + cpu2 = perf_evsel__cpus(counter)->map[cpu]; > + s2 = aggr_get_id(evsel_list->cpus, cpu2); > + if (s2 != id) > + continue; > + val += perf_counts(counter->counts, cpu, > 0)->val; > + } > + val = val * counter->scale; > + perf_stat__update_shadow_stats(counter, , > + > first_shadow_cpu(counter, id)); > + } > + } > +} > + > static void print_aggr(char *prefix) > { > FILE *output = stat_config.output; > @@ -982,6 +1024,8 @@ static void print_aggr(char *prefix) > if (!(aggr_map || aggr_get_id)) > return; > > + aggr_update_shadow(); this should be called from perf_stat_process_counter, not from display function also please document somewhere (best around shadow stats variables) what cpus (array members) are used for given AGGR_* thanks, jirka
Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote: SNIP > +static void aggr_update_shadow(void) > +{ > + int cpu, cpu2, s2, id, s; > + u64 val; > + struct perf_evsel *counter; > + > + for (s = 0; s < aggr_map->nr; s++) { > + id = aggr_map->map[s]; > + evlist__for_each(evsel_list, counter) { > + val = 0; > + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); > cpu++) { > + cpu2 = perf_evsel__cpus(counter)->map[cpu]; > + s2 = aggr_get_id(evsel_list->cpus, cpu2); > + if (s2 != id) > + continue; > + val += perf_counts(counter->counts, cpu, > 0)->val; > + } > + val = val * counter->scale; > + perf_stat__update_shadow_stats(counter, , > + > first_shadow_cpu(counter, id)); > + } > + } > +} > + > static void print_aggr(char *prefix) > { > FILE *output = stat_config.output; > @@ -982,6 +1024,8 @@ static void print_aggr(char *prefix) > if (!(aggr_map || aggr_get_id)) > return; > > + aggr_update_shadow(); this should be called from perf_stat_process_counter, not from display function also please document somewhere (best around shadow stats variables) what cpus (array members) are used for given AGGR_* thanks, jirka
Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote: SNIP > > perf_stat__print_shadow_stats(counter, uval, > stat_config.aggr_mode == AGGR_GLOBAL ? 0 : > - cpu_map__id_to_cpu(id), > + first_shadow_cpu(counter, id), > ); > - > if (!csv_output) { > print_noise(counter, noise); > print_running(run, ena); > } > } > > +static void aggr_update_shadow(void) > +{ > + int cpu, cpu2, s2, id, s; > + u64 val; > + struct perf_evsel *counter; > + > + for (s = 0; s < aggr_map->nr; s++) { > + id = aggr_map->map[s]; > + evlist__for_each(evsel_list, counter) { > + val = 0; > + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); > cpu++) { > + cpu2 = perf_evsel__cpus(counter)->map[cpu]; > + s2 = aggr_get_id(evsel_list->cpus, cpu2); I think you need to pass cpu's 'idx' into aggr_get_id, because it will do evsel_list->cpus[cpu2] for you jirka
Re: [PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
On Wed, Feb 17, 2016 at 02:44:02PM -0800, Andi Kleen wrote: SNIP > > perf_stat__print_shadow_stats(counter, uval, > stat_config.aggr_mode == AGGR_GLOBAL ? 0 : > - cpu_map__id_to_cpu(id), > + first_shadow_cpu(counter, id), > ); > - > if (!csv_output) { > print_noise(counter, noise); > print_running(run, ena); > } > } > > +static void aggr_update_shadow(void) > +{ > + int cpu, cpu2, s2, id, s; > + u64 val; > + struct perf_evsel *counter; > + > + for (s = 0; s < aggr_map->nr; s++) { > + id = aggr_map->map[s]; > + evlist__for_each(evsel_list, counter) { > + val = 0; > + for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); > cpu++) { > + cpu2 = perf_evsel__cpus(counter)->map[cpu]; > + s2 = aggr_get_id(evsel_list->cpus, cpu2); I think you need to pass cpu's 'idx' into aggr_get_id, because it will do evsel_list->cpus[cpu2] for you jirka
[PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
From: Andi Kleen Enable metrics printing in --per-core / --per-socket mode. We need to save the shadow metrics in a unique place. Always use the first CPU in the aggregation. Then use the same CPU to retrieve the shadow value later. Example output: % perf stat --per-core -a ./BC1s Performance counter stats for 'system wide': S0-C0 22966.020381 task-clock (msec) #2.004 CPUs utilized(100.00%) S0-C0 2 49 context-switches #0.017 K/sec(100.00%) S0-C0 2 4 cpu-migrations#0.001 K/sec(100.00%) S0-C0 2467 page-faults #0.157 K/sec S0-C0 2 4,599,061,773 cycles#1.551 GHz (100.00%) S0-C0 2 9,755,886,883 instructions #2.12 insn per cycle (100.00%) S0-C0 2 1,906,272,125 branches # 642.704 M/sec(100.00%) S0-C0 2 81,180,867 branch-misses #4.26% of all branches S0-C1 22965.995373 task-clock (msec) #2.003 CPUs utilized(100.00%) S0-C1 2 62 context-switches #0.021 K/sec(100.00%) S0-C1 2 8 cpu-migrations#0.003 K/sec(100.00%) S0-C1 2281 page-faults #0.095 K/sec S0-C1 2 6,347,290 cycles#0.002 GHz (100.00%) S0-C1 2 4,654,156 instructions #0.73 insn per cycle (100.00%) S0-C1 2947,121 branches #0.319 M/sec(100.00%) S0-C1 2 37,322 branch-misses #3.94% of all branches 1.480409747 seconds time elapsed v2: Rebase to older patches Signed-off-by: Andi Kleen --- tools/perf/builtin-stat.c | 58 +-- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6c2c1d2..715e5b5 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -741,6 +741,8 @@ struct outstate { const char *prefix; int nfields; u64 run, ena; + int id, nr; + struct perf_evsel *evsel; }; #define METRIC_LEN 35 @@ -756,12 +758,9 @@ static void do_new_line_std(struct outstate *os) { fputc('\n', os->fh); fputs(os->prefix, os->fh); + aggr_printout(os->evsel, os->id, os->nr); if (stat_config.aggr_mode == AGGR_NONE) fprintf(os->fh, ""); - if (stat_config.aggr_mode == AGGR_CORE) - fprintf(os->fh, " "); - if (stat_config.aggr_mode == AGGR_SOCKET) - fprintf(os->fh, 
""); fprintf(os->fh, " "); } @@ -799,6 +798,7 @@ static void new_line_csv(void *ctx) fputc('\n', os->fh); if (os->prefix) fprintf(os->fh, "%s%s", os->prefix, csv_sep); + aggr_printout(os->evsel, os->id, os->nr); for (i = 0; i < os->nfields; i++) fputs(csv_sep, os->fh); } @@ -856,6 +856,22 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } +static int first_shadow_cpu(struct perf_evsel *evsel, int id) +{ + int i; + + if (aggr_get_id == NULL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (aggr_get_id(evsel_list->cpus, cpu2) == id) + return cpu2; + } + return 0; +} + static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, struct perf_stat_output_ctx out; struct outstate os = { .fh = stat_config.output, - .prefix = prefix ? prefix : "" + .prefix = prefix ? prefix : "", + .id = id, + .nr = nr, + .evsel = counter, }; print_metric_t pm = print_metric_std; void (*nl)(void *); @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, perf_stat__print_shadow_stats(counter, uval, stat_config.aggr_mode == AGGR_GLOBAL ? 0 : -
[PATCH 3/6] perf, tools, stat: Support metrics in --per-core/socket mode
From: Andi Kleen Enable metrics printing in --per-core / --per-socket mode. We need to save the shadow metrics in a unique place. Always use the first CPU in the aggregation. Then use the same CPU to retrieve the shadow value later. Example output: % perf stat --per-core -a ./BC1s Performance counter stats for 'system wide': S0-C0 22966.020381 task-clock (msec) #2.004 CPUs utilized(100.00%) S0-C0 2 49 context-switches #0.017 K/sec(100.00%) S0-C0 2 4 cpu-migrations#0.001 K/sec(100.00%) S0-C0 2467 page-faults #0.157 K/sec S0-C0 2 4,599,061,773 cycles#1.551 GHz (100.00%) S0-C0 2 9,755,886,883 instructions #2.12 insn per cycle (100.00%) S0-C0 2 1,906,272,125 branches # 642.704 M/sec(100.00%) S0-C0 2 81,180,867 branch-misses #4.26% of all branches S0-C1 22965.995373 task-clock (msec) #2.003 CPUs utilized(100.00%) S0-C1 2 62 context-switches #0.021 K/sec(100.00%) S0-C1 2 8 cpu-migrations#0.003 K/sec(100.00%) S0-C1 2281 page-faults #0.095 K/sec S0-C1 2 6,347,290 cycles#0.002 GHz (100.00%) S0-C1 2 4,654,156 instructions #0.73 insn per cycle (100.00%) S0-C1 2947,121 branches #0.319 M/sec(100.00%) S0-C1 2 37,322 branch-misses #3.94% of all branches 1.480409747 seconds time elapsed v2: Rebase to older patches Signed-off-by: Andi Kleen --- tools/perf/builtin-stat.c | 58 +-- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 6c2c1d2..715e5b5 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -741,6 +741,8 @@ struct outstate { const char *prefix; int nfields; u64 run, ena; + int id, nr; + struct perf_evsel *evsel; }; #define METRIC_LEN 35 @@ -756,12 +758,9 @@ static void do_new_line_std(struct outstate *os) { fputc('\n', os->fh); fputs(os->prefix, os->fh); + aggr_printout(os->evsel, os->id, os->nr); if (stat_config.aggr_mode == AGGR_NONE) fprintf(os->fh, ""); - if (stat_config.aggr_mode == AGGR_CORE) - fprintf(os->fh, " "); - if (stat_config.aggr_mode == AGGR_SOCKET) - fprintf(os->fh, 
""); fprintf(os->fh, " "); } @@ -799,6 +798,7 @@ static void new_line_csv(void *ctx) fputc('\n', os->fh); if (os->prefix) fprintf(os->fh, "%s%s", os->prefix, csv_sep); + aggr_printout(os->evsel, os->id, os->nr); for (i = 0; i < os->nfields; i++) fputs(csv_sep, os->fh); } @@ -856,6 +856,22 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, "%s%s", csv_sep, evsel->cgrp->name); } +static int first_shadow_cpu(struct perf_evsel *evsel, int id) +{ + int i; + + if (aggr_get_id == NULL) + return 0; + + for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) { + int cpu2 = perf_evsel__cpus(evsel)->map[i]; + + if (aggr_get_id(evsel_list->cpus, cpu2) == id) + return cpu2; + } + return 0; +} + static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { FILE *output = stat_config.output; @@ -892,7 +908,10 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, struct perf_stat_output_ctx out; struct outstate os = { .fh = stat_config.output, - .prefix = prefix ? prefix : "" + .prefix = prefix ? prefix : "", + .id = id, + .nr = nr, + .evsel = counter, }; print_metric_t pm = print_metric_std; void (*nl)(void *); @@ -962,15 +981,38 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, perf_stat__print_shadow_stats(counter, uval, stat_config.aggr_mode == AGGR_GLOBAL ? 0 : -