From: Andi Kleen <a...@linux.intel.com>

The uncore PMU has a lot of duplicated PMUs for different subsystems.
When expanding an uncore alias we usually end up with a large
number of identically named aliases, which makes perf stat
output difficult to read.

Automatically sum them up in perf stat, unless --no-merge is specified.

This can be default because only the uncores generally have duplicated
aliases. Other PMUs have unique names.

Before:

% perf stat --no-merge -a  -e unc_c_llc_lookup.any sleep 1

 Performance counter stats for 'system wide':

           694,976 Bytes unc_c_llc_lookup.any
           706,304 Bytes unc_c_llc_lookup.any
           956,608 Bytes unc_c_llc_lookup.any
           782,720 Bytes unc_c_llc_lookup.any
           605,696 Bytes unc_c_llc_lookup.any
           442,816 Bytes unc_c_llc_lookup.any
           659,328 Bytes unc_c_llc_lookup.any
           509,312 Bytes unc_c_llc_lookup.any
           263,936 Bytes unc_c_llc_lookup.any
           592,448 Bytes unc_c_llc_lookup.any
           672,448 Bytes unc_c_llc_lookup.any
           608,640 Bytes unc_c_llc_lookup.any
           641,024 Bytes unc_c_llc_lookup.any
           856,896 Bytes unc_c_llc_lookup.any
           808,832 Bytes unc_c_llc_lookup.any
           684,864 Bytes unc_c_llc_lookup.any
           710,464 Bytes unc_c_llc_lookup.any
           538,304 Bytes unc_c_llc_lookup.any

       1.002577660 seconds time elapsed

After:

% perf stat  -a  -e unc_c_llc_lookup.any sleep 1

 Performance counter stats for 'system wide':

         2,685,120 Bytes unc_c_llc_lookup.any

       1.002648032 seconds time elapsed

v2: Split collect_aliases. Rename alias flag.
Signed-off-by: Andi Kleen <a...@linux.intel.com>
---
 tools/perf/Documentation/perf-stat.txt |   3 +
 tools/perf/builtin-stat.c              | 132 +++++++++++++++++++++++++++------
 tools/perf/util/evsel.h                |   1 +
 3 files changed, 113 insertions(+), 23 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt 
b/tools/perf/Documentation/perf-stat.txt
index d96ccd4844df..320d8020bc5b 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -237,6 +237,9 @@ To interpret the results it is usually needed to know on 
which
 CPUs the workload runs on. If needed the CPUs can be forced using
 taskset.
 
+--no-merge::
+Do not merge results from same PMUs.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 688dea7cb08f..501d58a4925a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -140,6 +140,7 @@ static unsigned int         unit_width                      
= 4; /* strlen("unit") */
 static bool                    forever                         = false;
 static bool                    metric_only                     = false;
 static bool                    force_metric_only               = false;
+static bool                    no_merge                        = false;
 static struct timespec         ref_time;
 static struct cpu_map          *aggr_map;
 static aggr_get_id_t           aggr_get_id;
@@ -1178,11 +1179,66 @@ static void aggr_update_shadow(void)
        }
 }
 
+static void collect_all_aliases(struct perf_evsel *counter,
+                           void (*cb)(struct perf_evsel *counter, void *data,
+                                      bool first),
+                           void *data)
+{
+       struct perf_evsel *alias;
+
+       alias = list_prepare_entry(counter, &(evsel_list->entries), node);
+       list_for_each_entry_continue (alias, &evsel_list->entries, node) {
+               if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) 
||
+                   alias->scale != counter->scale ||
+                   alias->cgrp != counter->cgrp ||
+                   strcmp(alias->unit, counter->unit) ||
+                   nsec_counter(alias) != nsec_counter(counter))
+                       break;
+               alias->merged_stat = true;
+               cb(alias, data, false);
+       }
+}
+
+static void collect_aliases(struct perf_evsel *counter,
+                           void (*cb)(struct perf_evsel *counter, void *data,
+                                      bool first),
+                           void *data)
+{
+       cb(counter, data, true);
+       if (!no_merge)
+               collect_all_aliases(counter, cb, data);
+}
+
+struct aggr_data {
+       u64 ena, run, val;
+       int id;
+       int nr;
+       int cpu;
+};
+
+static void aggr_cb(struct perf_evsel *counter, void *data, bool first)
+{
+       struct aggr_data *ad = data;
+       int cpu, cpu2, s2;
+
+       for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+               cpu2 = perf_evsel__cpus(counter)->map[cpu];
+               s2 = aggr_get_id(evsel_list->cpus, cpu2);
+               if (s2 != ad->id)
+                       continue;
+               ad->val += perf_counts(counter->counts, cpu, 0)->val;
+               ad->ena += perf_counts(counter->counts, cpu, 0)->ena;
+               ad->run += perf_counts(counter->counts, cpu, 0)->run;
+               if (first)
+                       ad->nr++;
+       }
+}
+
 static void print_aggr(char *prefix)
 {
        FILE *output = stat_config.output;
        struct perf_evsel *counter;
-       int cpu, s, s2, id, nr;
+       int s, id, nr;
        double uval;
        u64 ena, run, val;
        bool first;
@@ -1197,23 +1253,22 @@ static void print_aggr(char *prefix)
         * Without each counter has its own line.
         */
        for (s = 0; s < aggr_map->nr; s++) {
+               struct aggr_data ad;
                if (prefix && metric_only)
                        fprintf(output, "%s", prefix);
 
-               id = aggr_map->map[s];
+               ad.id = id = aggr_map->map[s];
                first = true;
                evlist__for_each_entry(evsel_list, counter) {
-                       val = ena = run = 0;
-                       nr = 0;
-                       for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); 
cpu++) {
-                               s2 = aggr_get_id(perf_evsel__cpus(counter), 
cpu);
-                               if (s2 != id)
-                                       continue;
-                               val += perf_counts(counter->counts, cpu, 
0)->val;
-                               ena += perf_counts(counter->counts, cpu, 
0)->ena;
-                               run += perf_counts(counter->counts, cpu, 
0)->run;
-                               nr++;
-                       }
+                       if (counter->merged_stat)
+                               continue;
+                       ad.val = ad.ena = ad.run = 0;
+                       ad.nr = 0;
+                       collect_aliases(counter, aggr_cb, &ad);
+                       nr = ad.nr;
+                       ena = ad.ena;
+                       run = ad.run;
+                       val = ad.val;
                        if (first && metric_only) {
                                first = false;
                                aggr_printout(counter, id, nr);
@@ -1257,6 +1312,21 @@ static void print_aggr_thread(struct perf_evsel 
*counter, char *prefix)
        }
 }
 
+struct caggr_data {
+       double avg, avg_enabled, avg_running;
+};
+
+static void counter_aggr_cb(struct perf_evsel *counter, void *data,
+                           bool first __maybe_unused)
+{
+       struct caggr_data *cd = data;
+       struct perf_stat_evsel *ps = counter->priv;
+
+       cd->avg += avg_stats(&ps->res_stats[0]);
+       cd->avg_enabled += avg_stats(&ps->res_stats[1]);
+       cd->avg_running += avg_stats(&ps->res_stats[2]);
+}
+
 /*
  * Print out the results of a single counter:
  * aggregated counts in system-wide mode
@@ -1264,23 +1334,32 @@ static void print_aggr_thread(struct perf_evsel 
*counter, char *prefix)
 static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 {
        FILE *output = stat_config.output;
-       struct perf_stat_evsel *ps = counter->priv;
-       double avg = avg_stats(&ps->res_stats[0]);
        double uval;
-       double avg_enabled, avg_running;
+       struct caggr_data cd = { .avg = 0.0 };
 
-       avg_enabled = avg_stats(&ps->res_stats[1]);
-       avg_running = avg_stats(&ps->res_stats[2]);
+       if (counter->merged_stat)
+               return;
+       collect_aliases(counter, counter_aggr_cb, &cd);
 
        if (prefix && !metric_only)
                fprintf(output, "%s", prefix);
 
-       uval = avg * counter->scale;
-       printout(-1, 0, counter, uval, prefix, avg_running, avg_enabled, avg);
+       uval = cd.avg * counter->scale;
+       printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, 
cd.avg);
        if (!metric_only)
                fprintf(output, "\n");
 }
 
+static void counter_cb(struct perf_evsel *counter, void *data,
+                      bool first __maybe_unused)
+{
+       struct aggr_data *ad = data;
+
+       ad->val += perf_counts(counter->counts, ad->cpu, 0)->val;
+       ad->ena += perf_counts(counter->counts, ad->cpu, 0)->ena;
+       ad->run += perf_counts(counter->counts, ad->cpu, 0)->run;
+}
+
 /*
  * Print out the results of a single counter:
  * does not use aggregated count in system-wide
@@ -1292,10 +1371,16 @@ static void print_counter(struct perf_evsel *counter, 
char *prefix)
        double uval;
        int cpu;
 
+       if (counter->merged_stat)
+               return;
+
        for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
-               val = perf_counts(counter->counts, cpu, 0)->val;
-               ena = perf_counts(counter->counts, cpu, 0)->ena;
-               run = perf_counts(counter->counts, cpu, 0)->run;
+               struct aggr_data ad = { .cpu = cpu };
+
+               collect_aliases(counter, counter_cb, &ad);
+               val = ad.val;
+               ena = ad.ena;
+               run = ad.run;
 
                if (prefix)
                        fprintf(output, "%s", prefix);
@@ -1633,6 +1718,7 @@ static const struct option stat_options[] = {
                    "list of cpus to monitor in system-wide"),
        OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
                    "disable CPU count aggregation", AGGR_NONE),
+       OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named 
events"),
        OPT_STRING('x', "field-separator", &csv_sep, "separator",
                   "print counts with custom separator"),
        OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index b1503b0ecdff..aae496a9bec1 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -128,6 +128,7 @@ struct perf_evsel {
        bool                    cmdline_group_boundary;
        struct list_head        config_terms;
        int                     bpf_fd;
+       bool                    merged_stat;
 };
 
 union u64_swap {
-- 
2.5.5

Reply via email to