[PATCH 9/9] perf, tools, stat: Force --per-core mode for .agg-per-core aliases
From: Andi Kleen When an event alias is used that the kernel marked as .agg-per-core, force --per-core mode (and also require -a and forbid cgroups or per thread mode). This in turn means that --topdown forces --per-core mode. This is needed for TopDown in SMT mode, because it needs to measure all threads in a core together and merge the values to compute the correct percentages of how the pipeline is limited. We do this if any alias is agg-per-core. Add the code to parse the .agg-per-core attributes and propagate the information to the evsel. Then the main stat code does the necessary checks and forces per core mode. Open issue: in combination with -C ... we get wrong values. I think that's an existing bug that needs to be debugged/fixed separately. Signed-off-by: Andi Kleen --- tools/perf/builtin-stat.c | 18 ++ tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 1 + tools/perf/util/pmu.c | 23 +++ tools/perf/util/pmu.h | 2 ++ 5 files changed, 45 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index eec6c16..0df0aff 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1382,6 +1382,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) bool append_file = false; int output_fd = 0; const char *output_name = NULL; + struct perf_evsel *counter; const struct option options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1563,6 +1564,23 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) if (add_default_attributes()) goto out; + evlist__for_each (evsel_list, counter) { + /* Enable per core mode even if only a single event requires it. 
*/ + if (counter->agg_per_core) { + if (stat_config.aggr_mode != AGGR_GLOBAL && + stat_config.aggr_mode != AGGR_CORE) { + pr_err("per core event configuration requires per core mode\n"); + goto out; + } + stat_config.aggr_mode = AGGR_CORE; + if (nr_cgroups || !target__has_cpu(&target)) { + pr_err("per core event configuration requires system-wide mode (-a)\n"); + goto out; + } + break; + } + } + target__validate(&target); if (perf_evlist__create_maps(evsel_list, &target) < 0) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6a12908..85f02b8 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -100,6 +100,7 @@ struct perf_evsel { bool system_wide; bool tracking; bool per_pkg; + bool agg_per_core; /* parse modifier helper */ int exclude_GH; int nr_members; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 828936d..d2a5938 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -759,6 +759,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, evsel->unit = info.unit; evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; + evsel->agg_per_core = info.agg_per_core; evsel->snapshot = info.snapshot; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ce56354..abedb6a 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -189,6 +189,23 @@ perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, char *dir, char *name) return 0; } +static void +perf_pmu__parse_agg_per_core(struct perf_pmu_alias *alias, char *dir, char *name) +{ + char path[PATH_MAX]; + FILE *f; + int flag; + + snprintf(path, PATH_MAX, "%s/%s.agg-per-core", dir, name); + + f = fopen(path, "r"); + if (f && fscanf(f, "%d", &flag) == 1) { + alias->agg_per_core = flag != 0; + fclose(f); + } +} + + static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, char *dir, char *name) { @@ -237,6 +254,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, 
perf_pmu__parse_scale(alias, dir, name); perf_pmu__parse_per_pkg(alias, dir, name); perf_pmu__parse_snapshot(alias, dir, name); + perf_pmu__parse_agg_per_core(alias, dir, name); } list_add_tail(&alias->list, list); @@ -271,6 +289,8 @@ static inline bool pmu_alias_info_file(char *name) return true; if (len > 9
[PATCH 9/9] perf, tools, stat: Force --per-core mode for .agg-per-core aliases
From: Andi Kleen a...@linux.intel.com When an event alias is used that the kernel marked as .agg-per-core, force --per-core mode (and also require -a and forbid cgroups or per thread mode). This in turn means that --topdown forces --per-core mode. This is needed for TopDown in SMT mode, because it needs to measure all threads in a core together and merge the values to compute the correct percentages of how the pipeline is limited. We do this if any alias is agg-per-core. Add the code to parse the .agg-per-core attributes and propagate the information to the evsel. Then the main stat code does the necessary checks and forces per core mode. Open issue: in combination with -C ... we get wrong values. I think that's an existing bug that needs to be debugged/fixed separately. Signed-off-by: Andi Kleen a...@linux.intel.com --- tools/perf/builtin-stat.c | 18 ++ tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 1 + tools/perf/util/pmu.c | 23 +++ tools/perf/util/pmu.h | 2 ++ 5 files changed, 45 insertions(+) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index eec6c16..0df0aff 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1382,6 +1382,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) bool append_file = false; int output_fd = 0; const char *output_name = NULL; + struct perf_evsel *counter; const struct option options[] = { OPT_BOOLEAN('T', "transaction", &transaction_run, "hardware transaction statistics"), @@ -1563,6 +1564,23 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) if (add_default_attributes()) goto out; + evlist__for_each (evsel_list, counter) { + /* Enable per core mode even if only a single event requires it. 
*/ + if (counter->agg_per_core) { + if (stat_config.aggr_mode != AGGR_GLOBAL && + stat_config.aggr_mode != AGGR_CORE) { + pr_err("per core event configuration requires per core mode\n"); + goto out; + } + stat_config.aggr_mode = AGGR_CORE; + if (nr_cgroups || !target__has_cpu(&target)) { + pr_err("per core event configuration requires system-wide mode (-a)\n"); + goto out; + } + break; + } + } + target__validate(&target); if (perf_evlist__create_maps(evsel_list, &target) < 0) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6a12908..85f02b8 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -100,6 +100,7 @@ struct perf_evsel { bool system_wide; bool tracking; bool per_pkg; + bool agg_per_core; /* parse modifier helper */ int exclude_GH; int nr_members; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 828936d..d2a5938 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -759,6 +759,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data, evsel->unit = info.unit; evsel->scale = info.scale; evsel->per_pkg = info.per_pkg; + evsel->agg_per_core = info.agg_per_core; evsel->snapshot = info.snapshot; } diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index ce56354..abedb6a 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -189,6 +189,23 @@ perf_pmu__parse_per_pkg(struct perf_pmu_alias *alias, char *dir, char *name) return 0; } +static void +perf_pmu__parse_agg_per_core(struct perf_pmu_alias *alias, char *dir, char *name) +{ + char path[PATH_MAX]; + FILE *f; + int flag; + + snprintf(path, PATH_MAX, "%s/%s.agg-per-core", dir, name); + + f = fopen(path, "r"); + if (f && fscanf(f, "%d", &flag) == 1) { + alias->agg_per_core = flag != 0; + fclose(f); + } +} + + static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, char *dir, char *name) { @@ -237,6 +254,7 @@ static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, 
perf_pmu__parse_scale(alias, dir, name); perf_pmu__parse_per_pkg(alias, dir, name); perf_pmu__parse_snapshot(alias, dir, name); + perf_pmu__parse_agg_per_core(alias, dir, name); } list_add_tail(&alias->list, list); @@ -271,6 +289,8 @@ static inline bool pmu_alias_info_file(char