With this patch, we can use the 'percore' event qualifier in perf-stat.
root@skl:/tmp# perf stat -e
cpu/event=0,umask=0x3,percore=1/,cpu/event=0,umask=0x3/ -a -A -I1000
1.000773050 S0-C0 98,352,832
cpu/event=0,umask=0x3,percore=1/ (50.01%)
1.000773050 S0-C1 103,763,057
cpu/event=0,umask=0x3,percore=1/ (50.02%)
1.000773050 S0-C2 196,776,995
cpu/event=0,umask=0x3,percore=1/ (50.02%)
1.000773050 S0-C3 176,493,779
cpu/event=0,umask=0x3,percore=1/ (50.02%)
1.000773050 CPU0 47,699,641 cpu/event=0,umask=0x3/
(50.02%)
1.000773050 CPU1 49,052,451 cpu/event=0,umask=0x3/
(49.98%)
1.000773050 CPU2 102,771,422 cpu/event=0,umask=0x3/
(49.98%)
1.000773050 CPU3 100,784,662 cpu/event=0,umask=0x3/
(49.98%)
1.000773050 CPU4 43,171,342 cpu/event=0,umask=0x3/
(49.98%)
1.000773050 CPU5 54,152,158 cpu/event=0,umask=0x3/
(49.98%)
1.000773050 CPU6 93,618,410 cpu/event=0,umask=0x3/
(49.98%)
1.000773050 CPU7 74,477,589 cpu/event=0,umask=0x3/
(49.99%)
In this example, we count the event 'ref-cycles' per-core and per-CPU in
one perf stat command-line. From the output, we can see:
S0-C0 = CPU0 + CPU4
S0-C1 = CPU1 + CPU5
S0-C2 = CPU2 + CPU6
S0-C3 = CPU3 + CPU7
So the result is as expected (the tiny differences can be ignored).
Note that the 'percore' event qualifier needs to be used with the '-A' option.
v4:
---
Rebase to latest perf/core branch.
v3:
---
No change
v2:
---
Change 'coresum' to 'percore'.
Signed-off-by: Jin Yao
---
tools/perf/Documentation/perf-stat.txt | 4
tools/perf/builtin-stat.c | 21 +
tools/perf/util/stat-display.c | 43 ++
tools/perf/util/stat.c | 8 ---
4 files changed, 69 insertions(+), 7 deletions(-)
diff --git a/tools/perf/Documentation/perf-stat.txt
b/tools/perf/Documentation/perf-stat.txt
index 39c05f8..1e312c2 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -43,6 +43,10 @@ report::
param1 and param2 are defined as formats for the PMU in
/sys/bus/event_source/devices/<pmu>/format/*
+ 'percore' is an event qualifier that sums up the event counts for both
+ hardware threads in a core. For example:
+ perf stat -A -a -e cpu/event,percore=1/,otherevent ...
+
- a symbolically formed event like 'pmu/config=M,config1=N,config2=K/'
where M, N, K are numbers (in decimal, hex, octal format).
Acceptable values for each of 'config', 'config1' and 'config2'
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 7f9c4b7..4a79fa9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -847,6 +847,18 @@ static int perf_stat__get_core_cached(struct
perf_stat_config *config,
return perf_stat__get_aggr(config, perf_stat__get_core, map, idx);
}
+static bool term_percore_set(void)
+{
+ struct perf_evsel *counter;
+
+ evlist__for_each_entry(evsel_list, counter) {
+ if (counter->percore)
+ return true;
+ }
+
+ return false;
+}
+
static int perf_stat_init_aggr_mode(void)
{
int nr;
@@ -867,6 +879,15 @@ static int perf_stat_init_aggr_mode(void)
stat_config.aggr_get_id = perf_stat__get_core_cached;
break;
case AGGR_NONE:
+ if (term_percore_set()) {
+ if (cpu_map__build_core_map(evsel_list->cpus,
+ &stat_config.aggr_map)) {
+ perror("cannot build core map");
+ return -1;
+ }
+ stat_config.aggr_get_id = perf_stat__get_core_cached;
+ }
+ break;
case AGGR_GLOBAL:
case AGGR_THREAD:
case AGGR_UNSET:
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index f5b4ee7..4c53bae 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -88,9 +88,17 @@ static void aggr_printout(struct perf_stat_config *config,
config->csv_sep);
break;
case AGGR_NONE:
- fprintf(config->output, "CPU%*d%s",
-