Perf memory profiling doesn't support aux trace data, so the tool cannot receive the samples synthesized from hardware tracing data. The Arm64 platform doesn't support PMU events for memory load and store, but Armv8's SPE is a good candidate for memory profiling: the hardware tracer can record memory access operations with physical and virtual addresses for different cache levels, and it also gathers statistics on memory operations for remote access and TLB.
To allow the perf memory tool to support AUX trace, this patch adds the aux callbacks for the session structure. It passes the predefined synth options (like llc, flc, remote_access, tlb, etc.), which notifies the tracing decoder to generate the corresponding samples. This patch also invokes the standard API perf_event__process_attr() to register sample IDs into evlist. Signed-off-by: Leo Yan <leo....@linaro.org> --- tools/perf/builtin-mem.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index a7204634893c..6c8b5e956a4a 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -7,6 +7,7 @@ #include "perf.h" #include <subcmd/parse-options.h> +#include "util/auxtrace.h" #include "util/trace-event.h" #include "util/tool.h" #include "util/session.h" @@ -249,6 +250,15 @@ static int process_sample_event(struct perf_tool *tool, static int report_raw_events(struct perf_mem *mem) { + struct itrace_synth_opts itrace_synth_opts = { + .set = true, + .flc = true, /* First level cache samples */ + .llc = true, /* Last level cache samples */ + .tlb = true, /* TLB samples */ + .remote_access = true, /* Remote access samples */ + .default_no_sample = true, + }; + struct perf_data data = { .path = input_name, .mode = PERF_DATA_MODE_READ, @@ -261,6 +271,8 @@ static int report_raw_events(struct perf_mem *mem) if (IS_ERR(session)) return PTR_ERR(session); + session->itrace_synth_opts = &itrace_synth_opts; + if (mem->cpu_list) { ret = perf_session__cpu_bitmap(session, mem->cpu_list, mem->cpu_bitmap); @@ -394,6 +406,19 @@ parse_mem_ops(const struct option *opt, const char *str, int unset) return ret; } +static int process_attr(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct evlist **pevlist) +{ + int err; + + err = perf_event__process_attr(tool, event, pevlist); + if (err) + return err; + + return 0; +} + int cmd_mem(int argc, const char **argv) { struct stat st; @@ 
-405,8 +430,12 @@ int cmd_mem(int argc, const char **argv) .comm = perf_event__process_comm, .lost = perf_event__process_lost, .fork = perf_event__process_fork, + .attr = process_attr, .build_id = perf_event__process_build_id, .namespaces = perf_event__process_namespaces, + .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace = perf_event__process_auxtrace, + .auxtrace_error = perf_event__process_auxtrace_error, .ordered_events = true, }, .input_name = "perf.data", -- 2.17.1