This patch contains core logic for enabling perf kvm {record|report} on powerpc.
For perf kvm record, this patch replaces the default event (cycles) with kvm_hv:kvm_guest_exit when recording guest data from the host. For perf kvm report, this patch makes use of the 'kvm_guest_exit' tracepoint and checks the exit reason of every kvm exit. If it is HV_DECREMENTER, the instruction pointer dumped along with this tracepoint is retrieved and mapped against the guest kallsyms. Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com> Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com> --- changes in v2: - Breakdown of v1 patch into two sub patches - Merged parse-tp.c and evlist.c from tools/perf/arch/powerpc/util/ into single file with name kvm.c tools/perf/arch/powerpc/util/Build | 1 + tools/perf/arch/powerpc/util/kvm.c | 104 +++++++++++++++++++++++++++++++++++++ tools/perf/util/event.c | 12 ++++- tools/perf/util/evlist.c | 9 ++++ tools/perf/util/evlist.h | 1 + tools/perf/util/evsel.c | 7 +++ tools/perf/util/evsel.h | 4 ++ tools/perf/util/session.c | 9 ++-- tools/perf/util/util.c | 5 ++ tools/perf/util/util.h | 1 + 10 files changed, 147 insertions(+), 6 deletions(-) create mode 100644 tools/perf/arch/powerpc/util/kvm.c diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build index 7b8b0d1..eb819e0 100644 --- a/tools/perf/arch/powerpc/util/Build +++ b/tools/perf/arch/powerpc/util/Build @@ -1,5 +1,6 @@ libperf-y += header.o libperf-y += sym-handling.o +libperf-y += kvm.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/kvm.c b/tools/perf/arch/powerpc/util/kvm.c new file mode 100644 index 0000000..317f29a --- /dev/null +++ b/tools/perf/arch/powerpc/util/kvm.c @@ -0,0 +1,104 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. 
+ * + * Copyright (C) 2016 Hemant Kumar Shaw, IBM Corporation + * Copyright (C) 2016 Ravikumar B. Bangoria, IBM Corporation + */ + +#include <linux/err.h> +#include "../../../util/evsel.h" +#include "../../../util/evlist.h" +#include "../../../util/trace-event.h" +#include "../../../util/session.h" +#include "../../../util/util.h" + +#define KVMPPC_EXIT "kvm_hv:kvm_guest_exit" +#define HV_DECREMENTER 2432 +#define HV_BIT 3 +#define PR_BIT 49 +#define PPC_MAX 63 + +/* + * To sample for only guest, record kvm_hv:kvm_guest_exit. + * Otherwise go via normal way(cycles). + */ +int perf_evlist__arch_add_default(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + if (!perf_guest_only()) + return -1; + + evsel = perf_evsel__newtp_idx("kvm_hv", "kvm_guest_exit", 0); + if (IS_ERR(evsel)) + return PTR_ERR(evsel); + + perf_evlist__add(evlist, evsel); + return 0; +} + +static bool is_kvmppc_exit_event(struct perf_evsel *evsel) +{ + static unsigned int kvmppc_exit; + + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) + return false; + + if (unlikely(kvmppc_exit == 0)) { + if (strcmp(KVMPPC_EXIT, perf_evsel__name(evsel))) + return false; + kvmppc_exit = evsel->attr.config; + } else if (kvmppc_exit != evsel->attr.config) { + return false; + } + + return true; +} + +static bool is_hv_dec_trap(struct perf_evsel *evsel, struct perf_sample *sample) +{ + int trap = perf_evsel__intval(evsel, sample, "trap"); + return trap == HV_DECREMENTER; +} + +/* + * Get the instruction pointer from the tracepoint data + */ +u64 arch__get_ip(struct perf_evsel *evsel, struct perf_sample *sample) +{ + if (perf_guest_only() && + is_kvmppc_exit_event(evsel) && + is_hv_dec_trap(evsel, sample)) + return perf_evsel__intval(evsel, sample, "pc"); + + return sample->ip; +} + +/* + * Get the HV and PR bits from the MSR and determine the cpumode accordingly + */ +u8 arch__get_cpumode(const union perf_event *event, struct perf_evsel *evsel, + struct perf_sample *sample) +{ + u64 hv, pr, msr; + u8 cpumode = 
event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + if (!evsel || !perf_guest_only() || !is_kvmppc_exit_event(evsel)) + goto ret; + + if (sample->raw_data && is_hv_dec_trap(evsel, sample)) { + msr = perf_evsel__intval(evsel, sample, "msr"); + hv = msr & ((u64)1 << (PPC_MAX - HV_BIT)); + pr = msr & ((u64)1 << (PPC_MAX - PR_BIT)); + + if (!hv && pr) + cpumode = PERF_RECORD_MISC_GUEST_USER; + else + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + } + +ret: + return cpumode; +} diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index f86e172..b8105a6 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1291,6 +1291,13 @@ void thread__find_addr_location(struct thread *thread, al->sym = NULL; } +u8 __weak arch__get_cpumode(const union perf_event *event, + struct perf_evsel *evsel __maybe_unused, + struct perf_sample *sample __maybe_unused) +{ + return event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; +} + /* * Callers need to drop the reference to al->thread, obtained in * machine__findnew_thread() @@ -1301,13 +1308,14 @@ int perf_event__preprocess_sample(const union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + u8 cpumode; struct thread *thread = machine__findnew_thread(machine, sample->pid, sample->tid); - if (thread == NULL) return -1; + al->cpumode = cpumode = arch__get_cpumode(event, evsel, sample); + dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); /* * Have we already created the kernel maps for this machine? 
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index d81f13d..d0dca72 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -231,6 +231,12 @@ void perf_event_attr__set_max_precise_ip(struct perf_event_attr *attr) } } +int __weak +perf_evlist__arch_add_default(struct perf_evlist *evlist __maybe_unused) +{ + return -1; +} + int perf_evlist__add_default(struct perf_evlist *evlist) { struct perf_event_attr attr = { @@ -239,6 +245,9 @@ int perf_evlist__add_default(struct perf_evlist *evlist) }; struct perf_evsel *evsel; + if (!perf_evlist__arch_add_default(evlist)) + return 0; + event_attr_init(&attr); perf_event_attr__set_max_precise_ip(&attr); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 7c4d9a2..98e24cd 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -75,6 +75,7 @@ void perf_evlist__delete(struct perf_evlist *evlist); void perf_evlist__add(struct perf_evlist *evlist, struct perf_evsel *entry); void perf_evlist__remove(struct perf_evlist *evlist, struct perf_evsel *evsel); +int perf_evlist__arch_add_default(struct perf_evlist *evlist); int perf_evlist__add_default(struct perf_evlist *evlist); int __perf_evlist__add_default_attrs(struct perf_evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4678086..afe1091 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1607,6 +1607,12 @@ static inline bool overflow(const void *endp, u16 max_size, const void *offset, #define OVERFLOW_CHECK_u64(offset) \ OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64)) +u64 __weak arch__get_ip(struct perf_evsel *evsel __maybe_unused, + struct perf_sample *sample) +{ + return sample->ip; +} + int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -1780,6 +1786,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, 
OVERFLOW_CHECK(array, data->raw_size, max_size); data->raw_data = (void *)array; array = (void *)array + data->raw_size; + data->ip = arch__get_ip(evsel, data); } if (type & PERF_SAMPLE_BRANCH_STACK) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8e75434..eb6f52e 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -400,4 +400,8 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); +u64 arch__get_ip(struct perf_evsel *evsel, struct perf_sample *sample); +u8 arch__get_cpumode(const union perf_event *event, struct perf_evsel *evsel, + struct perf_sample *sample); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 40b7a0d..1081ee0 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1130,10 +1130,11 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, } static struct machine *machines__find_for_cpumode(struct machines *machines, - union perf_event *event, - struct perf_sample *sample) + union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel) { - const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + u8 cpumode = arch__get_cpumode(event, evsel, sample); struct machine *machine; if (perf_guest && @@ -1237,7 +1238,7 @@ static int machines__deliver_event(struct machines *machines, evsel = perf_evlist__id2evsel(evlist, sample->id); - machine = machines__find_for_cpumode(machines, event, sample); + machine = machines__find_for_cpumode(machines, event, sample, evsel); switch (event->header.type) { case PERF_RECORD_SAMPLE: diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index 7a2da7e..5e48ef1 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -37,6 +37,11 @@ bool test_attr__enabled; bool perf_host = true; bool perf_guest = false; 
+bool perf_guest_only(void) +{ + return !perf_host && perf_guest; +} + void event_attr_init(struct perf_event_attr *attr) { if (!perf_host) diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 61650f0..eff1d8f 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -344,5 +344,6 @@ int fetch_kernel_version(unsigned int *puint, const char *perf_tip(const char *dirpath); bool is_regular_file(const char *file); +bool perf_guest_only(void); #endif /* GIT_COMPAT_UTIL_H */ -- 2.1.4