'perf kvm record' on powerpc records the kvm_hv:kvm_guest_exit event instead of cycles. However, to get some kind of periodicity, we can't use every kvm exit; we can only use exits that are bound to happen at certain intervals. The HV_DECREMENTER interrupt forces the threads to exit after an interval of 10 ms.
This patch makes use of the 'kvm_guest_exit' tracepoint and checks the exit reason for any kvm exit. If it is HV_DECREMENTER, then the instruction pointer dumped along with this tracepoint is retrieved and mapped with the guest kallsyms. Signed-off-by: Ravi Bangoria <ravi.bango...@linux.vnet.ibm.com> Signed-off-by: Hemant Kumar <hem...@linux.vnet.ibm.com> --- tools/perf/util/event.c | 7 +++-- tools/perf/util/evsel.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/evsel.h | 7 +++++ tools/perf/util/session.c | 7 +++-- 4 files changed, 92 insertions(+), 6 deletions(-) diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index bc0a3f0..31bbc50 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1299,15 +1299,16 @@ int perf_event__preprocess_sample(const union perf_event *event, struct machine *machine, struct addr_location *al, struct perf_sample *sample, - struct perf_evsel *evsel __maybe_unused) + struct perf_evsel *evsel) { - u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + u8 cpumode; struct thread *thread = machine__findnew_thread(machine, sample->pid, sample->tid); - if (thread == NULL) return -1; + al->cpumode = cpumode = arch__get_cpumode(event, evsel, sample); + dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid); /* * Have we already created the kernel maps for this machine? 
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 0902fe4..a4d309e 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1622,6 +1622,82 @@ static inline bool overflow(const void *endp, u16 max_size, const void *offset, #define OVERFLOW_CHECK_u64(offset) \ OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64)) +#define KVMPPC_EXIT "kvm_hv:kvm_guest_exit" +#define HV_DECREMENTER 2432 +#define HV_BIT 3 +#define PR_BIT 49 +#define PPC_MAX 63 + +bool is_kvmppc_exit_event(struct perf_evsel *evsel) +{ + static unsigned int kvmppc_exit; + + if (evsel->attr.type != PERF_TYPE_TRACEPOINT) + return false; + + if (unlikely(kvmppc_exit == 0)) { + if (strcmp(KVMPPC_EXIT, evsel->name)) + return false; + kvmppc_exit = evsel->attr.config; + } else if (kvmppc_exit != evsel->attr.config) { + return false; + } + + return true; +} + +bool is_hv_dec_trap(struct perf_evsel *evsel, struct perf_sample *sample) +{ + int trap = perf_evsel__intval(evsel, sample, "trap"); + return trap == HV_DECREMENTER; +} + +bool is_perf_data_reorded_on_ppc(struct perf_evlist *evlist) +{ + if (evlist && evlist->env && evlist->env->arch) + return !strcmp(evlist->env->arch, "ppc64") || + !strcmp(evlist->env->arch, "ppc64le"); + return false; +} + +u8 arch__get_cpumode(const union perf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + unsigned long hv, pr, msr; + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + if (!(is_perf_data_reorded_on_ppc(evsel->evlist) && + perf_guest_only() && + is_kvmppc_exit_event(evsel))) + goto ret; + + if (sample->raw_data && is_hv_dec_trap(evsel, sample)) { + msr = perf_evsel__intval(evsel, sample, "msr"); + hv = msr & ((unsigned long)1 << (PPC_MAX - HV_BIT)); + pr = msr & ((unsigned long)1 << (PPC_MAX - PR_BIT)); + + if (!hv && pr) + cpumode = PERF_RECORD_MISC_GUEST_USER; + else + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + } + +ret: + return cpumode; +} + +u64 arch__get_ip(struct perf_evsel *evsel, 
struct perf_sample *sample) +{ + if (is_perf_data_reorded_on_ppc(evsel->evlist) && + perf_guest_only() && + is_kvmppc_exit_event(evsel) && + is_hv_dec_trap(evsel, sample)) + return perf_evsel__intval(evsel, sample, "pc"); + + return sample->ip; +} + int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -1795,6 +1871,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, OVERFLOW_CHECK(array, data->raw_size, max_size); data->raw_data = (void *)array; array = (void *)array + data->raw_size; + data->ip = arch__get_ip(evsel, data); } if (type & PERF_SAMPLE_BRANCH_STACK) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index efad78f..a5f5cb5 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -408,4 +408,11 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); +u64 arch__get_ip(struct perf_evsel *evsel, struct perf_sample *sample); +u8 arch__get_cpumode(const union perf_event *event, struct perf_evsel *evsel, + struct perf_sample *sample); +bool is_kvmppc_exit_event(struct perf_evsel *evsel); +bool is_hv_dec_trap(struct perf_evsel *evsel, struct perf_sample *sample); +bool is_perf_data_reorded_on_ppc(struct perf_evlist *evlist); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 40b7a0d..52beee8 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1131,9 +1131,10 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, - struct perf_sample *sample) + struct perf_sample *sample, + struct perf_evsel *evsel) { - const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + const u8 cpumode = 
arch__get_cpumode(event, evsel, sample); struct machine *machine; if (perf_guest && @@ -1237,7 +1238,7 @@ static int machines__deliver_event(struct machines *machines, evsel = perf_evlist__id2evsel(evlist, sample->id); - machine = machines__find_for_cpumode(machines, event, sample); + machine = machines__find_for_cpumode(machines, event, sample, evsel); switch (event->header.type) { case PERF_RECORD_SAMPLE: -- 2.1.4