Re: [RFC PATCH 11/43] KVM: PPC: Book3S HV P9: Implement PMU save/restore in C

2021-07-09 Thread Athira Rajeev



> On 22-Jun-2021, at 4:27 PM, Nicholas Piggin  wrote:
> 
> Implement the P9 path PMU save/restore code in C, and remove the
> POWER9/10 code from the P7/8 path assembly.
> 
> -449 cycles (8533) POWER9 virt-mode NULL hcall
> 
> Signed-off-by: Nicholas Piggin 
> ---
> arch/powerpc/include/asm/asm-prototypes.h |   5 -
> arch/powerpc/kvm/book3s_hv.c  | 205 --
> arch/powerpc/kvm/book3s_hv_interrupts.S   |  13 +-
> arch/powerpc/kvm/book3s_hv_rmhandlers.S   |  43 +
> 4 files changed, 200 insertions(+), 66 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/asm-prototypes.h 
> b/arch/powerpc/include/asm/asm-prototypes.h
> index 02ee6f5ac9fe..928db8ef9a5a 100644
> --- a/arch/powerpc/include/asm/asm-prototypes.h
> +++ b/arch/powerpc/include/asm/asm-prototypes.h
> @@ -136,11 +136,6 @@ static inline void kvmppc_restore_tm_hv(struct kvm_vcpu 
> *vcpu, u64 msr,
>   bool preserve_nv) { }
> #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
> 
> -void kvmhv_save_host_pmu(void);
> -void kvmhv_load_host_pmu(void);
> -void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use);
> -void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu);
> -
> void kvmppc_p9_enter_guest(struct kvm_vcpu *vcpu);
> 
> long kvmppc_h_set_dabr(struct kvm_vcpu *vcpu, unsigned long dabr);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index f7349d150828..b1b94b3563b7 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -3635,6 +3635,188 @@ static noinline void kvmppc_run_core(struct 
> kvmppc_vcore *vc)
>   trace_kvmppc_run_core(vc, 1);
> }
> 
> +/*
> + * Privileged (non-hypervisor) host registers to save.
> + */
> +struct p9_host_os_sprs {
> + unsigned long dscr;
> + unsigned long tidr;
> + unsigned long iamr;
> + unsigned long amr;
> + unsigned long fscr;
> +
> + unsigned int pmc1;
> + unsigned int pmc2;
> + unsigned int pmc3;
> + unsigned int pmc4;
> + unsigned int pmc5;
> + unsigned int pmc6;
> + unsigned long mmcr0;
> + unsigned long mmcr1;
> + unsigned long mmcr2;
> + unsigned long mmcr3;
> + unsigned long mmcra;
> + unsigned long siar;
> + unsigned long sier1;
> + unsigned long sier2;
> + unsigned long sier3;
> + unsigned long sdar;
> +};
> +
> +static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
> +{
> + if (!(mmcr0 & MMCR0_FC))
> + goto do_freeze;
> + if (mmcra & MMCRA_SAMPLE_ENABLE)
> + goto do_freeze;
> + if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> + if (!(mmcr0 & MMCR0_PMCCEXT))
> + goto do_freeze;
> + if (!(mmcra & MMCRA_BHRB_DISABLE))
> + goto do_freeze;
> + }
> + return;


Hi Nick

When freezing the PMU, do we also need to set pmcregs_in_use to zero?

Also, why do we need the above conditions (the MMCRA_SAMPLE_ENABLE and
MMCR0_PMCCEXT checks) before freezing?

> +
> +do_freeze:
> + mmcr0 = MMCR0_FC;
> + mmcra = 0;
> + if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> + mmcr0 |= MMCR0_PMCCEXT;
> + mmcra = MMCRA_BHRB_DISABLE;
> + }
> +
> + mtspr(SPRN_MMCR0, mmcr0);
> + mtspr(SPRN_MMCRA, mmcra);
> + isync();
> +}
> +
> +static void save_p9_host_pmu(struct p9_host_os_sprs *host_os_sprs)
> +{
> + if (ppc_get_pmu_inuse()) {
> + /*
> +  * It might be better to put PMU handling (at least for the
> +  * host) in the perf subsystem because it knows more about what
> +  * is being used.
> +  */
> +
> + /* POWER9, POWER10 do not implement HPMC or SPMC */
> +
> + host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
> + host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
> +
> + freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
> +
> + host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
> + host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
> + host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
> + host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
> + host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
> + host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
> + host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
> + host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
> + host_os_sprs->sdar = mfspr(SPRN_SDAR);
> + host_os_sprs->siar = mfspr(SPRN_SIAR);
> + host_os_sprs->sier1 = mfspr(SPRN_SIER);
> +
> + if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> + host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
> + host_os_sprs->sier2 = mfspr(SPRN_SIER2);
> + host_os_sprs->sier3 = mfspr(SPRN_SIER3);
> + }
> + }
> +}
> +
> +static void load_p9_guest_pmu(struct kvm_vcpu *vcpu)
> +{
> + mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
> + mtspr(SPRN_PMC2, vcpu->arch.pmc[1]);
> + 

Re: [RFC PATCH 27/43] KVM: PPC: Book3S HV P9: Move host OS save/restore functions to built-in

2021-07-07 Thread Athira Rajeev



> On 22-Jun-2021, at 4:27 PM, Nicholas Piggin  wrote:
> 
> Move the P9 guest/host register switching functions to the built-in
> P9 entry code, and export it for nested to use as well.
> 
> This allows more flexibility in scheduling these supervisor privileged
> SPR accesses with the HV privileged and PR SPR accesses in the low level
> entry code.
> 
> Signed-off-by: Nicholas Piggin 
> ---
> arch/powerpc/kvm/book3s_hv.c  | 351 +-
> arch/powerpc/kvm/book3s_hv.h  |  39 +++
> arch/powerpc/kvm/book3s_hv_p9_entry.c | 332 
> 3 files changed, 372 insertions(+), 350 deletions(-)
> create mode 100644 arch/powerpc/kvm/book3s_hv.h
> 
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index 35749b0b663f..a7660af22161 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -79,6 +79,7 @@
> #include 
> 
> #include "book3s.h"
> +#include "book3s_hv.h"
> 
> #define CREATE_TRACE_POINTS
> #include "trace_hv.h"
> @@ -3675,356 +3676,6 @@ static noinline void kvmppc_run_core(struct 
> kvmppc_vcore *vc)
>   trace_kvmppc_run_core(vc, 1);
> }
> 
> -/*
> - * Privileged (non-hypervisor) host registers to save.
> - */
> -struct p9_host_os_sprs {
> - unsigned long dscr;
> - unsigned long tidr;
> - unsigned long iamr;
> - unsigned long amr;
> - unsigned long fscr;
> -
> - unsigned int pmc1;
> - unsigned int pmc2;
> - unsigned int pmc3;
> - unsigned int pmc4;
> - unsigned int pmc5;
> - unsigned int pmc6;
> - unsigned long mmcr0;
> - unsigned long mmcr1;
> - unsigned long mmcr2;
> - unsigned long mmcr3;
> - unsigned long mmcra;
> - unsigned long siar;
> - unsigned long sier1;
> - unsigned long sier2;
> - unsigned long sier3;
> - unsigned long sdar;
> -};
> -
> -static void freeze_pmu(unsigned long mmcr0, unsigned long mmcra)
> -{
> - if (!(mmcr0 & MMCR0_FC))
> - goto do_freeze;
> - if (mmcra & MMCRA_SAMPLE_ENABLE)
> - goto do_freeze;
> - if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> - if (!(mmcr0 & MMCR0_PMCCEXT))
> - goto do_freeze;
> - if (!(mmcra & MMCRA_BHRB_DISABLE))
> - goto do_freeze;
> - }
> - return;
> -
> -do_freeze:
> - mmcr0 = MMCR0_FC;
> - mmcra = 0;
> - if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> - mmcr0 |= MMCR0_PMCCEXT;
> - mmcra = MMCRA_BHRB_DISABLE;
> - }
> -
> - mtspr(SPRN_MMCR0, mmcr0);
> - mtspr(SPRN_MMCRA, mmcra);
> - isync();
> -}
> -
> -static void switch_pmu_to_guest(struct kvm_vcpu *vcpu,
> - struct p9_host_os_sprs *host_os_sprs)
> -{
> - struct lppaca *lp;
> - int load_pmu = 1;
> -
> - lp = vcpu->arch.vpa.pinned_addr;
> - if (lp)
> - load_pmu = lp->pmcregs_in_use;
> -
> - if (load_pmu)
> -   vcpu->arch.hfscr |= HFSCR_PM;
> -
> - /* Save host */
> - if (ppc_get_pmu_inuse()) {
> - /*
> -  * It might be better to put PMU handling (at least for the
> -  * host) in the perf subsystem because it knows more about what
> -  * is being used.
> -  */
> -
> - /* POWER9, POWER10 do not implement HPMC or SPMC */
> -
> - host_os_sprs->mmcr0 = mfspr(SPRN_MMCR0);
> - host_os_sprs->mmcra = mfspr(SPRN_MMCRA);
> -
> - freeze_pmu(host_os_sprs->mmcr0, host_os_sprs->mmcra);
> -
> - host_os_sprs->pmc1 = mfspr(SPRN_PMC1);
> - host_os_sprs->pmc2 = mfspr(SPRN_PMC2);
> - host_os_sprs->pmc3 = mfspr(SPRN_PMC3);
> - host_os_sprs->pmc4 = mfspr(SPRN_PMC4);
> - host_os_sprs->pmc5 = mfspr(SPRN_PMC5);
> - host_os_sprs->pmc6 = mfspr(SPRN_PMC6);
> - host_os_sprs->mmcr1 = mfspr(SPRN_MMCR1);
> - host_os_sprs->mmcr2 = mfspr(SPRN_MMCR2);
> - host_os_sprs->sdar = mfspr(SPRN_SDAR);
> - host_os_sprs->siar = mfspr(SPRN_SIAR);
> - host_os_sprs->sier1 = mfspr(SPRN_SIER);
> -
> - if (cpu_has_feature(CPU_FTR_ARCH_31)) {
> - host_os_sprs->mmcr3 = mfspr(SPRN_MMCR3);
> - host_os_sprs->sier2 = mfspr(SPRN_SIER2);
> - host_os_sprs->sier3 = mfspr(SPRN_SIER3);
> - }
> - }
> -
> -#ifdef CONFIG_PPC_PSERIES
> - if (kvmhv_on_pseries()) {
> - if (vcpu->arch.vpa.pinned_addr) {
> - struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
> - get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
> - } else {
> - get_lppaca()->pmcregs_in_use = 1;
> - }
> - }
> -#endif
> -
> - /* Load guest */
> - if (vcpu->arch.hfscr & HFSCR_PM) {
> - mtspr(SPRN_PMC1, vcpu->arch.pmc[0]);
> - mtspr(SPRN_PMC2, 

[PATCH] powerpc/perf: Fix cycles/instructions as PM_CYC/PM_INST_CMPL in power10

2021-07-07 Thread Athira Rajeev
From: Athira Rajeev 

Power10 performance monitoring unit (PMU) driver uses performance
monitor counter 5 (PMC5) and performance monitor counter 6 (PMC6)
for counting instructions and cycles. The event used for cycles is
PM_RUN_CYC and for instructions is PM_RUN_INST_CMPL. But counting of these
events in the wait state is controlled by the CC56RUN bit setting in
Monitor Mode Control Register 0 (MMCR0). If the CC56RUN bit is not
set, PMC5/6 will not count when CTRL[RUN] is zero.

Patch sets the CC56RUN bit in MMCR0 for power10 which makes PMC5 and
PMC6 count instructions and cycles regardless of the run bit. With this
change, these events are also now renamed to PM_CYC and PM_INST_CMPL
rather than PM_RUN_CYC and PM_RUN_INST_CMPL.

Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
Signed-off-by: Athira Rajeev 
Reviewed-by: Madhavan Srinivasan 
---
Notes on testing done for this change:
 Tested this patch change with a kernel module that
 turns off and turns on the runlatch. The kernel module also
 reads the counter values for PMC5 and PMC6 during the
 period when runlatch is off.
 - Started PMU counters via "perf stat" and loaded the
   test module.
 - Checked the counter values captured from module during
   the runlatch off period.
 - Verified that counters were frozen without the patch and
   with the patch, observed counters were incrementing.

 arch/powerpc/perf/power10-events-list.h |  8 +++---
 arch/powerpc/perf/power10-pmu.c | 44 +++--
 2 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/perf/power10-events-list.h 
b/arch/powerpc/perf/power10-events-list.h
index 93be719..564f1409 100644
--- a/arch/powerpc/perf/power10-events-list.h
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -9,10 +9,10 @@
 /*
  * Power10 event codes.
  */
-EVENT(PM_RUN_CYC,  0x600f4);
+EVENT(PM_CYC,  0x600f4);
 EVENT(PM_DISP_STALL_CYC,   0x100f8);
 EVENT(PM_EXEC_STALL,   0x30008);
-EVENT(PM_RUN_INST_CMPL,0x500fa);
+EVENT(PM_INST_CMPL,0x500fa);
 EVENT(PM_BR_CMPL,   0x4d05e);
 EVENT(PM_BR_MPRED_CMPL, 0x400f6);
 EVENT(PM_BR_FIN,   0x2f04a);
@@ -50,8 +50,8 @@
 /* ITLB Reloaded */
 EVENT(PM_ITLB_MISS,0x400fc);
 
-EVENT(PM_RUN_CYC_ALT,  0x0001e);
-EVENT(PM_RUN_INST_CMPL_ALT,0x2);
+EVENT(PM_CYC_ALT,  0x0001e);
+EVENT(PM_INST_CMPL_ALT,0x2);
 
 /*
  * Memory Access Events
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index f9d64c6..9dd75f3 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -91,8 +91,8 @@
 
 /* Table of alternatives, sorted by column 0 */
 static const unsigned int power10_event_alternatives[][MAX_ALT] = {
-   { PM_RUN_CYC_ALT,   PM_RUN_CYC },
-   { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL },
+   { PM_CYC_ALT,   PM_CYC },
+   { PM_INST_CMPL_ALT, PM_INST_CMPL },
 };
 
 static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
@@ -118,8 +118,8 @@ static int power10_check_attr_config(struct perf_event *ev)
return 0;
 }
 
-GENERIC_EVENT_ATTR(cpu-cycles, PM_RUN_CYC);
-GENERIC_EVENT_ATTR(instructions,   PM_RUN_INST_CMPL);
+GENERIC_EVENT_ATTR(cpu-cycles, PM_CYC);
+GENERIC_EVENT_ATTR(instructions,   PM_INST_CMPL);
 GENERIC_EVENT_ATTR(branch-instructions,PM_BR_CMPL);
 GENERIC_EVENT_ATTR(branch-misses,  PM_BR_MPRED_CMPL);
 GENERIC_EVENT_ATTR(cache-references,   PM_LD_REF_L1);
@@ -148,8 +148,8 @@ static int power10_check_attr_config(struct perf_event *ev)
 CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
 
 static struct attribute *power10_events_attr_dd1[] = {
-   GENERIC_EVENT_PTR(PM_RUN_CYC),
-   GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
+   GENERIC_EVENT_PTR(PM_CYC),
+   GENERIC_EVENT_PTR(PM_INST_CMPL),
GENERIC_EVENT_PTR(PM_BR_CMPL),
GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
GENERIC_EVENT_PTR(PM_LD_REF_L1),
@@ -173,8 +173,8 @@ static int power10_check_attr_config(struct perf_event *ev)
 };
 
 static struct attribute *power10_events_attr[] = {
-   GENERIC_EVENT_PTR(PM_RUN_CYC),
-   GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
+   GENERIC_EVENT_PTR(PM_CYC),
+   GENERIC_EVENT_PTR(PM_INST_CMPL),
GENERIC_EVENT_PTR(PM_BR_FIN),
GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
GENERIC_EVENT_PTR(PM_LD_REF_L1),
@@ -271,8 +271,8 @@ static int power10_check_attr_config(struct perf_event *ev)
 };
 
 static int power10_generic_events_dd1[] = {
-   [PERF
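
(The power10-pmu.c hunk is cut off by the archive at this point. As a rough,
hedged sketch of the kind of change the description implies: let the common
code compute the MMCRs, then OR in the CC56RUN bit so PMC5/PMC6 keep counting
when CTRL[RUN] is 0. The wrapper name, the exact isa207_compute_mmcr()
signature and the MMCR0_C56RUN definition from asm/reg.h are assumptions for
illustration, not the actual hunk:)

static int power10_compute_mmcr(u64 ev[], int n_ev, unsigned int hwc[],
				struct mmcr_regs *mmcr,
				struct perf_event *pevents[], u32 flags)
{
	int ret;

	/* Let the common ISA v3.0/v3.1 code fill in MMCR0/1/2/3 first. */
	ret = isa207_compute_mmcr(ev, n_ev, hwc, mmcr, pevents, flags);
	if (!ret)
		/* PMC5/6 count even when CTRL[RUN] = 0. */
		mmcr->mmcr0 |= MMCR0_C56RUN;

	return ret;
}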

[PATCH 0/2] powerpc/perf: Add instruction and data address registers to extended regs

2021-06-20 Thread Athira Rajeev
The patch set adds PMU registers, namely the Sampled Instruction Address Register
(SIAR) and Sampled Data Address Register (SDAR), as part of the extended regs
in PowerPC. These registers provide the instruction/data address, and
adding them to the extended regs helps with debugging.

Patch 1/2 adds SIAR and SDAR as part of the extended regs mask.
Patch 2/2 includes perf tools side changes to add the SPRs to
sample_reg_mask to use with -I? option.

Athira Rajeev (2):
  powerpc/perf: Expose instruction and data address registers as part of
extended regs
  tools/perf: Add perf tools support to expose instruction and data
address registers as part of extended regs

 arch/powerpc/include/uapi/asm/perf_regs.h   | 12 +++-
 arch/powerpc/perf/perf_regs.c   |  4 
 tools/arch/powerpc/include/uapi/asm/perf_regs.h | 12 +++-
 tools/perf/arch/powerpc/include/perf_regs.h |  2 ++
 tools/perf/arch/powerpc/util/perf_regs.c|  2 ++
 5 files changed, 22 insertions(+), 10 deletions(-)

-- 
1.8.3.1



[PATCH 1/2] powerpc/perf: Expose instruction and data address registers as part of extended regs

2021-06-20 Thread Athira Rajeev
Patch adds support to include Sampled Instruction Address Register
(SIAR) and Sampled Data Address Register (SDAR) SPRs as part of extended
registers. Update the definition of PERF_REG_PMU_MASK_300/31 and
PERF_REG_EXTENDED_MAX to include these SPRs.
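
(Not part of the patch: a quick sanity check of the mask values changed in the
hunks below, using plain bit arithmetic.)

	/*
	 * 12 SPRs MMCR0..PMC6 -> (1UL << 12) - 1 = 0xfff   (old masks)
	 * 14 SPRs MMCR0..SIAR -> (1UL << 14) - 1 = 0x3fff  (SDAR and SIAR added)
	 *
	 * For CPU_FTR_ARCH_300, PERF_EXCLUDE_REG_EXT_300 still carves out the
	 * three SPRs that do not exist there (MMCR3, SIER2, SIER3), which is
	 * why the comment counts go from 9 to 11 usable SPRs.
	 */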

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/include/uapi/asm/perf_regs.h | 12 +++-
 arch/powerpc/perf/perf_regs.c |  4 
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
index 578b3ee..cf5eee5 100644
--- a/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -61,6 +61,8 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_PMC4,
PERF_REG_POWERPC_PMC5,
PERF_REG_POWERPC_PMC6,
+   PERF_REG_POWERPC_SDAR,
+   PERF_REG_POWERPC_SIAR,
/* Max regs without the extended regs */
PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
 };
@@ -72,16 +74,16 @@ enum perf_event_powerpc_regs {
 
 /*
  * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300
- * includes 9 SPRS from MMCR0 to PMC6 excluding the
+ * includes 11 SPRS from MMCR0 to SIAR excluding the
  * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300.
  */
-#define PERF_REG_PMU_MASK_300   ((0xfffULL << PERF_REG_POWERPC_MMCR0) - 
PERF_EXCLUDE_REG_EXT_300)
+#define PERF_REG_PMU_MASK_300   ((0x3fffULL << PERF_REG_POWERPC_MMCR0) - 
PERF_EXCLUDE_REG_EXT_300)
 
 /*
  * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31
- * includes 12 SPRs from MMCR0 to PMC6.
+ * includes 14 SPRs from MMCR0 to SIAR.
  */
-#define PERF_REG_PMU_MASK_31   (0xfffULL << PERF_REG_POWERPC_MMCR0)
+#define PERF_REG_PMU_MASK_31   (0x3fffULL << PERF_REG_POWERPC_MMCR0)
 
-#define PERF_REG_EXTENDED_MAX  (PERF_REG_POWERPC_PMC6 + 1)
+#define PERF_REG_EXTENDED_MAX  (PERF_REG_POWERPC_SIAR + 1)
 #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index b931eed..51d31b6 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -90,7 +90,11 @@ static u64 get_ext_regs_value(int idx)
return mfspr(SPRN_SIER2);
case PERF_REG_POWERPC_SIER3:
return mfspr(SPRN_SIER3);
+   case PERF_REG_POWERPC_SDAR:
+   return mfspr(SPRN_SDAR);
 #endif
+   case PERF_REG_POWERPC_SIAR:
+   return mfspr(SPRN_SIAR);
default: return 0;
}
 }
-- 
1.8.3.1



[PATCH 2/2] tools/perf: Add perf tools support to expose instruction and data address registers as part of extended regs

2021-06-20 Thread Athira Rajeev
The patch enables presenting the Sampled Instruction Address Register (SIAR)
and Sampled Data Address Register (SDAR) SPRs as part of the extended registers
for the perf tool. Add these SPRs to sample_reg_mask on the tool side (to use
with the -I? option).
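
(A hedged usage sketch, not part of the patch: how a consumer could request the
two new SPRs through the generic interrupt-regs sampling interface once the
series is applied. Only the PERF_REG_POWERPC_SIAR/SDAR enum values come from
the hunks in patch 1/2; everything else is ordinary perf_event_open() usage,
and the header paths are assumptions.)

	#include <linux/perf_event.h>
	#include <asm/perf_regs.h>

	struct perf_event_attr attr = {
		.type		  = PERF_TYPE_HARDWARE,
		.config		  = PERF_COUNT_HW_CPU_CYCLES,
		.sample_period	  = 100000,
		.sample_type	  = PERF_SAMPLE_IP | PERF_SAMPLE_REGS_INTR,
		.sample_regs_intr = (1ULL << PERF_REG_POWERPC_SIAR) |
				    (1ULL << PERF_REG_POWERPC_SDAR),
	};

On the tool side, after this patch the new registers should also show up in
"perf record -I?" and be selectable with e.g. "perf record --intr-regs=siar,sdar".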

Signed-off-by: Athira Rajeev 
---
 tools/arch/powerpc/include/uapi/asm/perf_regs.h | 12 +++-
 tools/perf/arch/powerpc/include/perf_regs.h |  2 ++
 tools/perf/arch/powerpc/util/perf_regs.c|  2 ++
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h 
b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
index 578b3ee..cf5eee5 100644
--- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -61,6 +61,8 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_PMC4,
PERF_REG_POWERPC_PMC5,
PERF_REG_POWERPC_PMC6,
+   PERF_REG_POWERPC_SDAR,
+   PERF_REG_POWERPC_SIAR,
/* Max regs without the extended regs */
PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
 };
@@ -72,16 +74,16 @@ enum perf_event_powerpc_regs {
 
 /*
  * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300
- * includes 9 SPRS from MMCR0 to PMC6 excluding the
+ * includes 11 SPRS from MMCR0 to SIAR excluding the
  * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300.
  */
-#define PERF_REG_PMU_MASK_300   ((0xfffULL << PERF_REG_POWERPC_MMCR0) - 
PERF_EXCLUDE_REG_EXT_300)
+#define PERF_REG_PMU_MASK_300   ((0x3fffULL << PERF_REG_POWERPC_MMCR0) - 
PERF_EXCLUDE_REG_EXT_300)
 
 /*
  * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31
- * includes 12 SPRs from MMCR0 to PMC6.
+ * includes 14 SPRs from MMCR0 to SIAR.
  */
-#define PERF_REG_PMU_MASK_31   (0xfffULL << PERF_REG_POWERPC_MMCR0)
+#define PERF_REG_PMU_MASK_31   (0x3fffULL << PERF_REG_POWERPC_MMCR0)
 
-#define PERF_REG_EXTENDED_MAX  (PERF_REG_POWERPC_PMC6 + 1)
+#define PERF_REG_EXTENDED_MAX  (PERF_REG_POWERPC_SIAR + 1)
 #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
index 04e5dc0..93339d1 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -77,6 +77,8 @@
[PERF_REG_POWERPC_PMC4] = "pmc4",
[PERF_REG_POWERPC_PMC5] = "pmc5",
[PERF_REG_POWERPC_PMC6] = "pmc6",
+   [PERF_REG_POWERPC_SDAR] = "sdar",
+   [PERF_REG_POWERPC_SIAR] = "siar",
 };
 
 static inline const char *__perf_reg_name(int id)
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c 
b/tools/perf/arch/powerpc/util/perf_regs.c
index 8116a25..8d07a78 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -74,6 +74,8 @@
SMPL_REG(pmc4, PERF_REG_POWERPC_PMC4),
SMPL_REG(pmc5, PERF_REG_POWERPC_PMC5),
SMPL_REG(pmc6, PERF_REG_POWERPC_PMC6),
+   SMPL_REG(sdar, PERF_REG_POWERPC_SDAR),
+   SMPL_REG(siar, PERF_REG_POWERPC_SIAR),
SMPL_REG_END
 };
 
-- 
1.8.3.1



[PATCH] powerpc/perf: Fix crash with 'perf_instruction_pointer' when pmu is not set

2021-06-17 Thread Athira Rajeev
On systems without any specific PMU driver support registered, running
perf record causes Oops.

The relevant portion from call trace:

BUG: Kernel NULL pointer dereference on read at 0x0040
Faulting instruction address: 0xc0021f0c
Oops: Kernel access of bad area, sig: 11 [#1]
BE PAGE_SIZE=4K PREEMPT CMPCPRO
SAF3000 DIE NOTIFICATION
CPU: 0 PID: 442 Comm: null_syscall Not tainted 
5.13.0-rc6-s3k-dev-01645-g7649ee3d2957 #5164
NIP:  c0021f0c LR: c00e8ad8 CTR: c00d8a5c
NIP [c0021f0c] perf_instruction_pointer+0x10/0x60
LR [c00e8ad8] perf_prepare_sample+0x344/0x674
Call Trace:
[e6775880] [c00e8810] perf_prepare_sample+0x7c/0x674 (unreliable)
[e67758c0] [c00e8e44] perf_event_output_forward+0x3c/0x94
[e6775910] [c00dea8c] __perf_event_overflow+0x74/0x14c
[e6775930] [c00dec5c] perf_swevent_hrtimer+0xf8/0x170
[e6775a40] [c008c8d0] __hrtimer_run_queues.constprop.0+0x160/0x318
[e6775a90] [c008d94c] hrtimer_interrupt+0x148/0x3b0
[e6775ae0] [c000c0c0] timer_interrupt+0xc4/0x22c
[e6775b10] [c00046f0] Decrementer_virt+0xb8/0xbc

During perf record session, perf_instruction_pointer() is called to
capture the sample ip. This function in core-book3s accesses ppmu->flags.
If a platform specific PMU driver is not registered, ppmu is set to NULL
and accessing its members results in a crash. Fix this crash by checking
if ppmu is set.

Fixes: 2ca13a4cc56c ("powerpc/perf: Use regs->nip when SIAR is zero")
[ Including stable for kernel versions 5.11 and 5.12 ]
Cc: sta...@vger.kernel.org
Signed-off-by: Athira Rajeev 
Reported-by: Christophe Leroy 
Tested-by: Christophe Leroy 
---
 arch/powerpc/perf/core-book3s.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 16d4d1b..5162241 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2254,7 +2254,7 @@ unsigned long perf_instruction_pointer(struct pt_regs 
*regs)
bool use_siar = regs_use_siar(regs);
unsigned long siar = mfspr(SPRN_SIAR);
 
-   if (ppmu->flags & PPMU_P10_DD1) {
+   if (ppmu && (ppmu->flags & PPMU_P10_DD1)) {
if (siar)
return siar;
else
-- 
1.8.3.1



Re: Oops (NULL pointer) with 'perf record' of selftest 'null_syscall'

2021-06-17 Thread Athira Rajeev



> On 17-Jun-2021, at 10:05 PM, Christophe Leroy  
> wrote:
> 
> 
> 
>> On 17/06/2021 at 08:36, Athira Rajeev wrote:
>>> On 16-Jun-2021, at 11:56 AM, Christophe Leroy  
>>> wrote:
>>> 
>>> 
>>> 
>>> On 16/06/2021 at 05:40, Athira Rajeev wrote:
>>>>> On 16-Jun-2021, at 8:53 AM, Madhavan Srinivasan  
>>>>> wrote:
>>>>> 
>>>>> 
>>>>> On 6/15/21 8:35 PM, Christophe Leroy wrote:
>>>>>> For your information, I'm getting the following Oops. Detected with 
>>>>>> 5.13-rc6, it also oopses on 5.12 and 5.11.
>>>>>> Runs ok on 5.10. I'm starting bisecting now.
>>>>> 
>>>>> 
>>>>> Thanks for reporting, got the issue. What has happened in this case is 
>>>>> that, pmu device is not registered
>>>>> and trying to access the instruction point which will land in 
>>>>> perf_instruction_pointer(). And recently I have added
>>>>> a workaround patch for power10 DD1 which has caused this breakage. My 
>>>>> bad. We are working on a fix patch
>>>>> for the same and will post it out. Sorry again.
>>>>> 
>>>> Hi Christophe,
>>>> Can you please try with below patch in your environment and test if it 
>>>> works for you.
>>>> From 55d3afc9369dfbe28a7152c8e9f856c11c7fe43d Mon Sep 17 00:00:00 2001
>>>> From: Athira Rajeev 
>>>> Date: Tue, 15 Jun 2021 22:28:11 -0400
>>>> Subject: [PATCH] powerpc/perf: Fix crash with 'perf_instruction_pointer' 
>>>> when
>>>> pmu is not set
>>>> On systems without any specific PMU driver support registered, running
>>>> perf record causes oops:
>>>> [   38.841073] NIP [c013af54] perf_instruction_pointer+0x24/0x100
>>>> [   38.841079] LR [c03c7358] perf_prepare_sample+0x4e8/0x820
>>>> [   38.841085] --- interrupt: 300
>>>> [   38.841088] [c0001cf03440] [c03c6ef8] 
>>>> perf_prepare_sample+0x88/0x820 (unreliable)
>>>> [   38.841096] [c0001cf034a0] [c03c76d0] 
>>>> perf_event_output_forward+0x40/0xc0
>>>> [   38.841104] [c0001cf03520] [c03b45e8] 
>>>> __perf_event_overflow+0x88/0x1b0
>>>> [   38.841112] [c0001cf03570] [c03b480c] 
>>>> perf_swevent_hrtimer+0xfc/0x1a0
>>>> [   38.841119] [c0001cf03740] [c02399cc] 
>>>> __hrtimer_run_queues+0x17c/0x380
>>>> [   38.841127] [c0001cf037c0] [c023a5f8] 
>>>> hrtimer_interrupt+0x128/0x2f0
>>>> [   38.841135] [c0001cf03870] [c002962c] 
>>>> timer_interrupt+0x13c/0x370
>>>> [   38.841143i] [c0001cf038d0] [c0009ba4] 
>>>> decrementer_common_virt+0x1a4/0x1b0
>>>> [   38.841151] --- interrupt: 900 at copypage_power7+0xd4/0x1c0
>>>> During perf record session, perf_instruction_pointer() is called to
>>>> capture the sample ip. This function in core-book3s accesses ppmu->flags.
>>>> If a platform specific PMU driver is not registered, ppmu is set to NULL
>>>> and accessing its members results in a crash. Fix this crash by checking
>>>> if ppmu is set.
>>>> Signed-off-by: Athira Rajeev 
>>>> Reported-by: Christophe Leroy 
>>> 
>>> Fixes: 2ca13a4cc56c ("powerpc/perf: Use regs->nip when SIAR is zero")
>>> Cc: sta...@vger.kernel.org
>>> Tested-by: Christophe Leroy 
>> Hi Christophe,
>> Thanks for testing with the change. I have a newer version where I have 
>> added braces around the check.
>> Can you please check once and can I add your tested-by for the below patch.
> 
> Yes it works, you can add my Tested-by:
> Please also add Cc: sta...@vger.kernel.org, this needs to be backported as 
> soon as possible.

Sure Christophe, will add Cc also. Thanks for testing.

Athira
> 
> Thanks
> Christophe



Re: Oops (NULL pointer) with 'perf record' of selftest 'null_syscall'

2021-06-15 Thread Athira Rajeev



> On 16-Jun-2021, at 8:53 AM, Madhavan Srinivasan  wrote:
> 
> 
> On 6/15/21 8:35 PM, Christophe Leroy wrote:
>> For your information, I'm getting the following Oops. Detected with 
>> 5.13-rc6, it also oopses on 5.12 and 5.11.
>> Runs ok on 5.10. I'm starting bisecting now.
> 
> 
> Thanks for reporting, got the issue. What has happened in this case is that, 
> pmu device is not registered
> and trying to access the instruction pointer, which will land in 
> perf_instruction_pointer(). And recently I have added
> a workaround patch for power10 DD1 which has caused this breakage. My bad. We 
> are working on a fix patch
> for the same and will post it out. Sorry again.
> 

Hi Christophe,

Can you please try with below patch in your environment and test if it works 
for you.

From 55d3afc9369dfbe28a7152c8e9f856c11c7fe43d Mon Sep 17 00:00:00 2001
From: Athira Rajeev 
Date: Tue, 15 Jun 2021 22:28:11 -0400
Subject: [PATCH] powerpc/perf: Fix crash with 'perf_instruction_pointer' when
 pmu is not set

On systems without any specific PMU driver support registered, running
perf record causes oops:

[   38.841073] NIP [c013af54] perf_instruction_pointer+0x24/0x100
[   38.841079] LR [c03c7358] perf_prepare_sample+0x4e8/0x820
[   38.841085] --- interrupt: 300
[   38.841088] [c0001cf03440] [c03c6ef8] 
perf_prepare_sample+0x88/0x820 (unreliable)
[   38.841096] [c0001cf034a0] [c03c76d0] 
perf_event_output_forward+0x40/0xc0
[   38.841104] [c0001cf03520] [c03b45e8] 
__perf_event_overflow+0x88/0x1b0
[   38.841112] [c0001cf03570] [c03b480c] 
perf_swevent_hrtimer+0xfc/0x1a0
[   38.841119] [c0001cf03740] [c02399cc] 
__hrtimer_run_queues+0x17c/0x380
[   38.841127] [c0001cf037c0] [c023a5f8] 
hrtimer_interrupt+0x128/0x2f0
[   38.841135] [c0001cf03870] [c002962c] timer_interrupt+0x13c/0x370
[   38.841143i] [c0001cf038d0] [c0009ba4] 
decrementer_common_virt+0x1a4/0x1b0
[   38.841151] --- interrupt: 900 at copypage_power7+0xd4/0x1c0

During perf record session, perf_instruction_pointer() is called to
capture the sample ip. This function in core-book3s accesses ppmu->flags.
If a platform specific PMU driver is not registered, ppmu is set to NULL
and accessing its members results in a crash. Fix this crash by checking
if ppmu is set.

Signed-off-by: Athira Rajeev 
Reported-by: Christophe Leroy 
---
 arch/powerpc/perf/core-book3s.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 16d4d1b6a1ff..816756588cb7 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2254,7 +2254,7 @@ unsigned long perf_instruction_pointer(struct pt_regs 
*regs)
bool use_siar = regs_use_siar(regs);
unsigned long siar = mfspr(SPRN_SIAR);
 
-   if (ppmu->flags & PPMU_P10_DD1) {
+   if (ppmu && ppmu->flags & PPMU_P10_DD1) {
if (siar)
return siar;
else
-- 
2.27.0


Thanks
Athira

> Maddy
> 
> 
>> 
>> root@vgoippro:/tmp# perf record /root/null_syscall
>> [  285.559987] BUG: Kernel NULL pointer dereference on read at 0x0040
>> [  285.566533] Faulting instruction address: 0xc0021f0c
>> [  285.571486] Oops: Kernel access of bad area, sig: 11 [#1]
>> [  285.576872] BE PAGE_SIZE=4K PREEMPT CMPCPRO
>> [  285.581080] SAF3000 DIE NOTIFICATION
>> [  285.584661] CPU: 0 PID: 442 Comm: null_syscall Not tainted 
>> 5.13.0-rc6-s3k-dev-01645-g7649ee3d2957 #5164
>> [  285.594035] NIP:  c0021f0c LR: c00e8ad8 CTR: c00d8a5c
>> [  285.599074] REGS: e67757d0 TRAP: 0300   Not tainted 
>> (5.13.0-rc6-s3k-dev-01645-g7649ee3d2957)
>> [  285.607576] MSR:  1032   CR: 44775b18 XER: 2000
>> [  285.614063] DAR: 0040 DSISR: 2000
>> [  285.614063] GPR00: c00e8810 e6775880 c1c52640 e6775b20 7cb36ae0 f028 
>> 43ebeedc 5ccc47d0
>> [  285.614063] GPR08:  0900 e6775b20 0001  1025b2c0 
>> 10013088 10012ee0
>> [  285.614063] GPR16: b000 0007 0001 c00deb64 0042 0001 
>> 78db7b23 c0b13200
>> [  285.614063] GPR24:    e6775b20 c13b8560 0107 
>> e6775940 e67758e8
>> [  285.651693] NIP [c0021f0c] perf_instruction_pointer+0x10/0x60
>> [  285.657460] LR [c00e8ad8] perf_prepare_sample+0x344/0x674
>> [  285.662859] Call Trace:
>> [  285.665301] [e6775880] [c00e8810] perf_prepare_sample+0x7c/0x674 
>> (unreliable)
>> [  285.672452] [e67758c0] [c00e8e44] perf_event_output_forward+0x3c/0x94
>> [  285.678903] [e6775910] [c00dea8c] __perf_event_overflow+0x74/0x14c
>> [  285.685108] [e6775930] [c00dec5c] perf_swevent_hrtimer+0xf8/0x170
>>

[PATCH V3 2/2] selftests/powerpc: EBB selftest for MMCR0 control for PMU SPRs in ISA v3.1

2021-05-25 Thread Athira Rajeev
With the MMCR0 control bit (PMCCEXT) in ISA v3.1, read access to
group B registers is restricted when MMCR0 PMCC=0b00. In other
platforms (like power9), the older behaviour works where group B
PMU SPRs are readable.

Patch creates a selftest which verifies that the test takes a
SIGILL when attempting to read PMU registers via helper function
"dump_ebb_state" for ISA v3.1.

Signed-off-by: Athira Rajeev 
---
 tools/testing/selftests/powerpc/pmu/ebb/Makefile   |  2 +-
 .../powerpc/pmu/ebb/regs_access_pmccext_test.c | 63 ++
 2 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 
tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile 
b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index c5ecb46..0101606 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -24,7 +24,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test 
cycles_test   \
 fork_cleanup_test ebb_on_child_test\
 ebb_on_willing_child_test back_to_back_ebbs_test   \
 lost_exception_test no_handler_test\
-cycles_with_mmcr2_test
+cycles_with_mmcr2_test regs_access_pmccext_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c 
b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
new file mode 100644
index 000..1eda8e9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021, Athira Rajeev, IBM Corp.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "ebb.h"
+
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC and
+ * sets MMCR0_PMCCEXT preventing further read access to the
+ * group B PMU registers.
+ */
+
+static int regs_access_pmccext(void)
+{
+   struct event event;
+
+   SKIP_IF(!ebb_is_supported());
+
+   event_init_named(&event, 0x1001e, "cycles");
+   event_leader_ebb_init(&event);
+
+   FAIL_IF(event_open(&event));
+
+   ebb_enable_pmc_counting(1);
+   setup_ebb_handler(standard_ebb_callee);
+   ebb_global_enable();
+   FAIL_IF(ebb_event_enable(&event));
+
+   mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+   while (ebb_state.stats.ebb_count < 1)
+   FAIL_IF(core_busy_loop());
+
+   ebb_global_disable();
+   event_close(&event);
+
+   FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+   /*
+* For ISA v3.1, verify the test takes a SIGILL when reading
+* PMU regs after the event is closed. With the control bit
+* in MMCR0 (PMCCEXT) restricting access to group B PMU regs,
+* sigill is expected.
+*/
+   if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
+   FAIL_IF(catch_sigill(dump_ebb_state));
+   else
+   dump_ebb_state();
+
+   return 0;
+}
+
+int main(void)
+{
+   return test_harness(regs_access_pmccext, "regs_access_pmccext");
+}
-- 
1.8.3.1



[PATCH V3 1/2] selftests/powerpc: Fix "no_handler" EBB selftest

2021-05-25 Thread Athira Rajeev
The "no_handler_test" in ebb selftests attempts to read the PMU
registers twice via helper function "dump_ebb_state". First dump is
just before closing of event and the second invocation is done after
closing of the event. The original intention of second
dump_ebb_state was to dump the state of registers at the end of
the test when the counters are frozen. But this will be achieved
with the first call itself, since the sample period is set to a low value
and the PMU will be frozen by then. Hence the patch removes the second
dump, which was done after closing of the event.

Signed-off-by: Athira Rajeev 
Reported-by: Shirisha Ganta 
---
 tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c 
b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
index fc5bf48..01e827c 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
@@ -50,8 +50,6 @@ static int no_handler_test(void)
 
event_close(&event);
 
-   dump_ebb_state();
-
/* The real test is that we never took an EBB at 0x0 */
 
return 0;
-- 
1.8.3.1



[PATCH V3 0/2] selftests/powerpc: Updates to EBB selftest for ISA v3.1

2021-05-25 Thread Athira Rajeev
The "no_handler_test" in ebb selftests attempts to read the PMU
registers after closing of the event via helper function
"dump_ebb_state". With the MMCR0 control bit (PMCCEXT) in ISA v3.1,
read access to group B registers is restricted when MMCR0 PMCC=0b00.
Hence the call to dump_ebb_state after closing of event will generate
a SIGILL, which is expected.

Test has below in logs:
<<>>
!! child died by signal 4
failure: no_handler_test
<<>>

In other platforms (like power9), the older behaviour works where
group B PMU SPRs are readable. The "dump_ebb_state" is called twice
in the test. The second call, after closing of the event, was done in order
to dump the state of registers when the counters are frozen. But since
the counters should already be frozen by the time first dump is done,
patch1 drops the second call to "dump_ebb_state".

To address the new sigill behaviour in ISA v3.1, patch2 creates
a separate selftest.

Changelog:
v2 -> v3:
Fixed a space issue in patch2.

v1 -> v2:
Addressed review comments from Michael Ellerman.
First version attempted to address the SIGILL
behaviour in existing "no_handler_test" test itself.
As per mpe's suggestion, moved that to a separate test
and removed the second call to "dump_ebb_state" since
that is actually not needed.

Athira Rajeev (2):
  selftests/powerpc: Fix "no_handler" EBB selftest
  selftests/powerpc: EBB selftest for MMCR0 control for PMU SPRs in ISA
v3.1

 tools/testing/selftests/powerpc/pmu/ebb/Makefile   |  2 +-
 .../selftests/powerpc/pmu/ebb/no_handler_test.c|  2 -
 .../powerpc/pmu/ebb/regs_access_pmccext_test.c | 63 ++
 3 files changed, 64 insertions(+), 3 deletions(-)
 create mode 100644 
tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c

-- 
1.8.3.1



[V2 2/2] selftests/powerpc: EBB selftest for MMCR0 control for PMU SPRs in ISA v3.1

2021-05-25 Thread Athira Rajeev
With the MMCR0 control bit (PMCCEXT) in ISA v3.1, read access to
group B registers is restricted when MMCR0 PMCC=0b00. In other
platforms (like power9), the older behaviour works where group B
PMU SPRs are readable.

Patch creates a selftest which verifies that the test takes a
SIGILL when attempting to read PMU registers via helper function
"dump_ebb_state" for ISA v3.1.

Signed-off-by: Athira Rajeev 
---
 tools/testing/selftests/powerpc/pmu/ebb/Makefile   |  2 +-
 .../powerpc/pmu/ebb/regs_access_pmccext_test.c | 63 ++
 2 files changed, 64 insertions(+), 1 deletion(-)
 create mode 100644 
tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile 
b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index c5ecb46..0101606 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -24,7 +24,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test 
cycles_test   \
 fork_cleanup_test ebb_on_child_test\
 ebb_on_willing_child_test back_to_back_ebbs_test   \
 lost_exception_test no_handler_test\
-cycles_with_mmcr2_test
+cycles_with_mmcr2_test regs_access_pmccext_test
 
 top_srcdir = ../../../../../..
 include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c 
b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
new file mode 100644
index 000..5f1a040
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021, Athira Rajeev, IBM Corp.
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+#include "ebb.h"
+
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC and
+ * sets MMCR0_PMCCEXT preventing further read access to the
+ * group B PMU registers.
+ */
+
+static int regs_access_pmccext(void)
+{
+   struct event event;
+
+   SKIP_IF(!ebb_is_supported());
+
+   event_init_named(&event, 0x1001e, "cycles");
+   event_leader_ebb_init(&event);
+
+   FAIL_IF(event_open(&event));
+
+   ebb_enable_pmc_counting(1);
+   setup_ebb_handler(standard_ebb_callee);
+   ebb_global_enable();
+   FAIL_IF(ebb_event_enable(&event));
+
+   mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+   while (ebb_state.stats.ebb_count < 1)
+   FAIL_IF(core_busy_loop());
+
+   ebb_global_disable();
+   event_close(&event);
+
+   FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+   /*
+* For ISA v3.1, verify the test takes a SIGILL when reading
+* PMU regs after the event is closed. With the control bit
+* in MMCR0 (PMCCEXT) restricting access to group B PMU regs,
+* sigill is expected.
+*/
+   if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
+   FAIL_IF(catch_sigill(dump_ebb_state));
+   else
+   dump_ebb_state();
+
+   return 0;
+}
+
+int main(void)
+{
+   return test_harness(regs_access_pmccext,"regs_access_pmccext");
+}
-- 
1.8.3.1



[V2 1/2] selftests/powerpc: Fix "no_handler" EBB selftest

2021-05-25 Thread Athira Rajeev
The "no_handler_test" in ebb selftests attempts to read the PMU
registers twice via helper function "dump_ebb_state". First dump is
just before closing of event and the second invocation is done after
closing of the event. The original intention of second
dump_ebb_state was to dump the state of registers at the end of
the test when the counters are frozen. But this will be achieved
with the first call itself, since the sample period is set to a low value
and the PMU will be frozen by then. Hence the patch removes the second
dump, which was done after closing of the event.

Signed-off-by: Athira Rajeev 
Reported-by: Shirisha Ganta 
---
 tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c 
b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
index fc5bf48..01e827c 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
@@ -50,8 +50,6 @@ static int no_handler_test(void)
 
event_close(&event);
 
-   dump_ebb_state();
-
/* The real test is that we never took an EBB at 0x0 */
 
return 0;
-- 
1.8.3.1



[V2 0/2] selftests/powerpc: Updates to EBB selftest for ISA v3.1

2021-05-25 Thread Athira Rajeev
The "no_handler_test" in ebb selftests attempts to read the PMU
registers after closing of the event via helper function
"dump_ebb_state". With the MMCR0 control bit (PMCCEXT) in ISA v3.1,
read access to group B registers is restricted when MMCR0 PMCC=0b00.
Hence the call to dump_ebb_state after closing of event will generate
a SIGILL, which is expected.

Test has below in logs:
<<>>
!! child died by signal 4
failure: no_handler_test
<<>>

In other platforms (like power9), the older behaviour works where
group B PMU SPRs are readable. The "dump_ebb_state" is called twice
in the test. The second call, after closing of the event, was done in order
to dump the state of registers when the counters are frozen. But since
the counters should already be frozen by the time first dump is done,
patch1 drops the second call to "dump_ebb_state".

To address the new sigill behaviour in ISA v3.1, patch2 creates
a separate selftest.

Changelog:
v1 -> v2:
Addressed review comments from Michael Ellerman.
First version attempted to address the SIGILL
behaviour in existing "no_handler_test" test itself.
As per mpe's suggestion, moved that to a separate test
and removed the second call to "dump_ebb_state" since
that is actually not needed.

Athira Rajeev (2):
  selftests/powerpc: Fix "no_handler" EBB selftest
  selftests/powerpc: EBB selftest for MMCR0 control for PMU SPRs in ISA
v3.1

 tools/testing/selftests/powerpc/pmu/ebb/Makefile   |  2 +-
 .../selftests/powerpc/pmu/ebb/no_handler_test.c|  2 -
 .../powerpc/pmu/ebb/regs_access_pmccext_test.c | 63 ++
 3 files changed, 64 insertions(+), 3 deletions(-)
 create mode 100644 
tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c

-- 
1.8.3.1



Re: [PATCH V2 1/1] powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC

2021-05-23 Thread Athira Rajeev



> On 17-May-2021, at 8:50 AM, Nicholas Piggin  wrote:
> 
> Sorry I missed this :(
> 
> Excerpts from Athira Rajeev's message of April 20, 2021 1:01 pm:
>> Running perf fuzzer showed below in dmesg logs:
>> "Can't find PMC that caused IRQ"
>> 
>> This means a PMU exception happened, but none of the PMC's (Performance
>> Monitor Counter) were found to be overflown. There are some corner cases
>> that clears the PMCs after PMI gets masked. In such cases, the perf
>> interrupt handler will not find the active PMC values that had caused
>> the overflow and thus leads to this message while replaying.
>> 
>> Case 1: PMU Interrupt happens during replay of other interrupts and
>> counter values gets cleared by PMU callbacks before replay:
>> 
>> During replay of interrupts like timer, __do_irq and doorbell exception, we
>> conditionally enable interrupts via may_hard_irq_enable(). This could
>> potentially create a window to generate a PMI. Since irq soft mask is set
>> to ALL_DISABLED, the PMI will get masked here. We could get IPIs run before
>> perf interrupt is replayed and the PMU events could deleted or stopped.
>> This will change the PMU SPR values and resets the counters. Snippet of
>> ftrace log showing PMU callbacks invoked in "__do_irq":
>> 
>> -0 [051] dns. 132025441306354: __do_irq <-call_do_irq
>> -0 [051] dns. 132025441306430: irq_enter <-__do_irq
>> -0 [051] dns. 132025441306503: irq_enter_rcu <-__do_irq
>> -0 [051] dnH. 132025441306599: xive_get_irq <-__do_irq
>> <<>>
>> -0 [051] dnH. 132025441307770: 
>> generic_smp_call_function_single_interrupt <-smp_ipi_demux_relaxed
>> -0 [051] dnH. 132025441307839: flush_smp_call_function_queue 
>> <-smp_ipi_demux_relaxed
>> -0 [051] dnH. 132025441308057: _raw_spin_lock <-event_function
>> -0 [051] dnH. 132025441308206: power_pmu_disable <-perf_pmu_disable
>> -0 [051] dnH. 132025441308337: power_pmu_del <-event_sched_out
>> -0 [051] dnH. 132025441308407: power_pmu_read <-power_pmu_del
>> -0 [051] dnH. 132025441308477: read_pmc <-power_pmu_read
>> -0 [051] dnH. 132025441308590: isa207_disable_pmc <-power_pmu_del
>> -0 [051] dnH. 132025441308663: write_pmc <-power_pmu_del
>> -0 [051] dnH. 132025441308787: power_pmu_event_idx 
>> <-perf_event_update_userpage
>> -0 [051] dnH. 132025441308859: rcu_read_unlock_strict 
>> <-perf_event_update_userpage
>> -0 [051] dnH. 132025441308975: power_pmu_enable <-perf_pmu_enable
>> <<>>
>> -0 [051] dnH. 132025441311108: irq_exit <-__do_irq
>> -0 [051] dns. 132025441311319: performance_monitor_exception 
>> <-replay_soft_interrupts
>> 
>> Case 2: PMI's masked during local_* operations, example local_add.
>> If the local_add operation happens within a local_irq_save, replay of
>> PMI will be during local_irq_restore. Similar to case 1, this could
>> also create a window before replay where PMU events gets deleted or
>> stopped.
>> 
>> Patch adds a fix to update the PMU callback functions (del,stop,enable) to
>> check for pending perf interrupt. If there is an overflown PMC and pending
>> perf interrupt indicated in Paca or by PMAO bit set in MMCR0, clear the PMI
>> bit in paca to drop that sample. Also clear the MMCR0 PMAO bit which
>> otherwise could lead to spurious interrupts in some corner cases. Example,
>> a timer after power_pmu_del which will re-enable interrupts since PMI is
>> cleared and triggers a PMI again since PMAO bit is still set. Another
>> condition occurs if we had disabled MSR[EE] right before perf interrupt
>> came in. Re-enabling interrupt will trigger PMI since PMAO is still set.
>> But fails to find valid overflow if PMC get cleared before enabling EE.
>> 
>> We can't just replay PMI any time. Hence this approach is preferred rather
>> than replaying PMI before resetting overflown PMC. Patch also documents
>> core-book3s on a race condition which can trigger these PMC messages during
>> idle path in PowerNV.
>> 
>> Fixes: f442d004806e ("powerpc/64s: Add support to mask perf interrupts and 
>> replay them")
>> Reported-by: Nageswara R Sastry 
>> Suggested-by: Nicholas Piggin 
> 
> I would say you can leave ^ this line out. You and Maddy did the hard 
> work of coming up with the fix, I just suggested a few minor changes.

Thanks Nick for reviewing the patch and your suggestions helped us
in the solution approach :) 

> 
>> Suggested-by: Madhavan Srinivasan 
>> Signed-off-by: Athira Rajeev 
>> ---
>> arch/powerpc/include

[PATCH] selftests/powerpc: Fix "no_handler" EBB selftest for ISA v3.1

2021-05-20 Thread Athira Rajeev
The "no_handler_test" in ebb selftests attempts to read the PMU
registers after closing of the event via helper function
"dump_ebb_state". With the MMCR0 control bit (PMCCEXT) in ISA v3.1,
read access to group B registers is restricted when MMCR0 PMCC=0b00.
Hence the call to dump_ebb_state after closing of event will generate
a SIGILL, which is expected.

Test has below in logs:

<<>>
!! child died by signal 4
failure: no_handler_test
<<>>

In other platforms (like power9), the older behaviour works where
group B PMU SPRs are readable. Patch fixes the selftest to handle
the sigill for ISA v3.1.

Signed-off-by: Athira Rajeev 
Reported-by: Shirisha Ganta 
---
 tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c 
b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
index fc5bf48..5f57a9d 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
@@ -50,7 +50,17 @@ static int no_handler_test(void)
 
event_close(&event);
 
-   dump_ebb_state();
+   /*
+* For ISA v3.1, verify the test takes a SIGILL when reading
+* PMU regs after the event is closed. With the control bit
+* in MMCR0 (PMCCEXT) restricting access to group B PMU regs,
+* sigill is expected.
+*/
+
+   if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
+   FAIL_IF(catch_sigill(dump_ebb_state));
+   else
+   dump_ebb_state();
 
/* The real test is that we never took an EBB at 0x0 */
 
-- 
1.8.3.1



[PATCH V2 1/1] powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC

2021-04-19 Thread Athira Rajeev
Running perf fuzzer showed below in dmesg logs:
"Can't find PMC that caused IRQ"

This means a PMU exception happened, but none of the PMCs (Performance
Monitor Counters) were found to be overflown. There are some corner cases
that clear the PMCs after the PMI gets masked. In such cases, the perf
interrupt handler will not find the active PMC values that had caused
the overflow and thus leads to this message while replaying.

Case 1: PMU Interrupt happens during replay of other interrupts and
counter values gets cleared by PMU callbacks before replay:

During replay of interrupts like timer, __do_irq and doorbell exception, we
conditionally enable interrupts via may_hard_irq_enable(). This could
potentially create a window to generate a PMI. Since irq soft mask is set
to ALL_DISABLED, the PMI will get masked here. We could get IPIs run before
perf interrupt is replayed and the PMU events could get deleted or stopped.
This will change the PMU SPR values and reset the counters. Snippet of
ftrace log showing PMU callbacks invoked in "__do_irq":

-0 [051] dns. 132025441306354: __do_irq <-call_do_irq
-0 [051] dns. 132025441306430: irq_enter <-__do_irq
-0 [051] dns. 132025441306503: irq_enter_rcu <-__do_irq
-0 [051] dnH. 132025441306599: xive_get_irq <-__do_irq
<<>>
-0 [051] dnH. 132025441307770: generic_smp_call_function_single_interrupt 
<-smp_ipi_demux_relaxed
-0 [051] dnH. 132025441307839: flush_smp_call_function_queue 
<-smp_ipi_demux_relaxed
-0 [051] dnH. 132025441308057: _raw_spin_lock <-event_function
-0 [051] dnH. 132025441308206: power_pmu_disable <-perf_pmu_disable
-0 [051] dnH. 132025441308337: power_pmu_del <-event_sched_out
-0 [051] dnH. 132025441308407: power_pmu_read <-power_pmu_del
-0 [051] dnH. 132025441308477: read_pmc <-power_pmu_read
-0 [051] dnH. 132025441308590: isa207_disable_pmc <-power_pmu_del
-0 [051] dnH. 132025441308663: write_pmc <-power_pmu_del
-0 [051] dnH. 132025441308787: power_pmu_event_idx 
<-perf_event_update_userpage
-0 [051] dnH. 132025441308859: rcu_read_unlock_strict 
<-perf_event_update_userpage
-0 [051] dnH. 132025441308975: power_pmu_enable <-perf_pmu_enable
<<>>
-0 [051] dnH. 132025441311108: irq_exit <-__do_irq
-0 [051] dns. 132025441311319: performance_monitor_exception 
<-replay_soft_interrupts

Case 2: PMI's masked during local_* operations, example local_add.
If the local_add operation happens within a local_irq_save, replay of
PMI will be during local_irq_restore. Similar to case 1, this could
also create a window before replay where PMU events get deleted or
stopped.

Patch adds a fix to update the PMU callback functions (del, stop, enable) to
check for a pending perf interrupt. If there is an overflown PMC and a pending
perf interrupt indicated in the PACA or by the PMAO bit set in MMCR0, clear the
PMI bit in the PACA to drop that sample. Also clear the MMCR0 PMAO bit, which
otherwise could lead to spurious interrupts in some corner cases. For example,
a timer interrupt after power_pmu_del will re-enable interrupts since the PMI is
cleared, and trigger a PMI again since the PMAO bit is still set. Another
condition occurs if we had disabled MSR[EE] right before the perf interrupt
came in. Re-enabling interrupts will trigger a PMI since PMAO is still set,
but the handler fails to find a valid overflow if the PMC got cleared before enabling EE.

We can't just replay PMI any time. Hence this approach is preferred rather
than replaying the PMI before resetting the overflown PMC. Patch also documents
a race condition in core-book3s which can trigger these PMC messages during
the idle path in PowerNV.

Fixes: f442d004806e ("powerpc/64s: Add support to mask perf interrupts and 
replay them")
Reported-by: Nageswara R Sastry 
Suggested-by: Nicholas Piggin 
Suggested-by: Madhavan Srinivasan 
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/include/asm/hw_irq.h | 19 
 arch/powerpc/perf/core-book3s.c   | 77 +++
 2 files changed, 96 insertions(+)

diff --git a/arch/powerpc/include/asm/hw_irq.h 
b/arch/powerpc/include/asm/hw_irq.h
index 56a98936a6a9..7e192bd8253b 100644
--- a/arch/powerpc/include/asm/hw_irq.h
+++ b/arch/powerpc/include/asm/hw_irq.h
@@ -215,6 +215,23 @@ static inline bool arch_irqs_disabled(void)
return arch_irqs_disabled_flags(arch_local_save_flags());
 }
 
+static inline int get_clear_pmi_irq_pending(void)
+{
+   /*
+* Some corner cases could clear the PMU counter overflow
+* while a masked PMI is pending. One of such case is
+* when a PMI happens during interrupt replay and perf
+* counter values gets cleared by PMU callbacks before
+* replay. So the pending PMI must be cleared here.
+*/
+   if (get_paca()->irq_happened & PACA_IRQ_PMI) {
+   WARN_ON_ONCE(mfmsr() & MSR_EE);
+   get_paca()->irq_happened &= ~PACA_IRQ_PMI;
+   return 1;
+   }
+   return 0;
+}
+
 #if
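
The core-book3s.c part of the diff is truncated above. A rough sketch of how
the helper is intended to be used from a PMU callback, assuming the
pmc_overflown() helper added by the core-book3s.c change; the callback body
around it is hypothetical, not the actual hunk:

	/*
	 * Illustrative sketch only, not the actual core-book3s.c hunk:
	 * drop a masked PMI that belongs to a PMC we are about to clear,
	 * so that interrupt replay does not warn "Can't find PMC that
	 * caused IRQ".
	 */
	static void power_pmu_del_sketch(struct perf_event *event, int ef_flags)
	{
		unsigned long flags;

		local_irq_save(flags);

		/* ... event is removed and its PMC is written to 0 here ... */

		/*
		 * If this PMC had overflown while the PMI was soft-masked,
		 * clear the pending PMI in the PACA and the stale
		 * MMCR0[PMAO], so replay does not see an interrupt with no
		 * overflown counter behind it.
		 */
		if (pmc_overflown(event->hw.idx) && get_clear_pmi_irq_pending())
			mtspr(SPRN_MMCR0, mfspr(SPRN_MMCR0) & ~MMCR0_PMAO);

		local_irq_restore(flags);
	}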

[PATCH V2 0/1] powerpc/perf: Clear pending PMI in ppmu callbacks

2021-04-19 Thread Athira Rajeev
Running perf fuzzer testsuite popped up below messages
in the dmesg logs:

"Can't find PMC that caused IRQ"

This means a PMU exception happened, but none of the PMCs (Performance
Monitor Counters) were found to be overflown. The perf interrupt handler checks
the PMCs to see which PMC has overflown, and if none of the PMCs are
overflown (counter value not >= 0x8000), it throws the warning:
"Can't find PMC that caused IRQ".

Powerpc has the capability to mask and replay a performance monitoring
interrupt (PMI). In case of a replayed PMI, there are some corner cases
that clear the PMCs after masking. In such cases, the perf interrupt
handler will not find the active PMC values that had caused the overflow,
which leads to this message. This patchset attempts to fix those
corner cases.

However, there is one more case in PowerNV where these messages are
emitted during system-wide profiling or when a specific CPU is monitored
for an event. That is, when a counter overflows just before entering idle
and a PMI gets triggered after wakeup from idle. Since the PMCs
are not saved in the idle path, the perf interrupt handler will not
find an overflown counter value and emits the "Can't find PMC" messages.
This patch documents this race condition in powerpc core-book3s.

Patch fixes the ppmu callbacks to disable the pending interrupt before clearing
the overflown PMC, and documents the race condition in the idle path.

Changelog:
Changes from v1 -> v2
   Addressed review comments from Nicholas Piggin
   - Moved the PMI pending check and clearing function
 to arch/powerpc/include/asm/hw_irq.h and renamed
 function to "get_clear_pmi_irq_pending"
   - Along with checking for pending PMI bit in Paca,
 look for PMAO bit in MMCR0 register to decide on
 pending PMI interrupt.

Athira Rajeev (1):
  powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting
an overflown PMC

 arch/powerpc/include/asm/hw_irq.h | 19 
 arch/powerpc/perf/core-book3s.c   | 77 +++
 2 files changed, 96 insertions(+)

-- 
2.26.2



Re: [PATCH] powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC

2021-04-19 Thread Athira Rajeev

Re: [PATCH] powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC

2021-04-12 Thread Athira Rajeev

Re: [PATCH] powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC

2021-04-09 Thread Athira Rajeev



> On 09-Apr-2021, at 6:38 AM, Nicholas Piggin  wrote:
> 
Hi Nick,

Thanks for checking the patch and sharing review comments.

> I was going to nitpick "overflown" here as something birds do, but some
> sources says overflown is okay for past tense.
> 
> You could use "overflowed" for that, but I understand the issue with the 
> word: you are talking about counters that are currently in an "overflow" 
> state, but the overflow occurred in the past and is not still happening
> so you "overflowing" doesn't exactly fit either.
> 
> overflown kind of works for some reason you can kind of use it for
> present tense!

Ok sure, Yes counter is currently in an “overflow” state.

> 
> Excerpts from Athira Rajeev's message of April 7, 2021 12:47 am:
>> Running perf fuzzer showed below in dmesg logs:
>> "Can't find PMC that caused IRQ"
>> 
>> This means a PMU exception happened, but none of the PMC's (Performance
>> Monitor Counter) were found to be overflown. There are some corner cases
>> that clears the PMCs after PMI gets masked. In such cases, the perf
>> interrupt handler will not find the active PMC values that had caused
>> the overflow and thus leads to this message while replaying.
>> 
>> Case 1: PMU Interrupt happens during replay of other interrupts and
>> counter values gets cleared by PMU callbacks before replay:
>> 
>> During replay of interrupts like timer, __do_irq and doorbell exception, we
>> conditionally enable interrupts via may_hard_irq_enable(). This could
>> potentially create a window to generate a PMI. Since irq soft mask is set
>> to ALL_DISABLED, the PMI will get masked here.
> 
> I wonder if may_hard_irq_enable shouldn't enable if PMI is soft
> disabled. And also maybe replay should not set ALL_DISABLED if
> there are no PMI interrupts pending.
> 
> Still, I think those are a bit more tricky and might take a while
> to get right or just not be worth while, so I think your patch is
> fine.

Ok Nick.
> 
>> We could get IPIs run before
>> perf interrupt is replayed and the PMU events could deleted or stopped.
>> This will change the PMU SPR values and resets the counters. Snippet of
>> ftrace log showing PMU callbacks invoked in "__do_irq":
>> 
>> -0 [051] dns. 132025441306354: __do_irq <-call_do_irq
>> -0 [051] dns. 132025441306430: irq_enter <-__do_irq
>> -0 [051] dns. 132025441306503: irq_enter_rcu <-__do_irq
>> -0 [051] dnH. 132025441306599: xive_get_irq <-__do_irq
>> <<>>
>> -0 [051] dnH. 132025441307770: 
>> generic_smp_call_function_single_interrupt <-smp_ipi_demux_relaxed
>> -0 [051] dnH. 132025441307839: flush_smp_call_function_queue 
>> <-smp_ipi_demux_relaxed
>> -0 [051] dnH. 132025441308057: _raw_spin_lock <-event_function
>> -0 [051] dnH. 132025441308206: power_pmu_disable <-perf_pmu_disable
>> -0 [051] dnH. 132025441308337: power_pmu_del <-event_sched_out
>> -0 [051] dnH. 132025441308407: power_pmu_read <-power_pmu_del
>> -0 [051] dnH. 132025441308477: read_pmc <-power_pmu_read
>> -0 [051] dnH. 132025441308590: isa207_disable_pmc <-power_pmu_del
>> -0 [051] dnH. 132025441308663: write_pmc <-power_pmu_del
>> -0 [051] dnH. 132025441308787: power_pmu_event_idx 
>> <-perf_event_update_userpage
>> -0 [051] dnH. 132025441308859: rcu_read_unlock_strict 
>> <-perf_event_update_userpage
>> -0 [051] dnH. 132025441308975: power_pmu_enable <-perf_pmu_enable
>> <<>>
>> -0 [051] dnH. 132025441311108: irq_exit <-__do_irq
>> -0 [051] dns. 132025441311319: performance_monitor_exception 
>> <-replay_soft_interrupts
>> 
>> Case 2: PMI's masked during local_* operations, example local_add.
>> If the local_add operation happens within a local_irq_save, replay of
>> PMI will be during local_irq_restore. Similar to case 1, this could
>> also create a window before replay where PMU events gets deleted or
>> stopped.
> 
> Here as well perhaps PMIs should be replayed if they are unmasked
> even if other interrupts are still masked. Again that might be more
> complexity than it's worth.
Ok..

> 
>> 
>> Patch adds a fix to update the PMU callback functions (del,stop,enable) to
>> check for pending perf interrupt. If there is an overflown PMC and pending
>> perf interrupt indicated in Paca, clear the PMI bit in paca to drop that
>> sample. In case of power_pmu_del, also clear the MMCR0 PMAO bit which
>> otherwise could lead to spurious interrupts in some corner cases. Example,
>> a timer after power_pmu_del which will re-enable interr

Re: [PATCH] powerpc/perf: prevent mixed EBB and non-EBB events

2021-04-06 Thread Athira Rajeev



> On 05-Mar-2021, at 11:20 AM, Athira Rajeev  
> wrote:
> 
> 
> 
>> On 24-Feb-2021, at 5:51 PM, Thadeu Lima de Souza Cascardo 
>>  wrote:
>> 
>> EBB events must be under exclusive groups, so there is no mix of EBB and
>> non-EBB events on the same PMU. This requirement worked fine as perf core
>> would not allow other pinned events to be scheduled together with exclusive
>> events.
>> 
>> This assumption was broken by commit 1908dc911792 ("perf: Tweak
>> perf_event_attr::exclusive semantics").
>> 
>> After that, the test cpu_event_pinned_vs_ebb_test started succeeding after
>> read_events, but worse, the task would not have given access to PMC1, so
>> when it tried to write to it, it was killed with "illegal instruction".
>> 
>> Preventing mixed EBB and non-EBB events from being add to the same PMU will
>> just revert to the previous behavior and the test will succeed.
> 
> 
> Hi,
> 
> Thanks for checking this. I checked your patch which is fixing 
> “check_excludes” to make
> sure all events must agree on EBB. But in the PMU group constraints, we 
> already have check for
> EBB events. This is in arch/powerpc/perf/isa207-common.c ( 
> isa207_get_constraint function ).
> 
> <<>>
> mask  |= CNST_EBB_VAL(ebb);
> value |= CNST_EBB_MASK;
> <<>>
> 
> But the above setting for mask and value is interchanged. We actually need to 
> fix here.
> 

Hi,

I have sent a patch for fixing this EBB mask/value setting.
This is the link to patch:

powerpc/perf: Fix PMU constraint check for EBB events
https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=237669

Thanks
Athira

> Below patch should fix this:
> 
> diff --git a/arch/powerpc/perf/isa207-common.c 
> b/arch/powerpc/perf/isa207-common.c
> index e4f577da33d8..8b5eeb6fb2fb 100644
> --- a/arch/powerpc/perf/isa207-common.c
> +++ b/arch/powerpc/perf/isa207-common.c
> @@ -447,8 +447,8 @@ int isa207_get_constraint(u64 event, unsigned long 
> *maskp, unsigned long *valp,
> * EBB events are pinned & exclusive, so this should never actually
> * hit, but we leave it as a fallback in case.
> */
> -   mask  |= CNST_EBB_VAL(ebb);
> -   value |= CNST_EBB_MASK;
> +   mask  |= CNST_EBB_MASK;
> +   value |= CNST_EBB_VAL(ebb);
> 
>*maskp = mask;
>*valp = value;
> 
> 
> Can you please try with this patch.
> 
> Thanks
> Athira
> 
> 
>> 
>> Fixes: 1908dc911792 (perf: Tweak perf_event_attr::exclusive semantics)
>> Signed-off-by: Thadeu Lima de Souza Cascardo 
>> ---
>> arch/powerpc/perf/core-book3s.c | 20 
>> 1 file changed, 16 insertions(+), 4 deletions(-)
>> 
>> diff --git a/arch/powerpc/perf/core-book3s.c 
>> b/arch/powerpc/perf/core-book3s.c
>> index 43599e671d38..d767f7944f85 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -1010,9 +1010,25 @@ static int check_excludes(struct perf_event **ctrs, 
>> unsigned int cflags[],
>>int n_prev, int n_new)
>> {
>>  int eu = 0, ek = 0, eh = 0;
>> +bool ebb = false;
>>  int i, n, first;
>>  struct perf_event *event;
>> 
>> +n = n_prev + n_new;
>> +if (n <= 1)
>> +return 0;
>> +
>> +first = 1;
>> +for (i = 0; i < n; ++i) {
>> +event = ctrs[i];
>> +if (first) {
>> +ebb = is_ebb_event(event);
>> +first = 0;
>> +} else if (is_ebb_event(event) != ebb) {
>> +return -EAGAIN;
>> +}
>> +}
>> +
>>  /*
>>   * If the PMU we're on supports per event exclude settings then we
>>   * don't need to do any of this logic. NB. This assumes no PMU has both
>> @@ -1021,10 +1037,6 @@ static int check_excludes(struct perf_event **ctrs, 
>> unsigned int cflags[],
>>  if (ppmu->flags & PPMU_ARCH_207S)
>>  return 0;
>> 
>> -n = n_prev + n_new;
>> -if (n <= 1)
>> -return 0;
>> -
>>  first = 1;
>>  for (i = 0; i < n; ++i) {
>>  if (cflags[i] & PPMU_LIMITED_PMC_OK) {
>> -- 
>> 2.27.0



[PATCH] powerpc/perf: Fix PMU constraint check for EBB events

2021-04-06 Thread Athira Rajeev
The power PMU group constraints include a check for EBB events
to make sure all events in a group agree on EBB. This
prevents scheduling EBB and non-EBB events together.
But in the existing check, the settings for the constraint mask and
value are interchanged. Patch fixes the same.
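
To see why the ordering matters, here is a small standalone illustration (not
the kernel's actual constraint code): the mask word marks which constraint
bits are significant and the value word carries the event's setting for those
bits. With the two interchanged, a non-EBB event contributes a zero mask for
the EBB field, so the "all events must agree on EBB" comparison never takes
effect. The field layout below is made up for the example.

	#include <stdbool.h>
	#include <stdio.h>

	#define EBB_FIELD_SHIFT		24
	#define EBB_FIELD_MASK		(1ULL << EBB_FIELD_SHIFT)	/* "this bit is checked" */
	#define EBB_FIELD_VAL(v)	((unsigned long long)(v) << EBB_FIELD_SHIFT)

	struct cnst { unsigned long long mask, value; };

	/* Events may share a group only if values agree wherever both masks are set. */
	static bool compatible(struct cnst a, struct cnst b)
	{
		unsigned long long common = a.mask & b.mask;

		return (a.value & common) == (b.value & common);
	}

	int main(void)
	{
		/* Correct encoding: mask marks the EBB field, value carries the setting. */
		struct cnst ebb         = { .mask = EBB_FIELD_MASK,   .value = EBB_FIELD_VAL(1) };
		struct cnst non_ebb     = { .mask = EBB_FIELD_MASK,   .value = EBB_FIELD_VAL(0) };

		/* Buggy encoding (mask and value interchanged): non-EBB mask is 0. */
		struct cnst ebb_bug     = { .mask = EBB_FIELD_VAL(1), .value = EBB_FIELD_MASK };
		struct cnst non_ebb_bug = { .mask = EBB_FIELD_VAL(0), .value = EBB_FIELD_MASK };

		printf("correct encoding rejects mixed group: %d\n", !compatible(ebb, non_ebb));
		printf("buggy encoding rejects mixed group:   %d\n", !compatible(ebb_bug, non_ebb_bug));
		return 0;
	}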

Before the patch, the PMU selftest "cpu_event_pinned_vs_ebb_test"
fails with the below in dmesg logs. This happens because an EBB event
gets enabled along with a non-EBB cpu event.

<<>>
[35600.453346] cpu_event_pinne[41326]: illegal instruction (4)
at 10004a18 nip 10004a18 lr 100049f8 code 1 in
cpu_event_pinned_vs_ebb_test[1000+1]
<<>>

Test results after the patch:

 ./pmu/ebb/cpu_event_pinned_vs_ebb_test
test: cpu_event_pinned_vs_ebb
tags: git_version:v5.12-rc5-93-gf28c3125acd3-dirty
Binding to cpu 8
EBB Handler is at 0x100050c8
read error on event 0x7fffe6bd4040!
PM_RUN_INST_CMPL: result 9872 running/enabled 37930432
success: cpu_event_pinned_vs_ebb

Fixes: 4df489991182 ("powerpc/perf: Add power8 EBB support")
Reported-by: Thadeu Lima de Souza Cascardo 
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/isa207-common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index e4f577da33d8..8b5eeb6fb2fb 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -447,8 +447,8 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, 
unsigned long *valp,
 * EBB events are pinned & exclusive, so this should never actually
 * hit, but we leave it as a fallback in case.
 */
-   mask  |= CNST_EBB_VAL(ebb);
-   value |= CNST_EBB_MASK;
+   mask  |= CNST_EBB_MASK;
+   value |= CNST_EBB_VAL(ebb);
 
*maskp = mask;
*valp = value;
-- 
1.8.3.1



[PATCH] powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC

2021-04-06 Thread Athira Rajeev
Running perf fuzzer showed below in dmesg logs:
"Can't find PMC that caused IRQ"

This means a PMU exception happened, but none of the PMCs (Performance
Monitor Counters) were found to be overflown. There are some corner cases
that clear the PMCs after the PMI gets masked. In such cases, the perf
interrupt handler will not find the active PMC values that had caused
the overflow, which leads to this message while replaying.

Case 1: PMU Interrupt happens during replay of other interrupts and
counter values gets cleared by PMU callbacks before replay:

During replay of interrupts like timer, __do_irq and doorbell exception, we
conditionally enable interrupts via may_hard_irq_enable(). This could
potentially create a window to generate a PMI. Since irq soft mask is set
to ALL_DISABLED, the PMI will get masked here. We could get IPIs run before
the perf interrupt is replayed, and the PMU events could be deleted or stopped.
This will change the PMU SPR values and reset the counters. Snippet of
ftrace log showing PMU callbacks invoked in "__do_irq":

-0 [051] dns. 132025441306354: __do_irq <-call_do_irq
-0 [051] dns. 132025441306430: irq_enter <-__do_irq
-0 [051] dns. 132025441306503: irq_enter_rcu <-__do_irq
-0 [051] dnH. 132025441306599: xive_get_irq <-__do_irq
<<>>
-0 [051] dnH. 132025441307770: generic_smp_call_function_single_interrupt 
<-smp_ipi_demux_relaxed
-0 [051] dnH. 132025441307839: flush_smp_call_function_queue 
<-smp_ipi_demux_relaxed
-0 [051] dnH. 132025441308057: _raw_spin_lock <-event_function
-0 [051] dnH. 132025441308206: power_pmu_disable <-perf_pmu_disable
-0 [051] dnH. 132025441308337: power_pmu_del <-event_sched_out
-0 [051] dnH. 132025441308407: power_pmu_read <-power_pmu_del
-0 [051] dnH. 132025441308477: read_pmc <-power_pmu_read
-0 [051] dnH. 132025441308590: isa207_disable_pmc <-power_pmu_del
-0 [051] dnH. 132025441308663: write_pmc <-power_pmu_del
-0 [051] dnH. 132025441308787: power_pmu_event_idx 
<-perf_event_update_userpage
-0 [051] dnH. 132025441308859: rcu_read_unlock_strict 
<-perf_event_update_userpage
-0 [051] dnH. 132025441308975: power_pmu_enable <-perf_pmu_enable
<<>>
-0 [051] dnH. 132025441311108: irq_exit <-__do_irq
-0 [051] dns. 132025441311319: performance_monitor_exception 
<-replay_soft_interrupts

Case 2: PMI's masked during local_* operations, example local_add.
If the local_add operation happens within a local_irq_save, replay of
PMI will be during local_irq_restore. Similar to case 1, this could
also create a window before replay where PMU events gets deleted or
stopped.

Patch adds a fix to update the PMU callback functions (del,stop,enable) to
check for pending perf interrupt. If there is an overflown PMC and pending
perf interrupt indicated in Paca, clear the PMI bit in paca to drop that
sample. In case of power_pmu_del, also clear the MMCR0 PMAO bit, which
otherwise could lead to spurious interrupts in some corner cases. For example,
a timer interrupt after power_pmu_del will re-enable interrupts since the PMI
is cleared, and will trigger a PMI again since the PMAO bit is still set.

We can't just replay PMI any time. Hence this approach is preferred rather
than replaying the PMI before resetting the overflown PMC. Patch also documents
a race condition in core-book3s which can trigger these PMC messages during
the idle path in PowerNV.

Fixes: f442d004806e ("powerpc/64s: Add support to mask perf interrupts and 
replay them")
Reported-by: Nageswara R Sastry 
Suggested-by: Nicholas Piggin 
Suggested-by: Madhavan Srinivasan 
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/include/asm/pmc.h  | 11 +
 arch/powerpc/perf/core-book3s.c | 55 +
 2 files changed, 66 insertions(+)

diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index c6bbe9778d3c..97b4bd8de25b 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -34,11 +34,22 @@ static inline void ppc_set_pmu_inuse(int inuse)
 #endif
 }
 
+static inline int clear_paca_irq_pmi(void)
+{
+   if (get_paca()->irq_happened & PACA_IRQ_PMI) {
+   WARN_ON_ONCE(mfmsr() & MSR_EE);
+   get_paca()->irq_happened &= ~PACA_IRQ_PMI;
+   return 1;
+   }
+   return 0;
+}
+
 extern void power4_enable_pmcs(void);
 
 #else /* CONFIG_PPC64 */
 
 static inline void ppc_set_pmu_inuse(int inuse) { }
+static inline int clear_paca_irq_pmi(void) { return 0; }
 
 #endif
 
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 766f064f00fb..18ca3c90f866 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -847,6 +847,20 @@ static void write_pmc(int idx, unsigned long val)
}
 }
 
+static int pmc_overflown(int idx)
+{
+   unsigned long val[8];
+   int i;
+
+   for (i = 0; i < ppmu->n_counter; i++)
+  

[PATCH] powerpc/perf: Clear pending PMI in ppmu callbacks

2021-04-06 Thread Athira Rajeev
Running perf fuzzer testsuite popped up below messages
in the dmesg logs:

"Can't find PMC that caused IRQ"

This means a PMU exception happened, but none of the PMCs (Performance
Monitor Counters) were found to be overflown. The perf interrupt handler checks
the PMCs to see which PMC has overflown, and if none of the PMCs are
overflown (counter value not >= 0x8000), it throws the warning:
"Can't find PMC that caused IRQ".

Powerpc has the capability to mask and replay a performance monitoring
interrupt (PMI). In case of a replayed PMI, there are some corner cases
that clear the PMCs after masking. In such cases, the perf interrupt
handler will not find the active PMC values that had caused the overflow,
which leads to this message. This patchset attempts to fix those
corner cases.

However, there is one more case in PowerNV where these messages are
emitted during system-wide profiling or when a specific CPU is monitored
for an event. That is, when a counter overflows just before entering idle
and a PMI gets triggered after wakeup from idle. Since the PMCs
are not saved in the idle path, the perf interrupt handler will not
find an overflown counter value and emits the "Can't find PMC" messages.
This patch documents this race condition in powerpc core-book3s.

Patch fixes the ppmu callbacks to disable the pending interrupt before clearing
the overflown PMC, and documents the race condition in the idle path.

Athira Rajeev (1):
  powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting
an overflown PMC

 arch/powerpc/include/asm/pmc.h  | 11 +
 arch/powerpc/perf/core-book3s.c | 55 +
 2 files changed, 66 insertions(+)

-- 
1.8.3.1



Re: [PATCH] powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC

2021-04-06 Thread Athira Rajeev
Hi,

Cover letter is missing in this patch. I will resend the patch along with the
cover letter.
Sorry for the noise.

Thanks,
Athira
> On 06-Apr-2021, at 7:44 PM, Athira Rajeev  wrote:
> 
> Running perf fuzzer showed below in dmesg logs:
> "Can't find PMC that caused IRQ"
> 
> This means a PMU exception happened, but none of the PMC's (Performance
> Monitor Counter) were found to be overflown. There are some corner cases
> that clears the PMCs after PMI gets masked. In such cases, the perf
> interrupt handler will not find the active PMC values that had caused
> the overflow and thus leads to this message while replaying.
> 
> Case 1: PMU Interrupt happens during replay of other interrupts and
> counter values gets cleared by PMU callbacks before replay:
> 
> During replay of interrupts like timer, __do_irq and doorbell exception, we
> conditionally enable interrupts via may_hard_irq_enable(). This could
> potentially create a window to generate a PMI. Since irq soft mask is set
> to ALL_DISABLED, the PMI will get masked here. We could get IPIs run before
> perf interrupt is replayed and the PMU events could deleted or stopped.
> This will change the PMU SPR values and resets the counters. Snippet of
> ftrace log showing PMU callbacks invoked in "__do_irq":
> 
> -0 [051] dns. 132025441306354: __do_irq <-call_do_irq
> -0 [051] dns. 132025441306430: irq_enter <-__do_irq
> -0 [051] dns. 132025441306503: irq_enter_rcu <-__do_irq
> -0 [051] dnH. 132025441306599: xive_get_irq <-__do_irq
> <<>>
> -0 [051] dnH. 132025441307770: 
> generic_smp_call_function_single_interrupt <-smp_ipi_demux_relaxed
> -0 [051] dnH. 132025441307839: flush_smp_call_function_queue 
> <-smp_ipi_demux_relaxed
> -0 [051] dnH. 132025441308057: _raw_spin_lock <-event_function
> -0 [051] dnH. 132025441308206: power_pmu_disable <-perf_pmu_disable
> -0 [051] dnH. 132025441308337: power_pmu_del <-event_sched_out
> -0 [051] dnH. 132025441308407: power_pmu_read <-power_pmu_del
> -0 [051] dnH. 132025441308477: read_pmc <-power_pmu_read
> -0 [051] dnH. 132025441308590: isa207_disable_pmc <-power_pmu_del
> -0 [051] dnH. 132025441308663: write_pmc <-power_pmu_del
> -0 [051] dnH. 132025441308787: power_pmu_event_idx 
> <-perf_event_update_userpage
> -0 [051] dnH. 132025441308859: rcu_read_unlock_strict 
> <-perf_event_update_userpage
> -0 [051] dnH. 132025441308975: power_pmu_enable <-perf_pmu_enable
> <<>>
> -0 [051] dnH. 132025441311108: irq_exit <-__do_irq
> -0 [051] dns. 132025441311319: performance_monitor_exception 
> <-replay_soft_interrupts
> 
> Case 2: PMI's masked during local_* operations, example local_add.
> If the local_add operation happens within a local_irq_save, replay of
> PMI will be during local_irq_restore. Similar to case 1, this could
> also create a window before replay where PMU events gets deleted or
> stopped.
> 
> Patch adds a fix to update the PMU callback functions (del,stop,enable) to
> check for pending perf interrupt. If there is an overflown PMC and pending
> perf interrupt indicated in Paca, clear the PMI bit in paca to drop that
> sample. In case of power_pmu_del, also clear the MMCR0 PMAO bit which
> otherwise could lead to spurious interrupts in some corner cases. Example,
> a timer after power_pmu_del which will re-enable interrupts since PMI is
> cleared and triggers a PMI again since PMAO bit is still set.
> 
> We can't just replay PMI any time. Hence this approach is preferred rather
> than replaying PMI before resetting overflown PMC. Patch also documents
> core-book3s on a race condition which can trigger these PMC messages during
> idle path in PowerNV.
> 
> Fixes: f442d004806e ("powerpc/64s: Add support to mask perf interrupts and 
> replay them")
> Reported-by: Nageswara R Sastry 
> Suggested-by: Nicholas Piggin 
> Suggested-by: Madhavan Srinivasan 
> Signed-off-by: Athira Rajeev 
> ---
> arch/powerpc/include/asm/pmc.h  | 11 +
> arch/powerpc/perf/core-book3s.c | 55 +
> 2 files changed, 66 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
> index c6bbe9778d3c..97b4bd8de25b 100644
> --- a/arch/powerpc/include/asm/pmc.h
> +++ b/arch/powerpc/include/asm/pmc.h
> @@ -34,11 +34,22 @@ static inline void ppc_set_pmu_inuse(int inuse)
> #endif
> }
> 
> +static inline int clear_paca_irq_pmi(void)
> +{
> + if (get_paca()->irq_happened & PACA_IRQ_PMI) {
> + WARN_ON_ONCE(mfmsr() & MSR_EE);
> + get_paca()->irq_happened &= ~PACA_IRQ_PMI;
> + return 1;
> + }
> + return 0;
> 

[PATCH] powerpc/perf: Fix PMU callbacks to clear pending PMI before resetting an overflown PMC

2021-04-06 Thread Athira Rajeev
Running perf fuzzer showed below in dmesg logs:
"Can't find PMC that caused IRQ"

This means a PMU exception happened, but none of the PMCs (Performance
Monitor Counters) were found to be overflown. There are some corner cases
that clear the PMCs after the PMI gets masked. In such cases, the perf
interrupt handler will not find the active PMC values that had caused
the overflow, which leads to this message while replaying.

Case 1: PMU Interrupt happens during replay of other interrupts and
counter values gets cleared by PMU callbacks before replay:

During replay of interrupts like timer, __do_irq and doorbell exception, we
conditionally enable interrupts via may_hard_irq_enable(). This could
potentially create a window to generate a PMI. Since irq soft mask is set
to ALL_DISABLED, the PMI will get masked here. We could get IPIs run before
the perf interrupt is replayed, and the PMU events could be deleted or stopped.
This will change the PMU SPR values and reset the counters. Snippet of
ftrace log showing PMU callbacks invoked in "__do_irq":

-0 [051] dns. 132025441306354: __do_irq <-call_do_irq
-0 [051] dns. 132025441306430: irq_enter <-__do_irq
-0 [051] dns. 132025441306503: irq_enter_rcu <-__do_irq
-0 [051] dnH. 132025441306599: xive_get_irq <-__do_irq
<<>>
-0 [051] dnH. 132025441307770: generic_smp_call_function_single_interrupt 
<-smp_ipi_demux_relaxed
-0 [051] dnH. 132025441307839: flush_smp_call_function_queue 
<-smp_ipi_demux_relaxed
-0 [051] dnH. 132025441308057: _raw_spin_lock <-event_function
-0 [051] dnH. 132025441308206: power_pmu_disable <-perf_pmu_disable
-0 [051] dnH. 132025441308337: power_pmu_del <-event_sched_out
-0 [051] dnH. 132025441308407: power_pmu_read <-power_pmu_del
-0 [051] dnH. 132025441308477: read_pmc <-power_pmu_read
-0 [051] dnH. 132025441308590: isa207_disable_pmc <-power_pmu_del
-0 [051] dnH. 132025441308663: write_pmc <-power_pmu_del
-0 [051] dnH. 132025441308787: power_pmu_event_idx 
<-perf_event_update_userpage
-0 [051] dnH. 132025441308859: rcu_read_unlock_strict 
<-perf_event_update_userpage
-0 [051] dnH. 132025441308975: power_pmu_enable <-perf_pmu_enable
<<>>
-0 [051] dnH. 132025441311108: irq_exit <-__do_irq
-0 [051] dns. 132025441311319: performance_monitor_exception 
<-replay_soft_interrupts

Case 2: PMI's masked during local_* operations, example local_add.
If the local_add operation happens within a local_irq_save, replay of
PMI will be during local_irq_restore. Similar to case 1, this could
also create a window before replay where PMU events gets deleted or
stopped.

Patch adds a fix to update the PMU callback functions (del,stop,enable) to
check for pending perf interrupt. If there is an overflown PMC and pending
perf interrupt indicated in Paca, clear the PMI bit in paca to drop that
sample. In case of power_pmu_del, also clear the MMCR0 PMAO bit, which
otherwise could lead to spurious interrupts in some corner cases. For example,
a timer interrupt after power_pmu_del will re-enable interrupts since the PMI
is cleared, and will trigger a PMI again since the PMAO bit is still set.

We can't just replay PMI any time. Hence this approach is preferred rather
than replaying the PMI before resetting the overflown PMC. Patch also documents
a race condition in core-book3s which can trigger these PMC messages during
the idle path in PowerNV.

Fixes: f442d004806e ("powerpc/64s: Add support to mask perf interrupts and 
replay them")
Reported-by: Nageswara R Sastry 
Suggested-by: Nicholas Piggin 
Suggested-by: Madhavan Srinivasan 
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/include/asm/pmc.h  | 11 +
 arch/powerpc/perf/core-book3s.c | 55 +
 2 files changed, 66 insertions(+)

diff --git a/arch/powerpc/include/asm/pmc.h b/arch/powerpc/include/asm/pmc.h
index c6bbe9778d3c..97b4bd8de25b 100644
--- a/arch/powerpc/include/asm/pmc.h
+++ b/arch/powerpc/include/asm/pmc.h
@@ -34,11 +34,22 @@ static inline void ppc_set_pmu_inuse(int inuse)
 #endif
 }
 
+static inline int clear_paca_irq_pmi(void)
+{
+   if (get_paca()->irq_happened & PACA_IRQ_PMI) {
+   WARN_ON_ONCE(mfmsr() & MSR_EE);
+   get_paca()->irq_happened &= ~PACA_IRQ_PMI;
+   return 1;
+   }
+   return 0;
+}
+
 extern void power4_enable_pmcs(void);
 
 #else /* CONFIG_PPC64 */
 
 static inline void ppc_set_pmu_inuse(int inuse) { }
+static inline int clear_paca_irq_pmi(void) { return 0; }
 
 #endif
 
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 766f064f00fb..18ca3c90f866 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -847,6 +847,20 @@ static void write_pmc(int idx, unsigned long val)
}
 }
 
+static int pmc_overflown(int idx)
+{
+   unsigned long val[8];
+   int i;
+
+   for (i = 0; i < ppmu->n_counter; i++)
+  

Re: [PATCH v3 1/2] powerpc/perf: Infrastructure to support checking of attr.config*

2021-04-01 Thread Athira Rajeev



> On 25-Mar-2021, at 5:23 PM, Madhavan Srinivasan  wrote:
> 
> Introduce code to support the checking of attr.config* for
> values which are reserved for a given platform.
> Performance Monitoring Unit (PMU) configuration registers
> have fields that are reserved and some specific values for
> bit fields are reserved. For ex., MMCRA[61:62] is
> Random Sampling Mode (SM) and value of 0b11 for this field
> is reserved.
> 
> Writing non-zero or invalid values in these fields will
> have unknown behaviours.
> 
> Patch adds a generic call-back function "check_attr_config"
> in "struct power_pmu", to be called in event_init to
> check for attr.config* values for a given platform.
> "check_attr_config" is valid only for raw event type.
> 
> Signed-off-by: Madhavan Srinivasan 
> ---
> Changelog v2:
> -Fixed commit message
> 
> Changelog v1:
> -Fixed commit message and in-code comments

Changes look fine to me.

Reviewed-by: Athira Rajeev 

Thanks,
Athira
> 
> arch/powerpc/include/asm/perf_event_server.h |  6 ++
> arch/powerpc/perf/core-book3s.c  | 14 ++
> 2 files changed, 20 insertions(+)
> 
> diff --git a/arch/powerpc/include/asm/perf_event_server.h 
> b/arch/powerpc/include/asm/perf_event_server.h
> index 00e7e671bb4b..dde97d7d9253 100644
> --- a/arch/powerpc/include/asm/perf_event_server.h
> +++ b/arch/powerpc/include/asm/perf_event_server.h
> @@ -67,6 +67,12 @@ struct power_pmu {
>* the pmu supports extended perf regs capability
>*/
>   int capabilities;
> + /*
> +  * Function to check event code for values which are
> +  * reserved. Function takes struct perf_event as input,
> +  * since event code could be spread in attr.config*
> +  */
> + int (*check_attr_config)(struct perf_event *ev);
> };
> 
> /*
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 6817331e22ff..c6eeb4fdc5fd 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -1958,6 +1958,20 @@ static int power_pmu_event_init(struct perf_event 
> *event)
> 
>   if (ppmu->blacklist_ev && is_event_blacklisted(ev))
>   return -EINVAL;
> + /*
> +  * PMU config registers have fields that are
> +  * reserved and specific value to bit field as reserved.
> +  * For ex., MMCRA[61:62] is Randome Sampling Mode (SM)
> +  * and value of 0b11 to this field is reserved.
> +  *
> +  * This check is needed only for raw event type,
> +  * since tools like fuzzer use raw event type to
> +  * provide randomized event code values for test.
> +  *
> +  */
> + if (ppmu->check_attr_config &&
> + ppmu->check_attr_config(event))
> + return -EINVAL;
>   break;
>   default:
>   return -ENOENT;
> -- 
> 2.26.2
> 
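
As an illustration of the new hook (not code from this patch): a platform
callback wired up through the new "check_attr_config" member could reject a
reserved encoding roughly as below. The bit position used to extract the
sampling-mode field is an assumption made up for the example.

	/* Hypothetical platform callback; the field extraction is illustrative only. */
	#define EXAMPLE_SM_SHIFT	58	/* made-up position of the SM bits */

	static int example_check_attr_config(struct perf_event *ev)
	{
		u64 sm = (ev->attr.config >> EXAMPLE_SM_SHIFT) & 0x3;

		/* 0b11 in the Random Sampling Mode field is reserved. */
		if (sm == 0x3)
			return -EINVAL;

		return 0;
	}

	/* and in the platform's struct power_pmu: .check_attr_config = example_check_attr_config, */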



Re: [PATCH V2 3/5] tools/perf: Add powerpc support for PERF_SAMPLE_WEIGHT_STRUCT

2021-03-26 Thread Athira Rajeev
> On 25-Mar-2021, at 1:13 AM, Jiri Olsa  wrote:
> 
> On Mon, Mar 22, 2021 at 10:57:25AM -0400, Athira Rajeev wrote:
>> Add arch specific arch_evsel__set_sample_weight() to set the new
>> sample type for powerpc.
>> 
>> Add arch specific arch_perf_parse_sample_weight() to store the
>> sample->weight values depending on the sample type applied.
>> if the new sample type (PERF_SAMPLE_WEIGHT_STRUCT) is applied,
>> store only the lower 32 bits to sample->weight. If sample type
>> is 'PERF_SAMPLE_WEIGHT', store the full 64-bit to sample->weight.
>> 
>> Signed-off-by: Athira Rajeev 
>> ---
>>  tools/perf/arch/powerpc/util/Build   |  2 ++
>>  tools/perf/arch/powerpc/util/event.c | 32 
>>  tools/perf/arch/powerpc/util/evsel.c |  8 
>>  3 files changed, 42 insertions(+)
>>  create mode 100644 tools/perf/arch/powerpc/util/event.c
>>  create mode 100644 tools/perf/arch/powerpc/util/evsel.c
>> 
>> diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
>> index b7945e5a543b..8a79c4126e5b 100644
>> --- a/tools/perf/arch/powerpc/util/Build
>> +++ b/tools/perf/arch/powerpc/util/Build
>> @@ -4,6 +4,8 @@ perf-y += kvm-stat.o
>>  perf-y += perf_regs.o
>>  perf-y += mem-events.o
>>  perf-y += sym-handling.o
>> +perf-y += evsel.o
>> +perf-y += event.o
>>  perf-$(CONFIG_DWARF) += dwarf-regs.o
>>  perf-$(CONFIG_DWARF) += skip-callchain-idx.o
>> 
>> diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
>> new file mode 100644
>> index ..f49d32c2c8ae
>> --- /dev/null
>> +++ b/tools/perf/arch/powerpc/util/event.c
>> @@ -0,0 +1,32 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#include 
>> +#include 
>> +#include 
>> +
>> +#include "../../../util/event.h"
>> +#include "../../../util/synthetic-events.h"
>> +#include "../../../util/machine.h"
>> +#include "../../../util/tool.h"
>> +#include "../../../util/map.h"
>> +#include "../../../util/debug.h"
> 
> nit, just #include "utils/...h" should work no?
> 
> other than that, the patchset looks ok to me
> 
> Acked-by: Jiri Olsa 

Hi Jiri, Arnaldo

Thanks for reviewing the patch set.
I checked that, just using "utils/...h" also works.
Below is the change which I verified.
Since the patches are presently merged in
https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/log/?h=tmp.perf/core,
can you please suggest how can we go about this change ?

diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
index 3bf441257466..c479d0a0e696 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -3,12 +3,12 @@
 #include 
 #include 
 
-#include "../../../util/event.h"
-#include "../../../util/synthetic-events.h"
-#include "../../../util/machine.h"
-#include "../../../util/tool.h"
-#include "../../../util/map.h"
-#include "../../../util/debug.h"
+#include "util/event.h"
+#include "util/synthetic-events.h"
+#include "util/machine.h"
+#include "util/tool.h"
+#include "util/map.h"
+#include "util/debug.h"
 
 void arch_perf_parse_sample_weight(struct perf_sample *data,
    const __u64 *array, u64 type)

Thanks
Athira

> thanks,
> jirka
> 
>> +
>> +void arch_perf_parse_sample_weight(struct perf_sample *data,
>> +   const __u64 *array, u64 type)
>> +{
>> +	union perf_sample_weight weight;
>> +
>> +	weight.full = *array;
>> +	if (type & PERF_SAMPLE_WEIGHT)
>> +		data->weight = weight.full;
>> +	else
>> +		data->weight = weight.var1_dw;
>> +}
>> +
>> +void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
>> +	__u64 *array, u64 type)
>> +{
>> +	*array = data->weight;
>> +
>> +	if (type & PERF_SAMPLE_WEIGHT_STRUCT)
>> +		*array &= 0x;
>> +}
>> 
>> diff --git a/tools/perf/arch/powerpc/util/evsel.c b/tools/perf/arch/powerpc/util/evsel.c
>> new file mode 100644
>> index ..2f733cdc8dbb
>> --- /dev/null
>> +++ b/tools/perf/arch/powerpc/util/evsel.c
>> @@ -0,0 +1,8 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +#include 
>> +#include "util/evsel.h"
>> +
>> +void arch_evsel__set_sample_weight(struct evsel *evsel)
>> +{
>> +	evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
>> +}
>> -- 
>> 1.8.3.1

[PATCH V2 5/5] tools/perf: Display sort dimension p_stage_cyc only on supported archs

2021-03-22 Thread Athira Rajeev
The sort dimension "p_stage_cyc" is used to represent pipeline
stage cycle information. Presently, this is used only in powerpc.
For unsupported platforms, we don't want to display it
in the perf report output columns. Hence add a check in sort_dimension__add()
and skip the sort key in case it is not applicable for the particular arch.

Signed-off-by: Athira Rajeev 
---
 tools/perf/arch/powerpc/util/event.c |  7 +++
 tools/perf/util/event.h  |  1 +
 tools/perf/util/sort.c   | 19 +++
 3 files changed, 27 insertions(+)

diff --git a/tools/perf/arch/powerpc/util/event.c 
b/tools/perf/arch/powerpc/util/event.c
index 22521bc9481a..3bf441257466 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -44,3 +44,10 @@ const char *arch_perf_header_entry(const char *se_header)
return "Dispatch Cyc";
return se_header;
 }
+
+int arch_support_sort_key(const char *sort_key)
+{
+   if (!strcmp(sort_key, "p_stage_cyc"))
+   return 1;
+   return 0;
+}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index e5da4a695ff2..8a62fb39e365 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -429,5 +429,6 @@ void  cpu_map_data__synthesize(struct 
perf_record_cpu_map_data *data, struct per
 void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 
*array, u64 type);
 void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 
*array, u64 type);
 const char *arch_perf_header_entry(const char *se_header);
+int arch_support_sort_key(const char *sort_key);
 
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index d262261ad1a6..e8030778ff44 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -47,6 +47,7 @@
 inthave_ignore_callees = 0;
 enum sort_mode sort__mode = SORT_MODE__NORMAL;
 const char *dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
+const char *arch_specific_sort_keys[] = {"p_stage_cyc"};
 
 /*
  * Replaces all occurrences of a char used with the:
@@ -1837,6 +1838,11 @@ struct sort_dimension {
int taken;
 };
 
+int __weak arch_support_sort_key(const char *sort_key __maybe_unused)
+{
+   return 0;
+}
+
 const char * __weak arch_perf_header_entry(const char *se_header)
 {
return se_header;
@@ -2773,6 +2779,19 @@ int sort_dimension__add(struct perf_hpp_list *list, 
const char *tok,
 {
unsigned int i, j;
 
+   /*
+* Check to see if there are any arch specific
+* sort dimensions not applicable for the current
+* architecture. If so, Skip that sort key since
+* we don't want to display it in the output fields.
+*/
+   for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) {
+   if (!strcmp(arch_specific_sort_keys[j], tok) &&
+   !arch_support_sort_key(tok)) {
+   return 0;
+   }
+   }
+
for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
struct sort_dimension *sd = _sort_dimensions[i];
 
-- 
1.8.3.1



[PATCH V2 0/5] powerpc/perf: Export processor pipeline stage cycles information

2021-03-22 Thread Athira Rajeev
arch specific header string for matching
sort order in patch2.
  
Athira Rajeev (5):
  powerpc/perf: Expose processor pipeline stage cycles using
PERF_SAMPLE_WEIGHT_STRUCT
  tools/perf: Add dynamic headers for perf report columns
  tools/perf: Add powerpc support for PERF_SAMPLE_WEIGHT_STRUCT
  tools/perf: Support pipeline stage cycles for powerpc
  tools/perf: Display sort dimension p_stage_cyc only on supported archs

 arch/powerpc/include/asm/perf_event_server.h |  2 +-
 arch/powerpc/perf/core-book3s.c  |  4 +-
 arch/powerpc/perf/isa207-common.c| 29 --
 arch/powerpc/perf/isa207-common.h|  6 ++-
 tools/perf/Documentation/perf-report.txt |  2 +
 tools/perf/arch/powerpc/util/Build   |  2 +
 tools/perf/arch/powerpc/util/event.c | 53 
 tools/perf/arch/powerpc/util/evsel.c |  8 
 tools/perf/util/event.h  |  3 ++
 tools/perf/util/hist.c   | 11 +++--
 tools/perf/util/hist.h   |  1 +
 tools/perf/util/session.c|  4 +-
 tools/perf/util/sort.c   | 60 +++-
 tools/perf/util/sort.h   |  2 +
 14 files changed, 174 insertions(+), 13 deletions(-)
 create mode 100644 tools/perf/arch/powerpc/util/event.c
 create mode 100644 tools/perf/arch/powerpc/util/evsel.c

-- 
1.8.3.1



[PATCH V2 4/5] tools/perf: Support pipeline stage cycles for powerpc

2021-03-22 Thread Athira Rajeev
The pipeline stage cycle details can be recorded on powerpc from
the contents of Performance Monitor Unit (PMU) registers. On
ISA v3.1 platforms, the sampling registers expose the cycles spent in
different pipeline stages. Patch adds perf tools support to present
two of the cycle counters along with memory latency (weight).

Re-use the field 'ins_lat' for storing the first pipeline stage cycle.
This is stored in 'var2_w' field of 'perf_sample_weight'.

Add a new field 'p_stage_cyc' to store the second pipeline stage cycle
which is stored in 'var3_w' field of perf_sample_weight.

Add a new sort function 'Pipeline Stage Cycle' and include this in
default_mem_sort_order[]. This new sort function may be used to denote
some other pipeline stage in another architecture. So add this to the
list of sort entries that can have a dynamic header string.
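
With a kernel and perf tool carrying this series, the new column would show up
through the usual memory-profiling flow, for example (illustrative only):

	# perf mem record -- ./workload
	# perf mem report --stdio

where, on ISA v3.1 powerpc, the report columns would include the renamed
"Finish Cyc" and the new "Dispatch Cyc" fields.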

Signed-off-by: Athira Rajeev 
---
 tools/perf/Documentation/perf-report.txt |  2 ++
 tools/perf/arch/powerpc/util/event.c | 18 --
 tools/perf/util/event.h  |  1 +
 tools/perf/util/hist.c   | 11 ---
 tools/perf/util/hist.h   |  1 +
 tools/perf/util/session.c|  4 +++-
 tools/perf/util/sort.c   | 24 ++--
 tools/perf/util/sort.h   |  2 ++
 8 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt 
b/tools/perf/Documentation/perf-report.txt
index f546b5e9db05..563fb01a9b8d 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -112,6 +112,8 @@ OPTIONS
- ins_lat: Instruction latency in core cycles. This is the global 
instruction
  latency
- local_ins_lat: Local instruction latency version
+   - p_stage_cyc: On powerpc, this presents the number of cycles spent in a
+ pipeline stage. And currently supported only on powerpc.
 
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
diff --git a/tools/perf/arch/powerpc/util/event.c 
b/tools/perf/arch/powerpc/util/event.c
index f49d32c2c8ae..22521bc9481a 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -18,8 +18,11 @@ void arch_perf_parse_sample_weight(struct perf_sample *data,
weight.full = *array;
if (type & PERF_SAMPLE_WEIGHT)
data->weight = weight.full;
-   else
+   else {
data->weight = weight.var1_dw;
+   data->ins_lat = weight.var2_w;
+   data->p_stage_cyc = weight.var3_w;
+   }
 }
 
 void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
@@ -27,6 +30,17 @@ void arch_perf_synthesize_sample_weight(const struct 
perf_sample *data,
 {
*array = data->weight;
 
-   if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+   if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
*array &= 0x;
+   *array |= ((u64)data->ins_lat << 32);
+   }
+}
+
+const char *arch_perf_header_entry(const char *se_header)
+{
+   if (!strcmp(se_header, "Local INSTR Latency"))
+   return "Finish Cyc";
+   else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+   return "Dispatch Cyc";
+   return se_header;
 }
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 6106a9c134c9..e5da4a695ff2 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -147,6 +147,7 @@ struct perf_sample {
u8  cpumode;
u16 misc;
u16 ins_lat;
+   u16 p_stage_cyc;
bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index c82f5fc26af8..9299ee535518 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -211,6 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct 
hist_entry *h)
hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
+   hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16);
else
@@ -289,13 +290,14 @@ static long hist_time(unsigned long htime)
 }
 
 static void he_stat__add_period(struct he_stat *he_stat, u64 period,
-   u64 weight, u64 ins_lat)
+   u64 weight, u64 ins_lat, u64 p_stage_cyc)
 {
 
he_stat->period += period;
he_stat->weight += weight;
he_stat->nr_events  += 1;
he_stat->ins_lat+= ins_lat;
+   he_stat->p_stage_cyc+= p_stage_cyc;
 }
 
 static void he_stat__add_stat(struct he_stat *dest, stru

[PATCH V2 1/5] powerpc/perf: Expose processor pipeline stage cycles using PERF_SAMPLE_WEIGHT_STRUCT

2021-03-22 Thread Athira Rajeev
Performance Monitoring Unit (PMU) registers in powerpc provide
information on cycles elapsed between different stages in the
pipeline. This can be used for application tuning. On ISA v3.1
platforms, this information is exposed by the sampling registers.
Patch adds kernel support to capture two of the cycle counters
as part of perf sample using the sample type:
PERF_SAMPLE_WEIGHT_STRUCT.

The power PMU function 'get_mem_weight' currently uses the 64-bit weight
field of perf_sample_data to capture memory latency. But following the
introduction of PERF_SAMPLE_WEIGHT_TYPE, the weight field could contain a
64-bit or 32-bit value depending on the architecture support for
PERF_SAMPLE_WEIGHT_STRUCT. Patch uses WEIGHT_STRUCT to expose the
pipeline stage cycles info. Hence update the ppmu functions to work for
64-bit and 32-bit weight values.

If the sample type is PERF_SAMPLE_WEIGHT, use the 64-bit weight field.
If the sample type is PERF_SAMPLE_WEIGHT_STRUCT, memory subsystem
latency is stored in the low 32 bits of the perf_sample_weight structure.
Also for CPU_FTR_ARCH_31, capture the two cycle counter values in
two 16-bit fields of the perf_sample_weight structure.
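
For reference, a minimal userspace illustration of how the three values share
the 64-bit weight, assuming the little-endian layout of union
perf_sample_weight from the uapi header:

	#include <stdint.h>
	#include <stdio.h>

	union sample_weight_example {
		uint64_t full;			/* PERF_SAMPLE_WEIGHT: single 64-bit value */
		struct {
			uint32_t var1_dw;	/* memory subsystem latency */
			uint16_t var2_w;	/* finish cycles (ins_lat, "Finish Cyc") */
			uint16_t var3_w;	/* dispatch cycles (p_stage_cyc, "Dispatch Cyc") */
		};
	};

	int main(void)
	{
		union sample_weight_example w = { .var1_dw = 120, .var2_w = 7, .var3_w = 3 };

		printf("full=0x%016llx latency=%u finish=%u dispatch=%u\n",
		       (unsigned long long)w.full,
		       (unsigned)w.var1_dw, (unsigned)w.var2_w, (unsigned)w.var3_w);
		return 0;
	}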

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/include/asm/perf_event_server.h |  2 +-
 arch/powerpc/perf/core-book3s.c  |  4 ++--
 arch/powerpc/perf/isa207-common.c| 29 +---
 arch/powerpc/perf/isa207-common.h|  6 +-
 4 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h 
b/arch/powerpc/include/asm/perf_event_server.h
index 00e7e671bb4b..112cf092d7b3 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -43,7 +43,7 @@ struct power_pmu {
u64 alt[]);
void(*get_mem_data_src)(union perf_mem_data_src *dsrc,
u32 flags, struct pt_regs *regs);
-   void(*get_mem_weight)(u64 *weight);
+   void(*get_mem_weight)(u64 *weight, u64 type);
unsigned long   group_constraint_mask;
unsigned long   group_constraint_val;
u64 (*bhrb_filter_map)(u64 branch_sample_type);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 766f064f00fb..6936763246bd 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2206,9 +2206,9 @@ static void record_and_restart(struct perf_event *event, 
unsigned long val,
ppmu->get_mem_data_src)
ppmu->get_mem_data_src(&data.data_src, ppmu->flags, regs);
 
-   if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
+   if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
ppmu->get_mem_weight)
-   ppmu->get_mem_weight(&data.weight.full);
+   ppmu->get_mem_weight(&data.weight.full, event->attr.sample_type);
 
if (perf_event_overflow(event, &data, regs))
power_pmu_stop(event, 0);
diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index e4f577da33d8..5dcbdbd54598 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -284,8 +284,10 @@ void isa207_get_mem_data_src(union perf_mem_data_src 
*dsrc, u32 flags,
}
 }
 
-void isa207_get_mem_weight(u64 *weight)
+void isa207_get_mem_weight(u64 *weight, u64 type)
 {
+   union perf_sample_weight *weight_fields;
+   u64 weight_lat;
u64 mmcra = mfspr(SPRN_MMCRA);
u64 exp = MMCRA_THR_CTR_EXP(mmcra);
u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
@@ -296,9 +298,30 @@ void isa207_get_mem_weight(u64 *weight)
mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);
 
if (val == 0 || val == 7)
-   *weight = 0;
+   weight_lat = 0;
else
-   *weight = mantissa << (2 * exp);
+   weight_lat = mantissa << (2 * exp);
+
+   /*
+* Use 64 bit weight field (full) if sample type is
+* WEIGHT.
+*
+* if sample type is WEIGHT_STRUCT:
+* - store memory latency in the lower 32 bits.
+* - For ISA v3.1, use remaining two 16 bit fields of
+*   perf_sample_weight to store cycle counter values
+*   from sier2.
+*/
+   weight_fields = (union perf_sample_weight *)weight;
+   if (type & PERF_SAMPLE_WEIGHT)
+   weight_fields->full = weight_lat;
+   else {
+   weight_fields->var1_dw = (u32)weight_lat;
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   weight_fields->var2_w = 
P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2));
+   weight_fields->var3_w = 
P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2));
+   

[PATCH V2 2/5] tools/perf: Add dynamic headers for perf report columns

2021-03-22 Thread Athira Rajeev
Currently the header string for different columns in perf report
is fixed. Some fields of perf sample could have different meaning
for different architectures than the meaning conveyed by the header
string. An example is the new field 'var2_w' of perf_sample_weight
structure. This is presently captured as 'Local INSTR Latency' in
perf mem report. But this could be used to denote a different latency
cycle in another architecture.

Introduce a weak function arch_perf_header_entry() to set
the arch specific header string for the fields which can have a dynamic
header. If the architecture does not provide this function, fall back to
the default header string value.
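
A rough sketch of the weak/strong override pattern this relies on (simplified,
not the actual perf source; the two definitions live in different objects and
the linker picks the arch-specific one when it exists):

/* tools/perf/util/sort.c (sketch): generic fallback, marked weak. */
const char * __weak arch_perf_header_entry(const char *se_header)
{
	return se_header;	/* no override: keep the default column header */
}

/* tools/perf/arch/<arch>/util/ (sketch): the strong definition wins at link
 * time, so the column header changes only where the arch defines a meaning. */
const char *arch_perf_header_entry(const char *se_header)
{
	if (!strcmp(se_header, "Local INSTR Latency"))
		return "Finish Cyc";
	return se_header;
}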

Signed-off-by: Athira Rajeev 
---
 tools/perf/util/event.h |  1 +
 tools/perf/util/sort.c  | 19 ++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index f603edbbbc6f..6106a9c134c9 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -427,5 +427,6 @@ void  cpu_map_data__synthesize(struct 
perf_record_cpu_map_data *data, struct per
 
 void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 
*array, u64 type);
 void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 
*array, u64 type);
+const char *arch_perf_header_entry(const char *se_header);
 
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 552b590485bf..eeb03e749181 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -25,6 +25,7 @@
 #include 
 #include "mem-events.h"
 #include "annotate.h"
+#include "event.h"
 #include "time-utils.h"
 #include "cgroup.h"
 #include "machine.h"
@@ -45,6 +46,7 @@
 regex_tignore_callees_regex;
 inthave_ignore_callees = 0;
 enum sort_mode sort__mode = SORT_MODE__NORMAL;
+const char *dynamic_headers[] = {"local_ins_lat"};
 
 /*
  * Replaces all occurrences of a char used with the:
@@ -1816,6 +1818,16 @@ struct sort_dimension {
int taken;
 };
 
+const char * __weak arch_perf_header_entry(const char *se_header)
+{
+   return se_header;
+}
+
+static void sort_dimension_add_dynamic_header(struct sort_dimension *sd)
+{
+   sd->entry->se_header = arch_perf_header_entry(sd->entry->se_header);
+}
+
 #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
 
 static struct sort_dimension common_sort_dimensions[] = {
@@ -2739,7 +2751,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const 
char *tok,
struct evlist *evlist,
int level)
 {
-   unsigned int i;
+   unsigned int i, j;
 
for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
struct sort_dimension *sd = _sort_dimensions[i];
@@ -2747,6 +2759,11 @@ int sort_dimension__add(struct perf_hpp_list *list, 
const char *tok,
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
 
+   for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) {
+   if (!strcmp(dynamic_headers[j], sd->name))
+   sort_dimension_add_dynamic_header(sd);
+   }
+
if (sd->entry == _parent) {
int ret = regcomp(_regex, parent_pattern, 
REG_EXTENDED);
if (ret) {
-- 
1.8.3.1



[PATCH V2 3/5] tools/perf: Add powerpc support for PERF_SAMPLE_WEIGHT_STRUCT

2021-03-22 Thread Athira Rajeev
Add arch specific arch_evsel__set_sample_weight() to set the new
sample type for powerpc.

Add arch specific arch_perf_parse_sample_weight() to store the
sample->weight values depending on the sample type applied.
If the new sample type (PERF_SAMPLE_WEIGHT_STRUCT) is applied,
store only the lower 32 bits to sample->weight. If the sample type
is 'PERF_SAMPLE_WEIGHT', store the full 64-bit value to sample->weight.
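
A small sketch of what that decoding amounts to, with a hypothetical raw value
(the helper name is made up; the real code is in the event.c diff below):

/* Sketch: split one recorded weight depending on the sample type.
 * e.g. raw = 0x0005000700000128 under WEIGHT_STRUCT decodes on little-endian
 * to var1_dw = 0x128 (296), var2_w = 7, var3_w = 5. */
static void decode_weight_sketch(struct perf_sample *data, __u64 raw, __u64 type)
{
	union perf_sample_weight w = { .full = raw };

	if (type & PERF_SAMPLE_WEIGHT)
		data->weight = w.full;		/* whole 64 bits are the latency */
	else
		data->weight = w.var1_dw;	/* WEIGHT_STRUCT: low 32 bits only */
}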

Signed-off-by: Athira Rajeev 
---
 tools/perf/arch/powerpc/util/Build   |  2 ++
 tools/perf/arch/powerpc/util/event.c | 32 
 tools/perf/arch/powerpc/util/evsel.c |  8 
 3 files changed, 42 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/event.c
 create mode 100644 tools/perf/arch/powerpc/util/evsel.c

diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index b7945e5a543b..8a79c4126e5b 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -4,6 +4,8 @@ perf-y += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += mem-events.o
 perf-y += sym-handling.o
+perf-y += evsel.o
+perf-y += event.o
 
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/event.c 
b/tools/perf/arch/powerpc/util/event.c
new file mode 100644
index ..f49d32c2c8ae
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+
+#include "../../../util/event.h"
+#include "../../../util/synthetic-events.h"
+#include "../../../util/machine.h"
+#include "../../../util/tool.h"
+#include "../../../util/map.h"
+#include "../../../util/debug.h"
+
+void arch_perf_parse_sample_weight(struct perf_sample *data,
+  const __u64 *array, u64 type)
+{
+   union perf_sample_weight weight;
+
+   weight.full = *array;
+   if (type & PERF_SAMPLE_WEIGHT)
+   data->weight = weight.full;
+   else
+   data->weight = weight.var1_dw;
+}
+
+void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
+   __u64 *array, u64 type)
+{
+   *array = data->weight;
+
+   if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+   *array &= 0xffffffff;
+}
diff --git a/tools/perf/arch/powerpc/util/evsel.c 
b/tools/perf/arch/powerpc/util/evsel.c
new file mode 100644
index ..2f733cdc8dbb
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/evsel.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include "util/evsel.h"
+
+void arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+   evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
+}
-- 
1.8.3.1



Re: [PATCH 4/4] tools/perf: Support pipeline stage cycles for powerpc

2021-03-17 Thread Athira Rajeev
> On 16-Mar-2021, at 4:48 AM, Jiri Olsa  wrote:
> 
> On Mon, Mar 15, 2021 at 01:22:09PM +0530, Athira Rajeev wrote:
> 
> SNIP
> 
>> +static char *setup_dynamic_sort_keys(char *str)
>> +{
>> +	unsigned int j;
>> +
>> +	if (sort__mode == SORT_MODE__MEMORY)
>> +		for (j = 0; j < ARRAY_SIZE(dynamic_sort_keys_mem); j++)
>> +			if (arch_support_dynamic_key(dynamic_sort_keys_mem[j])) {
>> +				str = suffix_if_not_in(dynamic_sort_keys_mem[j], str);
>> +				if (str == NULL)
>> +					return str;
>> +			}
>> +
>> +	return str;
>> +}
>> +
>>  static int __setup_sorting(struct evlist *evlist)
>>  {
>>  	char *str;
>> @@ -3050,6 +3085,12 @@ static int __setup_sorting(struct evlist *evlist)
>>  		}
>>  	}
>> 
>> +	str = setup_dynamic_sort_keys(str);
>> +	if (str == NULL) {
>> +		pr_err("Not enough memory to setup dynamic sort keys");
>> +		return -ENOMEM;
>> +	}
> 
> hum, so this is basicaly overloading the default_mem_sort_order for
> architecture, right?
> 
> then I think it'd be easier just overload default_mem_sort_order directly
> 
> I was thinking more about adding extra (arch specific) loop to
> sort_dimension__add or somehow add arch's specific stuff to
> memory_sort_dimensions

Hi Jiri,

Above patch was to append additional sort keys to sort order based on
sort mode and architecture support. I had initially thought of defining two
orders (say default_mem_sort_order plus mem_sort_order_pstage). But if
new sort keys get added for mem mode in future, we will need to keep
updating both orders. So preferred the approach of "appending" supported sort
keys to the default order.

Following your thought on using "sort_dimension__add", I tried the below
approach which is easier. The new sort dimension "p_stage_cyc" is presently
only supported on powerpc. For unsupported platforms, we don't want to display
it in the perf report output columns. Hence added a check in
sort_dimension__add() to skip the sort key in case it is not applicable for a
particular arch. Please help to check if the below approach looks fine.

diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
index b80fbee83b6e..7205767d75eb 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -44,3 +44,10 @@ const char *arch_perf_header_entry__add(const char *se_header)
 		return "Dispatch Cyc";
 	return se_header;
 }
+
+int arch_support_sort_key(const char *sort_key)
+{
+	if (!strcmp(sort_key, "p_stage_cyc"))
+		return 1;
+	return 0;
+}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 65f89e80916f..612a92aaaefb 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -429,5 +429,6 @@ char *get_page_size_name(u64 size, char *str);
 void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type);
 void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 *array, u64 type);
 const char *arch_perf_header_entry__add(const char *se_header);
+int arch_support_sort_key(const char *sort_key);
 
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index cbb3899e7eca..d8b0b0b43a81 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -47,6 +47,7 @@
 regex_t		ignore_callees_regex;
 int		have_ignore_callees = 0;
 enum sort_mode	sort__mode = SORT_MODE__NORMAL;
 const char	*dynamic_headers[] = {"local_ins_lat", "p_stage_cyc"};
+const char	*arch_specific_sort_keys[] = {"p_stage_cyc"};
 
 /*
  * Replaces all occurrences of a char used with the:
@@ -1837,6 +1838,11 @@ struct sort_dimension {
 	int			taken;
 };
 
+int __weak arch_support_sort_key(const char *sort_key __maybe_unused)
+{
+	return 0;
+}
+
 const char * __weak arch_perf_header_entry__add(const char *se_header)
 {
 	return se_header;
@@ -2773,6 +2779,18 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
 {
 	unsigned int i, j;
 
+	/* Check to see if there are any arch specific
+	 * sort dimensions not applicable for the current
+	 * architecture. If so, skip that sort key since
+	 * we don't want to display it in the output fields.
+	 */
+	for (j = 0; j < ARRAY_SIZE(arch_specific_sort_keys); j++) {
+		if (!strcmp(arch_specific_sort_keys[j], tok) &&
+		    !arch_support_sort_key(tok)) {
+			return 0;
+		}
+	}
+
 	for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
 		struct sort_dimension *sd = &common_sort_dimensions[i];
-- 
2.26.2

Thanks
Athira

jirka

Re: [PATCH 4/4] tools/perf: Support pipeline stage cycles for powerpc

2021-03-15 Thread Athira Rajeev



> On 12-Mar-2021, at 6:26 PM, Jiri Olsa  wrote:
> 
> On Tue, Mar 09, 2021 at 09:04:00AM -0500, Athira Rajeev wrote:
>> The pipeline stage cycles details can be recorded on powerpc from
>> the contents of Performance Monitor Unit (PMU) registers. On
>> ISA v3.1 platform, sampling registers exposes the cycles spent in
>> different pipeline stages. Patch adds perf tools support to present
>> two of the cycle counter information along with memory latency (weight).
>> 
>> Re-use the field 'ins_lat' for storing the first pipeline stage cycle.
>> This is stored in 'var2_w' field of 'perf_sample_weight'.
>> 
>> Add a new field 'p_stage_cyc' to store the second pipeline stage cycle
>> which is stored in 'var3_w' field of perf_sample_weight.
>> 
>> Add new sort function 'Pipeline Stage Cycle' and include this in
>> default_mem_sort_order[]. This new sort function may be used to denote
>> some other pipeline stage in another architecture. So add this to
>> list of sort entries that can have dynamic header string.
>> 
>> Signed-off-by: Athira Rajeev 
>> ---
>> tools/perf/Documentation/perf-report.txt |  1 +
>> tools/perf/arch/powerpc/util/event.c | 18 --
>> tools/perf/util/event.h  |  1 +
>> tools/perf/util/hist.c   | 11 ---
>> tools/perf/util/hist.h   |  1 +
>> tools/perf/util/session.c|  4 +++-
>> tools/perf/util/sort.c   | 24 ++--
>> tools/perf/util/sort.h   |  2 ++
>> 8 files changed, 54 insertions(+), 8 deletions(-)
>> 
>> diff --git a/tools/perf/Documentation/perf-report.txt 
>> b/tools/perf/Documentation/perf-report.txt
>> index f546b5e9db05..9691d9c227ba 100644
>> --- a/tools/perf/Documentation/perf-report.txt
>> +++ b/tools/perf/Documentation/perf-report.txt
>> @@ -112,6 +112,7 @@ OPTIONS
>>  - ins_lat: Instruction latency in core cycles. This is the global 
>> instruction
>>latency
>>  - local_ins_lat: Local instruction latency version
>> +- p_stage_cyc: Number of cycles spent in a pipeline stage.
> 
> please specify in here that it's ppc only

Ok Sure,

> 
> SNIP
> 
>> +struct sort_entry sort_p_stage_cyc = {
>> +.se_header  = "Pipeline Stage Cycle",
>> +.se_cmp = sort__global_p_stage_cyc_cmp,
>> +.se_snprintf= hist_entry__p_stage_cyc_snprintf,
>> +.se_width_idx   = HISTC_P_STAGE_CYC,
>> +};
>> +
>> struct sort_entry sort_mem_daddr_sym = {
>>  .se_header  = "Data Symbol",
>>  .se_cmp = sort__daddr_cmp,
>> @@ -1853,6 +1872,7 @@ static void sort_dimension_add_dynamic_header(struct 
>> sort_dimension *sd)
>>  DIM(SORT_CODE_PAGE_SIZE, "code_page_size", sort_code_page_size),
>>  DIM(SORT_LOCAL_INS_LAT, "local_ins_lat", sort_local_ins_lat),
>>  DIM(SORT_GLOBAL_INS_LAT, "ins_lat", sort_global_ins_lat),
>> +DIM(SORT_P_STAGE_CYC, "p_stage_cyc", sort_p_stage_cyc),
> 
> this might be out of scope for this patch, but would it make sense
> to add arch specific sort dimension? so the specific column is
> not even visible on arch that it's not supported on
> 

Hi Jiri,

Thanks for the suggestions.

Below is an approach I came up with for adding a dynamic sort key based on
architecture support.
With this patch, perf report for mem mode will display the new sort key only on
supported archs.
Please help to review if this approach looks good. I have created this on top
of my current set. If this looks fine,
I can include this in the version2 patch set.

From 8ebbe6ae802d895103335899e4e60dde5e562f33 Mon Sep 17 00:00:00 2001
From: Athira Rajeev 
Date: Mon, 15 Mar 2021 02:33:28 +
Subject: [PATCH] tools/perf: Add dynamic sort dimensions for mem mode

Add dynamic sort dimensions for mem mode.

Signed-off-by: Athira Rajeev 
---
 tools/perf/arch/powerpc/util/event.c |  7 +
 tools/perf/util/event.h  |  1 +
 tools/perf/util/sort.c   | 43 +++-
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/powerpc/util/event.c 
b/tools/perf/arch/powerpc/util/event.c
index b80fbee83b6e..fddfc288c415 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -44,3 +44,10 @@ const char *arch_perf_header_entry__add(const char 
*se_header)
return "Dispatch Cyc";
return se_header;
 }
+
+int arch_support_dynamic_key(const char *sort_key)
+{
+   if (!strcmp(sort_key, "p_stage_cyc"))
+   return 1;
+   return 0;
+}
diff --g

Re: [PATCH 2/4] tools/perf: Add dynamic headers for perf report columns

2021-03-15 Thread Athira Rajeev



> On 12-Mar-2021, at 6:27 PM, Jiri Olsa  wrote:
> 
> On Tue, Mar 09, 2021 at 09:03:58AM -0500, Athira Rajeev wrote:
>> Currently the header string for different columns in perf report
>> is fixed. Some fields of perf sample could have different meaning
>> for different architectures than the meaning conveyed by the header
>> string. An example is the new field 'var2_w' of perf_sample_weight
>> structure. This is presently captured as 'Local INSTR Latency' in
>> perf mem report. But this could be used to denote a different latency
>> cycle in another architecture.
>> 
>> Introduce a weak function arch_perf_header_entry__add() to set
>> the arch specific header string for the fields which can contain dynamic
>> header. If the architecture do not have this function, fall back to the
>> default header string value.
>> 
>> Signed-off-by: Athira Rajeev 
>> ---
>> tools/perf/util/event.h |  1 +
>> tools/perf/util/sort.c  | 19 ++-
>> 2 files changed, 19 insertions(+), 1 deletion(-)
>> 
>> diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
>> index f603edbbbc6f..89b149e2e70a 100644
>> --- a/tools/perf/util/event.h
>> +++ b/tools/perf/util/event.h
>> @@ -427,5 +427,6 @@ void  cpu_map_data__synthesize(struct 
>> perf_record_cpu_map_data *data, struct per
>> 
>> void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 
>> *array, u64 type);
>> void arch_perf_synthesize_sample_weight(const struct perf_sample *data, 
>> __u64 *array, u64 type);
>> +const char *arch_perf_header_entry__add(const char *se_header);
>> 
>> #endif /* __PERF_RECORD_H */
>> diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
>> index 0d5ad42812b9..741a6df29fa0 100644
>> --- a/tools/perf/util/sort.c
>> +++ b/tools/perf/util/sort.c
>> @@ -25,6 +25,7 @@
>> #include 
>> #include "mem-events.h"
>> #include "annotate.h"
>> +#include "event.h"
>> #include "time-utils.h"
>> #include "cgroup.h"
>> #include "machine.h"
>> @@ -45,6 +46,7 @@
>> regex_t  ignore_callees_regex;
>> int  have_ignore_callees = 0;
>> enum sort_mode   sort__mode = SORT_MODE__NORMAL;
>> +const char  *dynamic_headers[] = {"local_ins_lat"};
>> 
>> /*
>>  * Replaces all occurrences of a char used with the:
>> @@ -1816,6 +1818,16 @@ struct sort_dimension {
>>  int taken;
>> };
>> 
>> +const char * __weak arch_perf_header_entry__add(const char *se_header)
> 
> no need for the __add suffix in here
> 
> jirka
> 

Thanks Jiri for the review.

I will include this change in next version.

Thanks
Athira

>> +{
>> +return se_header;
>> +}
>> +
>> +static void sort_dimension_add_dynamic_header(struct sort_dimension *sd)
>> +{
>> +sd->entry->se_header = 
>> arch_perf_header_entry__add(sd->entry->se_header);
>> +}
>> +
>> #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
>> 
>> static struct sort_dimension common_sort_dimensions[] = {
>> @@ -2739,11 +2751,16 @@ int sort_dimension__add(struct perf_hpp_list *list, 
>> const char *tok,
>>  struct evlist *evlist,
>>  int level)
>> {
>> -unsigned int i;
>> +unsigned int i, j;
>> 
>>  for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
>>  struct sort_dimension *sd = _sort_dimensions[i];
>> 
>> +for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) {
>> +if (!strcmp(dynamic_headers[j], sd->name))
>> +sort_dimension_add_dynamic_header(sd);
>> +}
>> +
>>  if (strncasecmp(tok, sd->name, strlen(tok)))
>>  continue;
>> 
>> -- 
>> 1.8.3.1



[PATCH 4/4] tools/perf: Support pipeline stage cycles for powerpc

2021-03-09 Thread Athira Rajeev
The pipeline stage cycle details can be recorded on powerpc from
the contents of Performance Monitor Unit (PMU) registers. On
ISA v3.1 platform, sampling registers expose the cycles spent in
different pipeline stages. Patch adds perf tools support to present
two of the cycle counter information along with memory latency (weight).

Re-use the field 'ins_lat' for storing the first pipeline stage cycle.
This is stored in 'var2_w' field of 'perf_sample_weight'.

Add a new field 'p_stage_cyc' to store the second pipeline stage cycle
which is stored in 'var3_w' field of perf_sample_weight.

Add new sort function 'Pipeline Stage Cycle' and include this in
default_mem_sort_order[]. This new sort function may be used to denote
some other pipeline stage in another architecture. So add this to the
list of sort entries that can have a dynamic header string.
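
For readers following the plumbing, a rough sketch of how the new value travels
from the hardware to the report column (the flow mirrors the diffs below; the
small helper at the end is hypothetical, added only to illustrate the he_stat
fields):

/*
 *  kernel:  SIER2 dispatch cycles  ->  perf_sample_weight.var3_w
 *  tools:   arch_perf_parse_sample_weight()  ->  sample->p_stage_cyc
 *  hists:   he_stat__add_period()  ->  he_stat->p_stage_cyc
 *  report:  sort_p_stage_cyc / HISTC_P_STAGE_CYC column ("Dispatch Cyc")
 */
static inline u64 he_stat_avg_p_stage_cyc(const struct he_stat *st)
{
	/* Hypothetical helper: average pipeline-stage cycles per sample. */
	return st->nr_events ? st->p_stage_cyc / st->nr_events : 0;
}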

Signed-off-by: Athira Rajeev 
---
 tools/perf/Documentation/perf-report.txt |  1 +
 tools/perf/arch/powerpc/util/event.c | 18 --
 tools/perf/util/event.h  |  1 +
 tools/perf/util/hist.c   | 11 ---
 tools/perf/util/hist.h   |  1 +
 tools/perf/util/session.c|  4 +++-
 tools/perf/util/sort.c   | 24 ++--
 tools/perf/util/sort.h   |  2 ++
 8 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt 
b/tools/perf/Documentation/perf-report.txt
index f546b5e9db05..9691d9c227ba 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -112,6 +112,7 @@ OPTIONS
- ins_lat: Instruction latency in core cycles. This is the global 
instruction
  latency
- local_ins_lat: Local instruction latency version
+   - p_stage_cyc: Number of cycles spent in a pipeline stage.
 
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
diff --git a/tools/perf/arch/powerpc/util/event.c 
b/tools/perf/arch/powerpc/util/event.c
index f49d32c2c8ae..b80fbee83b6e 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -18,8 +18,11 @@ void arch_perf_parse_sample_weight(struct perf_sample *data,
weight.full = *array;
if (type & PERF_SAMPLE_WEIGHT)
data->weight = weight.full;
-   else
+   else {
data->weight = weight.var1_dw;
+   data->ins_lat = weight.var2_w;
+   data->p_stage_cyc = weight.var3_w;
+   }
 }
 
 void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
@@ -27,6 +30,17 @@ void arch_perf_synthesize_sample_weight(const struct 
perf_sample *data,
 {
*array = data->weight;
 
-   if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+   if (type & PERF_SAMPLE_WEIGHT_STRUCT) {
*array &= 0x;
+   *array |= ((u64)data->ins_lat << 32);
+   }
+}
+
+const char *arch_perf_header_entry__add(const char *se_header)
+{
+   if (!strcmp(se_header, "Local INSTR Latency"))
+   return "Finish Cyc";
+   else if (!strcmp(se_header, "Pipeline Stage Cycle"))
+   return "Dispatch Cyc";
+   return se_header;
 }
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 89b149e2e70a..65f89e80916f 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -147,6 +147,7 @@ struct perf_sample {
u8  cpumode;
u16 misc;
u16 ins_lat;
+   u16 p_stage_cyc;
bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index c82f5fc26af8..9299ee535518 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -211,6 +211,7 @@ void hists__calc_col_len(struct hists *hists, struct 
hist_entry *h)
hists__new_col_len(hists, HISTC_MEM_BLOCKED, 10);
hists__new_col_len(hists, HISTC_LOCAL_INS_LAT, 13);
hists__new_col_len(hists, HISTC_GLOBAL_INS_LAT, 13);
+   hists__new_col_len(hists, HISTC_P_STAGE_CYC, 13);
if (symbol_conf.nanosecs)
hists__new_col_len(hists, HISTC_TIME, 16);
else
@@ -289,13 +290,14 @@ static long hist_time(unsigned long htime)
 }
 
 static void he_stat__add_period(struct he_stat *he_stat, u64 period,
-   u64 weight, u64 ins_lat)
+   u64 weight, u64 ins_lat, u64 p_stage_cyc)
 {
 
he_stat->period += period;
he_stat->weight += weight;
he_stat->nr_events  += 1;
he_stat->ins_lat+= ins_lat;
+   he_stat->p_stage_cyc+= p_stage_cyc;
 }
 
 static void he_stat__add_stat(struct he_stat *dest, struct he_stat *src)
@@ -308,6 +310,7 @@ static void he_stat__add_stat(struct he_st

[PATCH 3/4] tools/perf: Add powerpc support for PERF_SAMPLE_WEIGHT_STRUCT

2021-03-09 Thread Athira Rajeev
Add arch specific arch_evsel__set_sample_weight() to set the new
sample type for powerpc.

Add arch specific arch_perf_parse_sample_weight() to store the
sample->weight values depending on the sample type applied.
If the new sample type (PERF_SAMPLE_WEIGHT_STRUCT) is applied,
store only the lower 32 bits to sample->weight. If the sample type
is 'PERF_SAMPLE_WEIGHT', store the full 64-bit value to sample->weight.

Signed-off-by: Athira Rajeev 
---
 tools/perf/arch/powerpc/util/Build   |  2 ++
 tools/perf/arch/powerpc/util/event.c | 32 
 tools/perf/arch/powerpc/util/evsel.c |  8 
 3 files changed, 42 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/event.c
 create mode 100644 tools/perf/arch/powerpc/util/evsel.c

diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index b7945e5a543b..8a79c4126e5b 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -4,6 +4,8 @@ perf-y += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += mem-events.o
 perf-y += sym-handling.o
+perf-y += evsel.o
+perf-y += event.o
 
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/event.c 
b/tools/perf/arch/powerpc/util/event.c
new file mode 100644
index ..f49d32c2c8ae
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include 
+#include 
+
+#include "../../../util/event.h"
+#include "../../../util/synthetic-events.h"
+#include "../../../util/machine.h"
+#include "../../../util/tool.h"
+#include "../../../util/map.h"
+#include "../../../util/debug.h"
+
+void arch_perf_parse_sample_weight(struct perf_sample *data,
+  const __u64 *array, u64 type)
+{
+   union perf_sample_weight weight;
+
+   weight.full = *array;
+   if (type & PERF_SAMPLE_WEIGHT)
+   data->weight = weight.full;
+   else
+   data->weight = weight.var1_dw;
+}
+
+void arch_perf_synthesize_sample_weight(const struct perf_sample *data,
+   __u64 *array, u64 type)
+{
+   *array = data->weight;
+
+   if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+   *array &= 0x;
+}
diff --git a/tools/perf/arch/powerpc/util/evsel.c 
b/tools/perf/arch/powerpc/util/evsel.c
new file mode 100644
index ..2f733cdc8dbb
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/evsel.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+#include "util/evsel.h"
+
+void arch_evsel__set_sample_weight(struct evsel *evsel)
+{
+   evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
+}
-- 
1.8.3.1



[PATCH 2/4] tools/perf: Add dynamic headers for perf report columns

2021-03-09 Thread Athira Rajeev
Currently the header string for different columns in perf report
is fixed. Some fields of perf sample could have different meaning
for different architectures than the meaning conveyed by the header
string. An example is the new field 'var2_w' of perf_sample_weight
structure. This is presently captured as 'Local INSTR Latency' in
perf mem report. But this could be used to denote a different latency
cycle in another architecture.

Introduce a weak function arch_perf_header_entry__add() to set
the arch specific header string for the fields which can have a dynamic
header. If the architecture does not provide this function, fall back to
the default header string value.

Signed-off-by: Athira Rajeev 
---
 tools/perf/util/event.h |  1 +
 tools/perf/util/sort.c  | 19 ++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index f603edbbbc6f..89b149e2e70a 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -427,5 +427,6 @@ void  cpu_map_data__synthesize(struct 
perf_record_cpu_map_data *data, struct per
 
 void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 
*array, u64 type);
 void arch_perf_synthesize_sample_weight(const struct perf_sample *data, __u64 
*array, u64 type);
+const char *arch_perf_header_entry__add(const char *se_header);
 
 #endif /* __PERF_RECORD_H */
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 0d5ad42812b9..741a6df29fa0 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -25,6 +25,7 @@
 #include 
 #include "mem-events.h"
 #include "annotate.h"
+#include "event.h"
 #include "time-utils.h"
 #include "cgroup.h"
 #include "machine.h"
@@ -45,6 +46,7 @@
 regex_tignore_callees_regex;
 inthave_ignore_callees = 0;
 enum sort_mode sort__mode = SORT_MODE__NORMAL;
+const char *dynamic_headers[] = {"local_ins_lat"};
 
 /*
  * Replaces all occurrences of a char used with the:
@@ -1816,6 +1818,16 @@ struct sort_dimension {
int taken;
 };
 
+const char * __weak arch_perf_header_entry__add(const char *se_header)
+{
+   return se_header;
+}
+
+static void sort_dimension_add_dynamic_header(struct sort_dimension *sd)
+{
+   sd->entry->se_header = 
arch_perf_header_entry__add(sd->entry->se_header);
+}
+
 #define DIM(d, n, func) [d] = { .name = n, .entry = &(func) }
 
 static struct sort_dimension common_sort_dimensions[] = {
@@ -2739,11 +2751,16 @@ int sort_dimension__add(struct perf_hpp_list *list, 
const char *tok,
struct evlist *evlist,
int level)
 {
-   unsigned int i;
+   unsigned int i, j;
 
for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
struct sort_dimension *sd = _sort_dimensions[i];
 
+   for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) {
+   if (!strcmp(dynamic_headers[j], sd->name))
+   sort_dimension_add_dynamic_header(sd);
+   }
+
if (strncasecmp(tok, sd->name, strlen(tok)))
continue;
 
-- 
1.8.3.1



[PATCH 1/4] powerpc/perf: Expose processor pipeline stage cycles using PERF_SAMPLE_WEIGHT_STRUCT

2021-03-09 Thread Athira Rajeev
Performance Monitoring Unit (PMU) registers in powerpc provide
information on cycles elapsed between different stages in the
pipeline. This can be used for application tuning. On ISA v3.1
platform, this information is exposed by sampling registers.
Patch adds kernel support to capture two of the cycle counters
as part of perf sample using the sample type:
PERF_SAMPLE_WEIGHT_STRUCT.

The power PMU function 'get_mem_weight' currently uses the 64-bit weight
field of perf_sample_data to capture memory latency. But following the
introduction of PERF_SAMPLE_WEIGHT_TYPE, the weight field could contain
a 64-bit or 32-bit value depending on the architecture's support for
PERF_SAMPLE_WEIGHT_STRUCT. The patch uses WEIGHT_STRUCT to expose the
pipeline stage cycles info. Hence update the ppmu functions to work with
both 64-bit and 32-bit weight values.

If the sample type is PERF_SAMPLE_WEIGHT, use the 64-bit weight field.
If the sample type is PERF_SAMPLE_WEIGHT_STRUCT, memory subsystem
latency is stored in the low 32 bits of the perf_sample_weight structure.
Also, for CPU_FTR_ARCH_31, capture the two cycle counter values in the
two 16-bit fields of the perf_sample_weight structure.
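
As a side note on the latency value itself: the MMCRA threshold counter is a
mantissa/exponent pair, and the decode used by isa207_get_mem_weight() below is
weight = mantissa << (2 * exp). A tiny worked example with hypothetical
register contents:

/* Hypothetical decode of the MMCRA threshold counter:
 * mantissa = 37, exp = 2  =>  37 << (2 * 2) = 37 * 16 = 592 cycles.
 * exp = 0 keeps the raw mantissa; each +1 in exp scales the result by 4. */
static inline u64 thresh_ctr_decode(u64 mantissa, u64 exp)
{
	return mantissa << (2 * exp);
}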

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/include/asm/perf_event_server.h |  2 +-
 arch/powerpc/perf/core-book3s.c  |  4 ++--
 arch/powerpc/perf/isa207-common.c| 29 +---
 arch/powerpc/perf/isa207-common.h|  6 +-
 4 files changed, 34 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event_server.h 
b/arch/powerpc/include/asm/perf_event_server.h
index 00e7e671bb4b..112cf092d7b3 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -43,7 +43,7 @@ struct power_pmu {
u64 alt[]);
void(*get_mem_data_src)(union perf_mem_data_src *dsrc,
u32 flags, struct pt_regs *regs);
-   void(*get_mem_weight)(u64 *weight);
+   void(*get_mem_weight)(u64 *weight, u64 type);
unsigned long   group_constraint_mask;
unsigned long   group_constraint_val;
u64 (*bhrb_filter_map)(u64 branch_sample_type);
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 6817331e22ff..57ff2494880c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2206,9 +2206,9 @@ static void record_and_restart(struct perf_event *event, 
unsigned long val,
ppmu->get_mem_data_src)
ppmu->get_mem_data_src(_src, ppmu->flags, 
regs);
 
-   if (event->attr.sample_type & PERF_SAMPLE_WEIGHT &&
+   if (event->attr.sample_type & PERF_SAMPLE_WEIGHT_TYPE &&
ppmu->get_mem_weight)
-   ppmu->get_mem_weight();
+   ppmu->get_mem_weight(, 
event->attr.sample_type);
 
if (perf_event_overflow(event, , regs))
power_pmu_stop(event, 0);
diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index e4f577da33d8..5dcbdbd54598 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -284,8 +284,10 @@ void isa207_get_mem_data_src(union perf_mem_data_src 
*dsrc, u32 flags,
}
 }
 
-void isa207_get_mem_weight(u64 *weight)
+void isa207_get_mem_weight(u64 *weight, u64 type)
 {
+   union perf_sample_weight *weight_fields;
+   u64 weight_lat;
u64 mmcra = mfspr(SPRN_MMCRA);
u64 exp = MMCRA_THR_CTR_EXP(mmcra);
u64 mantissa = MMCRA_THR_CTR_MANT(mmcra);
@@ -296,9 +298,30 @@ void isa207_get_mem_weight(u64 *weight)
mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);
 
if (val == 0 || val == 7)
-   *weight = 0;
+   weight_lat = 0;
else
-   *weight = mantissa << (2 * exp);
+   weight_lat = mantissa << (2 * exp);
+
+   /*
+* Use 64 bit weight field (full) if sample type is
+* WEIGHT.
+*
+* if sample type is WEIGHT_STRUCT:
+* - store memory latency in the lower 32 bits.
+* - For ISA v3.1, use remaining two 16 bit fields of
+*   perf_sample_weight to store cycle counter values
+*   from sier2.
+*/
+   weight_fields = (union perf_sample_weight *)weight;
+   if (type & PERF_SAMPLE_WEIGHT)
+   weight_fields->full = weight_lat;
+   else {
+   weight_fields->var1_dw = (u32)weight_lat;
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   weight_fields->var2_w = 
P10_SIER2_FINISH_CYC(mfspr(SPRN_SIER2));
+   weight_fields->var3_w = 
P10_SIER2_DISPATCH_CYC(mfspr(SPRN_SIER2));
+   

[PATCH 0/4] powerpc/perf: Export processor pipeline stage cycles information

2021-03-09 Thread Athira Rajeev
Performance Monitoring Unit (PMU) registers in powerpc export the
number of cycles elapsed between different stages in the pipeline.
An example is the sampling registers in ISA v3.1.

This patchset implements kernel and perf tools support to expose
these pipeline stage cycles using the sample type PERF_SAMPLE_WEIGHT_TYPE.

Patch 1/4 adds kernel side support to store the cycle counter
values as part of 'var2_w' and 'var3_w' fields of perf_sample_weight
structure.

Patch 2/4 adds support to make the perf report column header
strings dynamic.
Patch 3/4 adds powerpc support in perf tools for PERF_SAMPLE_WEIGHT_STRUCT
in sample type: PERF_SAMPLE_WEIGHT_TYPE.
Patch 4/4 adds support to present pipeline stage cycles as part of
mem-mode.

Sample output on powerpc:

# perf mem record ls
# perf mem report

# To display the perf.data header info, please use --header/--header-only 
options.
#
#
# Total Lost Samples: 0
#
# Samples: 11  of event 'cpu/mem-loads/'
# Total weight : 1332
# Sort order   : 
local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked,blocked,local_ins_lat,stall_cyc
#
# Overhead   Samples  Local Weight  Memory access Symbol
  Shared Object Data Symbol 
   Data ObjectSnoop TLB access  Locked  
Blocked Finish Cyc Dispatch Cyc 
#         
..    
.  .  
  ..  ..  ..  .  
.
#
44.14% 1  588   L1 hit[k] 
rcu_nmi_exit[kernel.vmlinux]  [k] 0xc007ffdd21b0
 [unknown]  N/A   N/A 
No   N/A7  5
22.22% 1  296   L1 hit[k] 
copypage_power7 [kernel.vmlinux]  [k] 0xc000ff6a1780
 [unknown]  N/A   N/A 
No   N/A2933
 6.98% 1  93L1 hit[.] _dl_addr  
  libc-2.31.so  [.] 0x7fff86fa5058  
   libc-2.31.so   N/A   N/A No   
N/A7  1
 6.61% 1  88L2 hit[.] 
new_do_writelibc-2.31.so  [.] _IO_2_1_stdout_+0x0   
 libc-2.31.so   N/A   N/A 
No   N/A84 1
 5.93% 1  79L1 hit[k] 
printk_nmi_exit [kernel.vmlinux]  [k] 0xc006085df6b0
 [unknown]  N/A   N/A 
No   N/A7  1
 4.05% 1  54L2 hit[.] 
__alloc_dir libc-2.31.so  [.] 0x7fffdb70a640
 [stack]N/A   N/A 
No   N/A18 1
 3.60% 1  48L1 hit[.] _init 
  ls[.] 0x00016ca82118  
   [heap] N/A   N/A No   
N/A7  6
 2.40% 1  32L1 hit[k] desc_read 
  [kernel.vmlinux]  [k] _printk_rb_static_descs+0x1ea10 
   [kernel.vmlinux].data  N/A   N/A No   
N/A7  1
 1.65% 1  22L2 hit[k] 
perf_iterate_ctx.constprop.139  [kernel.vmlinux]  [k] 0xc0064d79e8a8
 [unknown]  N/A   N/A 
No   N/A16 1
 1.58% 1  21L1 hit[k] 
perf_event_interrupt[kernel.vmlinux]  [k] 0xc006085df6b0
 [unknown]  N/A   N/A 
No   N/A7  1
 0.83% 1  11L1 hit[k] 
perf_event_exec [kernel.vmlinux]  [k] 0xc007ffdd3288
 [unknown]  N/A   N/A 
No   N/A7  4


Athira Rajeev (4):
  powerpc/perf: Expose processor pipeline stage cycles using
PERF_SAMPLE_WEIGHT_STRUCT
  tools/perf: Add dynamic headers for perf report columns
  tools/perf: Add powerpc support for PERF_SAMPLE_WEIGHT_STRUCT
  tools/perf: Support

Re: [PATCH] powerpc/perf: prevent mixed EBB and non-EBB events

2021-03-04 Thread Athira Rajeev



> On 24-Feb-2021, at 5:51 PM, Thadeu Lima de Souza Cascardo 
>  wrote:
> 
> EBB events must be under exclusive groups, so there is no mix of EBB and
> non-EBB events on the same PMU. This requirement worked fine as perf core
> would not allow other pinned events to be scheduled together with exclusive
> events.
> 
> This assumption was broken by commit 1908dc911792 ("perf: Tweak
> perf_event_attr::exclusive semantics").
> 
> After that, the test cpu_event_pinned_vs_ebb_test started succeeding after
> read_events, but worse, the task would not have given access to PMC1, so
> when it tried to write to it, it was killed with "illegal instruction".
> 
> Preventing mixed EBB and non-EBB events from being add to the same PMU will
> just revert to the previous behavior and the test will succeed.


Hi,

Thanks for checking this. I checked your patch which is fixing "check_excludes"
to make sure all events must agree on EBB. But in the PMU group constraints, we
already have a check for EBB events. This is in
arch/powerpc/perf/isa207-common.c (isa207_get_constraint function).

<<>>
mask  |= CNST_EBB_VAL(ebb);
value |= CNST_EBB_MASK;
<<>>

But the above settings for mask and value are interchanged. We actually need
to fix it here.

Below patch should fix this:

diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index e4f577da33d8..8b5eeb6fb2fb 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -447,8 +447,8 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, 
unsigned long *valp,
 * EBB events are pinned & exclusive, so this should never actually
 * hit, but we leave it as a fallback in case.
 */
-   mask  |= CNST_EBB_VAL(ebb);
-   value |= CNST_EBB_MASK;
+   mask  |= CNST_EBB_MASK;
+   value |= CNST_EBB_VAL(ebb);
 
*maskp = mask;
*valp = value;
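
To make the swap concrete, here is a stripped-down sketch of how a constraint
mask/value pair is meant to combine (the field layout and helper below are
hypothetical, not the real isa207 encoding): the mask marks which constraint
bits are significant, the value carries the event's own setting inside that
field, and events can only be grouped when their values agree wherever the
masks overlap. With the operands interchanged, the value word carries the
field mask instead of the event's EBB setting, so the check no longer reflects
whether the event actually uses EBB.

/* Sketch only, hypothetical layout for illustration. */
#define EBB_SHIFT        24
#define EBB_FIELD_MASK   (1ULL << EBB_SHIFT)       /* which bit holds the EBB constraint */
#define EBB_FIELD_VAL(e) ((u64)(e) << EBB_SHIFT)   /* the event's own EBB setting */

static int constraints_agree(u64 mask_a, u64 val_a, u64 mask_b, u64 val_b)
{
	u64 both = mask_a & mask_b;	/* bits both events care about */

	return (val_a & both) == (val_b & both);
}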


Can you please try with this patch.

Thanks
Athira


> 
> Fixes: 1908dc911792 (perf: Tweak perf_event_attr::exclusive semantics)
> Signed-off-by: Thadeu Lima de Souza Cascardo 
> ---
> arch/powerpc/perf/core-book3s.c | 20 
> 1 file changed, 16 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 43599e671d38..d767f7944f85 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -1010,9 +1010,25 @@ static int check_excludes(struct perf_event **ctrs, 
> unsigned int cflags[],
> int n_prev, int n_new)
> {
>   int eu = 0, ek = 0, eh = 0;
> + bool ebb = false;
>   int i, n, first;
>   struct perf_event *event;
> 
> + n = n_prev + n_new;
> + if (n <= 1)
> + return 0;
> +
> + first = 1;
> + for (i = 0; i < n; ++i) {
> + event = ctrs[i];
> + if (first) {
> + ebb = is_ebb_event(event);
> + first = 0;
> + } else if (is_ebb_event(event) != ebb) {
> + return -EAGAIN;
> + }
> + }
> +
>   /*
>* If the PMU we're on supports per event exclude settings then we
>* don't need to do any of this logic. NB. This assumes no PMU has both
> @@ -1021,10 +1037,6 @@ static int check_excludes(struct perf_event **ctrs, 
> unsigned int cflags[],
>   if (ppmu->flags & PPMU_ARCH_207S)
>   return 0;
> 
> - n = n_prev + n_new;
> - if (n <= 1)
> - return 0;
> -
>   first = 1;
>   for (i = 0; i < n; ++i) {
>   if (cflags[i] & PPMU_LIMITED_PMC_OK) {
> -- 
> 2.27.0
> 



[PATCH] powerpc/perf: Fix sampled instruction type for larx/stcx

2021-03-04 Thread Athira Rajeev
Sampled Instruction Event Register (SIER) field [46:48]
identifies the sampled instruction type. ISA v3.1 describes the value
0b111 for this field as reserved, but on POWER10 it denotes the
LARX/STCX type, which will hopefully be clarified in an ISA v3.1 update.

Patch fixes the functions to handle type value 7 for
CPU_FTR_ARCH_31.

Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/isa207-common.c | 30 +++---
 arch/powerpc/perf/isa207-common.h |  1 +
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index e4f577da33d8..754f904d8d69 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -266,6 +266,8 @@ void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, 
u32 flags,
u32 sub_idx;
u64 sier;
u64 val;
+   u64 mmcra = mfspr(SPRN_MMCRA);
+   u32 op_type;
 
/* Skip if no SIER support */
if (!(flags & PPMU_HAS_SIER)) {
@@ -275,12 +277,34 @@ void isa207_get_mem_data_src(union perf_mem_data_src 
*dsrc, u32 flags,
 
sier = mfspr(SPRN_SIER);
val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
-   if (val == 1 || val == 2) {
+   if (val == 1 || val == 2 || (val == 7 && 
cpu_has_feature(CPU_FTR_ARCH_31))) {
idx = (sier & ISA207_SIER_LDST_MASK) >> ISA207_SIER_LDST_SHIFT;
sub_idx = (sier & ISA207_SIER_DATA_SRC_MASK) >> 
ISA207_SIER_DATA_SRC_SHIFT;
 
dsrc->val = isa207_find_source(idx, sub_idx);
-   dsrc->val |= (val == 1) ? P(OP, LOAD) : P(OP, STORE);
+   if (val == 7) {
+   /*
+* Type 0b111 denotes either larx or stcx instruction. 
Use the
+* MMCRA sampling bits [57:59] along with the type value
+* to determine the exact instruction type. If the 
sampling
+* criteria is neither load or store, set the type as 
default
+* to NA.
+*/
+   op_type = (mmcra >> MMCRA_SAMP_ELIG_SHIFT) & 
MMCRA_SAMP_ELIG_MASK;
+   switch (op_type) {
+   case 5:
+   dsrc->val |= P(OP, LOAD);
+   break;
+   case 7:
+   dsrc->val |= P(OP, STORE);
+   break;
+   default:
+   dsrc->val |= P(OP, NA);
+   break;
+   }
+   } else {
+   dsrc->val |= (val == 1) ? P(OP, LOAD) : P(OP, STORE);
+   }
}
 }
 
@@ -295,7 +319,7 @@ void isa207_get_mem_weight(u64 *weight)
if (cpu_has_feature(CPU_FTR_ARCH_31))
mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);
 
-   if (val == 0 || val == 7)
+   if (val == 0 || (val == 7 && !cpu_has_feature(CPU_FTR_ARCH_31)))
*weight = 0;
else
*weight = mantissa << (2 * exp);
diff --git a/arch/powerpc/perf/isa207-common.h 
b/arch/powerpc/perf/isa207-common.h
index 1af0e8c97ac7..7b0242efe4b9 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -220,6 +220,7 @@
 /* Bits in MMCRA for PowerISA v2.07 */
 #define MMCRA_SAMP_MODE_SHIFT  1
 #define MMCRA_SAMP_ELIG_SHIFT  4
+#define MMCRA_SAMP_ELIG_MASK   7
 #define MMCRA_THR_CTL_SHIFT8
 #define MMCRA_THR_SEL_SHIFT16
 #define MMCRA_THR_CMP_SHIFT32
-- 
1.8.3.1



[PATCH] powerpc/perf: Fix the threshold event selection for memory events in power10

2021-03-03 Thread Athira Rajeev
Memory events (mem-loads and mem-stores) currently use the threshold
event selection as issue to finish. Power10 supports issue to complete
as part of thresholding, which is more appropriate for mem-loads and
mem-stores. Hence fix the event code for memory events to use issue
to complete.

Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/power10-events-list.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/power10-events-list.h 
b/arch/powerpc/perf/power10-events-list.h
index e45dafe818ed..93be7197d250 100644
--- a/arch/powerpc/perf/power10-events-list.h
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -75,5 +75,5 @@
  * thresh end (TE)
  */
 
-EVENT(MEM_LOADS,   0x34340401e0);
-EVENT(MEM_STORES,  0x343c0401e0);
+EVENT(MEM_LOADS,   0x35340401e0);
+EVENT(MEM_STORES,  0x353c0401e0);
-- 
1.8.3.1



Re: [PATCH v3 1/2] powerpc/perf: Use PVR rather than oprofile field to determine CPU version

2021-03-03 Thread Athira Rajeev


> On 01-Mar-2021, at 5:39 PM, Christophe Leroy  
> wrote:
> 
> From: Rashmica Gupta 
> 
> Currently the perf CPU backend drivers detect what CPU they're on using
> cur_cpu_spec->oprofile_cpu_type.
> 
> Although that works, it's a bit crufty to be using oprofile related fields,
> especially seeing as oprofile is more or less unused these days.
> 
> It also means perf is reliant on the fragile logic in setup_cpu_spec()
> which detects when we're using a logical PVR and copies back the PMU
> related fields from the raw CPU entry. So lets check the PVR directly.
> 
> Suggested-by: Michael Ellerman 
> Signed-off-by: Rashmica Gupta 
> Reviewed-by: Madhavan Srinivasan 
> [chleroy: Added power10 and fixed checkpatch issues]
> Signed-off-by: Christophe Leroy 

Reviewed-and-tested-by: Athira Rajeev mailto:atraj...@linux.vnet.ibm.com>>

Thanks
Athira
> ---
> arch/powerpc/perf/e500-pmu.c| 9 +
> arch/powerpc/perf/e6500-pmu.c   | 5 +++--
> arch/powerpc/perf/hv-24x7.c | 6 +++---
> arch/powerpc/perf/mpc7450-pmu.c | 5 +++--
> arch/powerpc/perf/power10-pmu.c | 6 ++
> arch/powerpc/perf/power5+-pmu.c | 6 +++---
> arch/powerpc/perf/power5-pmu.c  | 5 +++--
> arch/powerpc/perf/power6-pmu.c  | 5 +++--
> arch/powerpc/perf/power7-pmu.c  | 7 ---
> arch/powerpc/perf/power8-pmu.c  | 5 +++--
> arch/powerpc/perf/power9-pmu.c  | 4 +---
> arch/powerpc/perf/ppc970-pmu.c  | 7 ---
> 12 files changed, 37 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/powerpc/perf/e500-pmu.c b/arch/powerpc/perf/e500-pmu.c
> index a59c33bed32a..e3e1a68eb1d5 100644
> --- a/arch/powerpc/perf/e500-pmu.c
> +++ b/arch/powerpc/perf/e500-pmu.c
> @@ -118,12 +118,13 @@ static struct fsl_emb_pmu e500_pmu = {
> 
> static int init_e500_pmu(void)
> {
> - if (!cur_cpu_spec->oprofile_cpu_type)
> - return -ENODEV;
> + unsigned int pvr = mfspr(SPRN_PVR);
> 
> - if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500mc"))
> + /* ec500mc */
> + if (PVR_VER(pvr) == PVR_VER_E500MC || PVR_VER(pvr) == PVR_VER_E5500)
>   num_events = 256;
> - else if (strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e500"))
> + /* e500 */
> + else if (PVR_VER(pvr) != PVR_VER_E500V1 && PVR_VER(pvr) != 
> PVR_VER_E500V2)
>   return -ENODEV;
> 
>   return register_fsl_emb_pmu(_pmu);
> diff --git a/arch/powerpc/perf/e6500-pmu.c b/arch/powerpc/perf/e6500-pmu.c
> index 44ad65da82ed..bd779a2338f8 100644
> --- a/arch/powerpc/perf/e6500-pmu.c
> +++ b/arch/powerpc/perf/e6500-pmu.c
> @@ -107,8 +107,9 @@ static struct fsl_emb_pmu e6500_pmu = {
> 
> static int init_e6500_pmu(void)
> {
> - if (!cur_cpu_spec->oprofile_cpu_type ||
> - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/e6500"))
> + unsigned int pvr = mfspr(SPRN_PVR);
> +
> + if (PVR_VER(pvr) != PVR_VER_E6500)
>   return -ENODEV;
> 
>   return register_fsl_emb_pmu(_pmu);
> diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
> index e5eb33255066..f3f2472fa1c6 100644
> --- a/arch/powerpc/perf/hv-24x7.c
> +++ b/arch/powerpc/perf/hv-24x7.c
> @@ -1718,16 +1718,16 @@ static int hv_24x7_init(void)
> {
>   int r;
>   unsigned long hret;
> + unsigned int pvr = mfspr(SPRN_PVR);
>   struct hv_perf_caps caps;
> 
>   if (!firmware_has_feature(FW_FEATURE_LPAR)) {
>   pr_debug("not a virtualized system, not enabling\n");
>   return -ENODEV;
> - } else if (!cur_cpu_spec->oprofile_cpu_type)
> - return -ENODEV;
> + }
> 
>   /* POWER8 only supports v1, while POWER9 only supports v2. */
> - if (!strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power8"))
> + if (PVR_VER(pvr) == PVR_POWER8)
>   interface_version = 1;
>   else {
>   interface_version = 2;
> diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c
> index e39b15b79a83..552d51a925d3 100644
> --- a/arch/powerpc/perf/mpc7450-pmu.c
> +++ b/arch/powerpc/perf/mpc7450-pmu.c
> @@ -417,8 +417,9 @@ struct power_pmu mpc7450_pmu = {
> 
> static int __init init_mpc7450_pmu(void)
> {
> - if (!cur_cpu_spec->oprofile_cpu_type ||
> - strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc/7450"))
> + unsigned int pvr = mfspr(SPRN_PVR);
> +
> + if (PVR_VER(pvr) != PVR_7450)
>   return -ENODEV;
> 
>   return register_power_pmu(_pmu);
> diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
> index a901c1348cad..d1395844a329 100644
> --- a/arc

Re: [PATCH] perf bench numa: Fix the condition checks for max number of numa nodes

2021-03-01 Thread Athira Rajeev
> On 26-Feb-2021, at 2:28 PM, Srikar Dronamraju  wrote:
> 
> * Athira Rajeev  [2021-02-25 11:50:02]:
> 
>> In systems having higher node numbers available like node 255,
>> perf numa bench will fail with SIGABORT.
>> 
>> <<>>
>> perf: bench/numa.c:1416: init: Assertion `!(g->p.nr_nodes > 64 || g->p.nr_nodes < 0)' failed.
>> Aborted (core dumped)
>> <<>>
> 
> Looks good to me.
> 
> Reviewed-by: Srikar Dronamraju 

Thanks Srikar for reviewing the patch.

Athira.

> -- 
> Thanks and Regards
> Srikar Dronamraju

Re: [PATCH V2] powerpc/perf: Fix handling of privilege level checks in perf interrupt context

2021-02-28 Thread Athira Rajeev



> On 26-Feb-2021, at 3:05 PM, Peter Zijlstra  wrote:
> 
> On Thu, Feb 25, 2021 at 05:10:39AM -0500, Athira Rajeev wrote:
>> diff --git a/arch/powerpc/perf/core-book3s.c 
>> b/arch/powerpc/perf/core-book3s.c
>> index 4b4319d8..c8be44c 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -222,7 +222,7 @@ static inline void perf_get_data_addr(struct perf_event 
>> *event, struct pt_regs *
>>  if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
>>  *addrp = mfspr(SPRN_SDAR);
>> 
>> -if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(>attr) 
>> != 0)
>> +if (is_kernel_addr(mfspr(SPRN_SDAR)) && event->attr.exclude_kernel)
>>  *addrp = 0;
>> }
>> 
>> @@ -507,7 +507,7 @@ static void power_pmu_bhrb_read(struct perf_event 
>> *event, struct cpu_hw_events *
>>   * addresses, hence include a check before filtering 
>> code
>>   */
>>  if (!(ppmu->flags & PPMU_ARCH_31) &&
>> -is_kernel_addr(addr) && 
>> perf_allow_kernel(>attr) != 0)
>> +is_kernel_addr(addr) && event->attr.exclude_kernel)
>>  continue;
>> 
>>  /* Branches are read most recent first (ie. mfbhrb 0 is
> 
> Acked-by: Peter Zijlstra (Intel) 


Thanks Peter for reviewing the patch.

Athira.

[PATCH] perf bench numa: Fix the condition checks for max number of numa nodes

2021-02-25 Thread Athira Rajeev
In systems having higher node numbers available, like node
255, perf numa bench will fail with SIGABRT.

<<>>
perf: bench/numa.c:1416: init: Assertion `!(g->p.nr_nodes > 64 || g->p.nr_nodes 
< 0)' failed.
Aborted (core dumped)
<<>>

Snippet from 'numactl -H' below on a powerpc system where the highest
node number available is 255.

available: 6 nodes (0,8,252-255)
node 0 cpus: 
node 0 size: 519587 MB
node 0 free: 516659 MB
node 8 cpus: 
node 8 size: 523607 MB
node 8 free: 486757 MB
node 252 cpus:
node 252 size: 0 MB
node 252 free: 0 MB
node 253 cpus:
node 253 size: 0 MB
node 253 free: 0 MB
node 254 cpus:
node 254 size: 0 MB
node 254 free: 0 MB
node 255 cpus:
node 255 size: 0 MB
node 255 free: 0 MB
node distances:
node   0   8  252  253  254  255

Note:  expands to actual cpu list in the original output.
These nodes 252-255 are to represent the memory on GPUs and are valid
nodes.

The perf numa bench init code has a condition check to see if the number
of numa nodes (nr_nodes) exceeds MAX_NR_NODES. The value of MAX_NR_NODES
defined in perf code is 64. The 'nr_nodes' value comes from
numa_max_node(), which represents the highest node number available in the
system. On systems that have a numa node 255, this condition
check fails and results in SIGABRT.

The numa benchmark uses the static value MAX_NR_NODES in the code to
size two numa node arrays and the node bitmask used for setting
memory policy. Patch adds a fix to dynamically allocate the size of the
two arrays and the bitmask based on the node numbers available in the
system. With the fix, perf numa benchmark will work with the node
configuration of any system and thus removes the static MAX_NR_NODES value.
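
For reference, a minimal standalone sketch of the libnuma calls the fix moves
to (assumes <numa.h>/<numaif.h> and linking with -lnuma; error handling kept
minimal):

/* Sketch: bind the calling thread's allocations to one node using a
 * dynamically sized libnuma bitmask instead of a fixed 64-bit word. */
#include <numa.h>
#include <numaif.h>
#include <stdio.h>

static void bind_to_node_sketch(int node)
{
	struct bitmask *mask = numa_allocate_nodemask(); /* sized for numa_max_node() */

	numa_bitmask_clearall(mask);
	numa_bitmask_setbit(mask, node);

	if (set_mempolicy(MPOL_BIND, mask->maskp, mask->size + 1))
		perror("set_mempolicy");

	numa_bitmask_free(mask);
}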

Signed-off-by: Athira Rajeev 
---
 tools/perf/bench/numa.c | 42 +-
 1 file changed, 29 insertions(+), 13 deletions(-)

diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 11726ec..20b87e2 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -344,18 +344,22 @@ static void mempol_restore(void)
 
 static void bind_to_memnode(int node)
 {
-   unsigned long nodemask;
+   struct bitmask *node_mask;
int ret;
 
if (node == NUMA_NO_NODE)
return;
 
-   BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
-   nodemask = 1L << node;
+   node_mask = numa_allocate_nodemask();
+   BUG_ON(!node_mask);
 
-   ret = set_mempolicy(MPOL_BIND, , sizeof(nodemask)*8);
-   dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, 
ret);
+   numa_bitmask_clearall(node_mask);
+   numa_bitmask_setbit(node_mask, node);
 
+   ret = set_mempolicy(MPOL_BIND, node_mask->maskp, node_mask->size + 1);
+   dprintf("binding to node %d, mask: %016lx => %d\n", node, 
*node_mask->maskp, ret);
+
+   numa_bitmask_free(node_mask);
BUG_ON(ret);
 }
 
@@ -876,8 +880,6 @@ static void update_curr_cpu(int task_nr, unsigned long 
bytes_worked)
prctl(0, bytes_worked);
 }
 
-#define MAX_NR_NODES   64
-
 /*
  * Count the number of nodes a process's threads
  * are spread out on.
@@ -888,10 +890,15 @@ static void update_curr_cpu(int task_nr, unsigned long 
bytes_worked)
  */
 static int count_process_nodes(int process_nr)
 {
-   char node_present[MAX_NR_NODES] = { 0, };
+   char *node_present;
int nodes;
int n, t;
 
+   node_present = (char *)malloc(g->p.nr_nodes * sizeof(char));
+   BUG_ON(!node_present);
+   for (nodes = 0; nodes < g->p.nr_nodes; nodes++)
+   node_present[nodes] = 0;
+
for (t = 0; t < g->p.nr_threads; t++) {
struct thread_data *td;
int task_nr;
@@ -901,17 +908,20 @@ static int count_process_nodes(int process_nr)
td = g->threads + task_nr;
 
node = numa_node_of_cpu(td->curr_cpu);
-   if (node < 0) /* curr_cpu was likely still -1 */
+   if (node < 0) /* curr_cpu was likely still -1 */ {
+   free(node_present);
return 0;
+   }
 
node_present[node] = 1;
}
 
nodes = 0;
 
-   for (n = 0; n < MAX_NR_NODES; n++)
+   for (n = 0; n < g->p.nr_nodes; n++)
nodes += node_present[n];
 
+   free(node_present);
return nodes;
 }
 
@@ -980,7 +990,7 @@ static void calc_convergence(double runtime_ns_max, double 
*convergence)
 {
unsigned int loops_done_min, loops_done_max;
int process_groups;
-   int nodes[MAX_NR_NODES];
+   int *nodes;
int distance;
int nr_min;
int nr_max;
@@ -994,6 +1004,8 @@ static void calc_convergence(double runtime_ns_max, double 
*convergence)
if (!g->p.show_convergence && !g->p.measure_convergence)
return;
 

[PATCH V2] powerpc/perf: Fix handling of privilege level checks in perf interrupt context

2021-02-25 Thread Athira Rajeev
Running "perf mem record" in powerpc platforms with selinux enabled
resulted in soft lockup's. Below call-trace was seen in the logs:

CPU: 58 PID: 3751 Comm: sssd_nss Not tainted 5.11.0-rc7+ #2
NIP:  c0dff3d4 LR: c0dff3d0 CTR: 
REGS: c07fffab7d60 TRAP: 0100   Not tainted  (5.11.0-rc7+)
<<>>
NIP [c0dff3d4] _raw_spin_lock_irqsave+0x94/0x120
LR [c0dff3d0] _raw_spin_lock_irqsave+0x90/0x120
Call Trace:
[cfd471a0] [cfd47260] 0xcfd47260 (unreliable)
[cfd471e0] [c0b5fbbc] skb_queue_tail+0x3c/0x90
[cfd47220] [c0296edc] audit_log_end+0x6c/0x180
[cfd47260] [c06a3f20] common_lsm_audit+0xb0/0xe0
[cfd472a0] [c066c664] slow_avc_audit+0xa4/0x110
[cfd47320] [c066cff4] avc_has_perm+0x1c4/0x260
[cfd47430] [c066e064] selinux_perf_event_open+0x74/0xd0
[cfd47450] [c0669888] security_perf_event_open+0x68/0xc0
[cfd47490] [c013d788] record_and_restart+0x6e8/0x7f0
[cfd476c0] [c013dabc] perf_event_interrupt+0x22c/0x560
[cfd477d0] [c002d0fc] performance_monitor_exception+0x4c/0x60
[cfd477f0] [c000b378] 
performance_monitor_common_virt+0x1c8/0x1d0
interrupt: f00 at _raw_spin_lock_irqsave+0x38/0x120
NIP:  c0dff378 LR: c0b5fbbc CTR: c07d47f0
REGS: cfd47860 TRAP: 0f00   Not tainted  (5.11.0-rc7+)
<<>>
NIP [c0dff378] _raw_spin_lock_irqsave+0x38/0x120
LR [c0b5fbbc] skb_queue_tail+0x3c/0x90
interrupt: f00
[cfd47b00] [0038] 0x38 (unreliable)
[cfd47b40] [caae6200] 0xcaae6200
[cfd47b80] [c0296edc] audit_log_end+0x6c/0x180
[cfd47bc0] [c029f494] audit_log_exit+0x344/0xf80
[cfd47d10] [c02a2b00] __audit_syscall_exit+0x2c0/0x320
[cfd47d60] [c0032878] do_syscall_trace_leave+0x148/0x200
[cfd47da0] [c003d5b4] syscall_exit_prepare+0x324/0x390
[cfd47e10] [c000d76c] system_call_common+0xfc/0x27c

The above trace shows that while the CPU was handling a performance
monitor exception, there was a call to "security_perf_event_open"
function. In powerpc core-book3s, this function is called from
'perf_allow_kernel' check during recording of data address in the sample
via perf_get_data_addr().

Commit da97e18458fb ("perf_event: Add support for LSM and SELinux checks")
introduced security enhancements to perf. As part of this commit, the new
security hook for perf_event_open was added in all places where perf
paranoid check was previously used. The powerpc core-book3s code originally
had paranoid checks in 'perf_get_data_addr' and 'power_pmu_bhrb_read', so the
'perf_paranoid_kernel' checks were replaced with 'perf_allow_kernel' in
these PMU helper functions as well.

The intention of paranoid checks in core-book3s was to verify privilege
access before capturing some of the sample data. Along with paranoid
checks, 'perf_allow_kernel' also does a 'security_perf_event_open'. Since
these functions are called while recording a sample, we end up calling
selinux_perf_event_open in PMI context. Some of the security functions
use spinlocks, for example sidtab_sid2str_put(). If a perf interrupt hits
while such a spin lock is held and the selinux hook functions are then
called from the PMI handler, this can deadlock.

Since the purpose of this security hook is to control access to
perf_event_open, it is not right to call this in interrupt context.
The paranoid checks in powerpc core-book3s were done at interrupt
time which is also not correct.
Reference commits:
Commit cd1231d7035f ("powerpc/perf: Prevent kernel address leak via
perf_get_data_addr()")
Commit bb19af816025 ("powerpc/perf: Prevent kernel address leak to
userspace via BHRB buffer")

We only allow creation of events that have already passed the privilege
checks in perf_event_open, so these paranoid checks are not needed at
event time. As a fix, the patch uses the 'event->attr.exclude_kernel' check
to avoid exposing kernel addresses for userspace-only sampling.

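For illustration only (this is not the exact hunk, which is trimmed in the
diff below): with the LSM hook left at perf_event_open() time, the PMI-time
decision reduces to the per-event exclude_kernel attribute. The helper name
sample_addr_must_be_dropped() is made up for this sketch.

/*
 * Sketch: privilege was already validated at perf_event_open() time,
 * so at sample time only the per-event exclude_kernel attribute matters.
 */
static inline bool sample_addr_must_be_dropped(struct perf_event *event,
					       unsigned long addr)
{
	return event->attr.exclude_kernel && is_kernel_addr(addr);
}
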
Suggested-by: Michael Ellerman 
Signed-off-by: Athira Rajeev 
---
Changes in v2:
- Addressed review comments from Ondrej Mosnacek and Peter Zijlstra.
  Changed the approach to use 'event->attr.exclude_kernel'
  check to prevent exposing kernel address for userspace only
  sampling as suggested by Ondrej Mosnacek.

 arch/powerpc/perf/core-book3s.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 4b4319d8..c8be44c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -222,7 +222,7 @@ static inline void perf_get_data_addr(struct perf_event 
*event, struct pt_regs *
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
   

Re: [PATCH] powerpc/perf: Fix handling of privilege level checks in perf interrupt context

2021-02-24 Thread Athira Rajeev



> On 23-Feb-2021, at 6:24 PM, Michael Ellerman  wrote:
> 
> Peter Zijlstra  writes:
>> On Tue, Feb 23, 2021 at 01:31:49AM -0500, Athira Rajeev wrote:
>>> Running "perf mem record" in powerpc platforms with selinux enabled
>>> resulted in soft lockup's. Below call-trace was seen in the logs:
> ...
>>> 
>>> Since the purpose of this security hook is to control access to
>>> perf_event_open, it is not right to call this in interrupt context.
>>> But in case of powerpc PMU, we need the privilege checks for specific
>>> samples from branch history ring buffer and sampling register values.
>> 
>> I'm confused... why would you need those checks at event time? Either
>> the event has perf_event_attr::exclude_kernel and it then isn't allowed
>> to expose kernel addresses, or it doesn't and it is.
> 
> Well one of us is confused that's for sure ^_^
> 
> I missed/forgot that we had that logic in open.
> 
> I think the reason we got here is that in the past we didn't have the
> event in the low-level routines where we want to check,
> power_pmu_bhrb_read() and perf_get_data_addr(), so we hacked in a
> perf_paranoid_kernel() check. Which was wrong.
> 
> Then Joel's patch plumbed the event through and switched those paranoid
> checks to perf_allow_kernel().
> 
> Anyway, we'll just switch those to exclude_kernel checks.
> 
>> There should never be an event-time question of permission like this. If
>> you allow creation of an event, you're allowing the data it generates.
> 
> Ack.

Thanks for all the reviews. I will send a V2 with using 
'event->attr.exclude_kernel' in the checks.

Athira 
> 
> cheers


[PATCH] powerpc/perf: Fix handling of privilege level checks in perf interrupt context

2021-02-22 Thread Athira Rajeev
Running "perf mem record" on powerpc platforms with selinux enabled
resulted in soft lockups. The below call-trace was seen in the logs:

CPU: 58 PID: 3751 Comm: sssd_nss Not tainted 5.11.0-rc7+ #2
NIP:  c0dff3d4 LR: c0dff3d0 CTR: 
REGS: c07fffab7d60 TRAP: 0100   Not tainted  (5.11.0-rc7+)
<<>>
NIP [c0dff3d4] _raw_spin_lock_irqsave+0x94/0x120
LR [c0dff3d0] _raw_spin_lock_irqsave+0x90/0x120
Call Trace:
[cfd471a0] [cfd47260] 0xcfd47260 (unreliable)
[cfd471e0] [c0b5fbbc] skb_queue_tail+0x3c/0x90
[cfd47220] [c0296edc] audit_log_end+0x6c/0x180
[cfd47260] [c06a3f20] common_lsm_audit+0xb0/0xe0
[cfd472a0] [c066c664] slow_avc_audit+0xa4/0x110
[cfd47320] [c066cff4] avc_has_perm+0x1c4/0x260
[cfd47430] [c066e064] selinux_perf_event_open+0x74/0xd0
[cfd47450] [c0669888] security_perf_event_open+0x68/0xc0
[cfd47490] [c013d788] record_and_restart+0x6e8/0x7f0
[cfd476c0] [c013dabc] perf_event_interrupt+0x22c/0x560
[cfd477d0] [c002d0fc] performance_monitor_exception+0x4c/0x60
[cfd477f0] [c000b378] 
performance_monitor_common_virt+0x1c8/0x1d0
interrupt: f00 at _raw_spin_lock_irqsave+0x38/0x120
NIP:  c0dff378 LR: c0b5fbbc CTR: c07d47f0
REGS: cfd47860 TRAP: 0f00   Not tainted  (5.11.0-rc7+)
<<>>
NIP [c0dff378] _raw_spin_lock_irqsave+0x38/0x120
LR [c0b5fbbc] skb_queue_tail+0x3c/0x90
interrupt: f00
[cfd47b00] [0038] 0x38 (unreliable)
[cfd47b40] [caae6200] 0xcaae6200
[cfd47b80] [c0296edc] audit_log_end+0x6c/0x180
[cfd47bc0] [c029f494] audit_log_exit+0x344/0xf80
[cfd47d10] [c02a2b00] __audit_syscall_exit+0x2c0/0x320
[cfd47d60] [c0032878] do_syscall_trace_leave+0x148/0x200
[cfd47da0] [c003d5b4] syscall_exit_prepare+0x324/0x390
[cfd47e10] [c000d76c] system_call_common+0xfc/0x27c

The above trace shows that while the CPU was handling a performance
monitor exception, there was a call to "security_perf_event_open"
function. In powerpc core-book3s, this function is called from
'perf_allow_kernel' check during recording of data address in the sample
via perf_get_data_addr().

Commit da97e18458fb ("perf_event: Add support for LSM and SELinux checks")
introduced security enhancements to perf. As part of this commit, the new
security hook for perf_event_open was added in all places where perf
paranoid check was previously used. The powerpc core-book3s code originally
had paranoid checks in 'perf_get_data_addr' and 'power_pmu_bhrb_read', so the
'perf_paranoid_kernel' checks were replaced with 'perf_allow_kernel' in
these PMU helper functions as well.

The intention of paranoid checks in core-book3s is to verify privilege
access before capturing some of the sample data. Along with paranoid
checks, 'perf_allow_kernel' also does a 'security_perf_event_open'. Since
these functions are called while recording a sample, we end up calling
selinux_perf_event_open in PMI context. Some of the security functions
use spinlocks, for example sidtab_sid2str_put(). If a perf interrupt hits
while such a spin lock is held and the selinux hook functions are then
called from the PMI handler, this can deadlock.

Since the purpose of this security hook is to control access to
perf_event_open, it is not right to call this in interrupt context.
But in case of powerpc PMU, we need the privilege checks for specific
samples from branch history ring buffer and sampling register values.
Reference commits:
Commit cd1231d7035f ("powerpc/perf: Prevent kernel address leak via
perf_get_data_addr()")
Commit bb19af816025 ("powerpc/perf: Prevent kernel address leak to
userspace via BHRB buffer")

As a fix, the patch caches the 'perf_allow_kernel' value at event_init time
in the 'pmu_private' field of the perf_event. The cached value is then used
in the PMI code path.

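For illustration only (the event_init hunk is trimmed in the diff below), a
sketch of how the cached decision could be populated at event init time. The
helper name cache_kernel_access() is made up, and it assumes perf_allow_kernel()
returns 0 when kernel profiling is permitted.

/* Sketch: remember the open-time privilege decision for use at PMI time. */
static void cache_kernel_access(struct perf_event *event)
{
	event->pmu_private =
		(void *)(unsigned long)(perf_allow_kernel(&event->attr) == 0);
}
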
Suggested-by: Michael Ellerman 
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/core-book3s.c | 16 ++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 4b4319d8..9e9f67f 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -189,6 +189,11 @@ static inline unsigned long perf_ip_adjust(struct pt_regs 
*regs)
return 0;
 }
 
+static bool event_allow_kernel(struct perf_event *event)
+{
+   return (bool)event->pmu_private;
+}
+
 /*
  * The user wants a data address recorded.
  * If we're not doing instruction sampling, give them the SDAR
@@ -222,7 +227,7 @@ static inline void perf_get_data_addr(struct perf_event 
*event, struct pt_regs *
if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
  

Re: [PATCH] tools/perf: Fix powerpc gap between kernel end and module start

2021-02-11 Thread Athira Rajeev



> On 09-Feb-2021, at 6:17 PM, Arnaldo Carvalho de Melo  wrote:
> 
> Em Wed, Feb 03, 2021 at 12:31:48PM -0300, Arnaldo Carvalho de Melo escreveu:
>> Em Tue, Feb 02, 2021 at 04:02:36PM +0530, Athira Rajeev escreveu:
>>> 
>>> 
>>>On 18-Jan-2021, at 3:51 PM, kajoljain  wrote:
>>> 
>>> 
>>> 
>>>On 1/12/21 3:08 PM, Jiri Olsa wrote:
>>> 
>>>On Mon, Dec 28, 2020 at 09:14:14PM -0500, Athira Rajeev wrote:
>>> 
>>>SNIP
>>> 
>>> 
>>>c2799370 b backtrace_flag
>>>c2799378 B radix_tree_node_cachep
>>>c2799380 B __bss_stop
>>>c27a B _end
>>>c0080389 t icmp_checkentry  [ip_tables]
>>>c00803890038 t ipt_alloc_initial_table  [ip_tables]
>>>c00803890468 T ipt_do_table [ip_tables]
>>>c00803890de8 T ipt_unregister_table_pre_exit
>>> [ip_tables]
>>>...
>>> 
>>>Perf calls function symbols__fixup_end() which sets the end of
>>>symbol
>>>to 0xc0080389, which is the next address and this is the
>>>start
>>>address of first module (icmp_checkentry in above) which will 
>>> make
>>>the
>>>huge symbol size of 0x8010f.
>>> 
>>>After symbols__fixup_end:
>>>symbols__fixup_end: sym->name: _end, sym->start:
>>>0xc27a,
>>>sym->end: 0xc0080389
>>> 
>>>On powerpc, kernel text segment is located at 0xc000
>>>whereas the modules are located at very high memory addresses,
>>>0xc0080xxx. Since the gap between end of kernel text
>>>segment
>>>and beginning of first module's address is high, histogram
>>>allocation
>>>using calloc fails.
>>> 
>>>Fix this by detecting the kernel's last symbol and limiting
>>>the range of last kernel symbol to pagesize.
>>> 
>>> 
>>>Patch looks good to me.
>>> 
>>>Tested-By: Kajol Jain
>>> 
>>>Thanks,
>>>Kajol Jain
>>> 
>>> 
>>>Signed-off-by: Athira Rajeev
>>> 
>>> 
>>>I can't test, but since the same approach works for arm and s390,
>>>this also looks ok
>>> 
>>>Acked-by: Jiri Olsa 
>>> 
>>>thanks,
>>>jirka
>>> 
>>> 
>>> Hi Arnaldo,
>>> 
>>> Can you please help review this patch and merge if this looks good..
>> 
>> Thanks, collected the Tested-by from Kajol and the Acked-by from Jiri
>> and applied to my local tree for testing, then up to my perf/core
>> branch.
> 
> Had to apply this on top.
> 
> - Arnaldo
> 
> commit 0f000f9c89182950cd3500226729977251529364
> Author: Arnaldo Carvalho de Melo 
> Date:   Tue Feb 9 09:41:21 2021 -0300
> 
>perf powerpc: Fix printf conversion specifier for IP addresses
> 
>We need to use "%#" PRIx64 for u64 values, not "%lx", fixing this build
>problem on powerpc 32-bit:
> 
>  7213.69 ubuntu:18.04-x-powerpc: FAIL powerpc-linux-gnu-gcc 
> (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
>arch/powerpc/util/machine.c: In function 'arch__symbols__fixup_end':
>arch/powerpc/util/machine.c:23:12: error: format '%lx' expects 
> argument of type 'long unsigned int', but argument 6 has type 'u64 {aka long 
> long unsigned int}' [-Werror=format=]
>  pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
>^
>/git/linux/tools/perf/util/debug.h:18:21: note: in definition of macro 
> 'pr_fmt'
> #define pr_fmt(fmt) fmt
> ^~~
>/git/linux/tools/perf/util/debug.h:33:29: note: in expansion of macro 
> 'pr_debugN'
> #define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
> ^
>/git/linux/tools/perf/util/debug.h:33:42: note: in expansion of macro 
> 'pr_fmt'
> #define pr_debug4(fmt, ...) pr_debugN(4, pr_fmt(fmt), ##__VA_ARGS__)
>  ^~~~~~
>arch/powerpc/util/machine.c:

[PATCH V2] powerpc/perf: Record counter overflow always if SAMPLE_IP is unset

2021-02-05 Thread Athira Rajeev
While sampling for marked events, currently we record the sample only
if the SIAR valid bit of Sampled Instruction Event Register (SIER) is
set. SIAR_VALID bit is used for fetching the instruction address from
Sampled Instruction Address Register (SIAR). But there are some use cases
where the user is interested only in the PMU stats at each counter
overflow and the exact IP of the overflow event is not required.
Dropping SIAR-invalid samples will fail to record some of the counter
overflows in such cases.

An example of such a use case is dumping the PMU stats (event counts)
after some regular amount of instructions/events from userspace
(ex: via ptrace). Here counter overflow is indicated to userspace via a
signal handler, set up by monitoring and enabling I/O signaling on the
event file descriptor. In these cases, we expect to get a sample/overflow
indication after each specified sample_period.

Perf event attribute will not have PERF_SAMPLE_IP set in the
sample_type if exact IP of the overflow event is not requested. So
while profiling if SAMPLE_IP is not set, just record the counter overflow
irrespective of SIAR_VALID check.

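For illustration, a minimal userspace sketch of the overflow-notification flow
described above: a signal per counter overflow with no PERF_SAMPLE_IP in
sample_type. The event choice, period and busy loop are illustrative only;
re-arming via PERF_EVENT_IOC_REFRESH is one common way to get a signal on
each overflow.

/* Count overflow notifications via SIGIO on the perf fd; no PERF_SAMPLE_IP. */
#define _GNU_SOURCE
#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include <signal.h>
#include <fcntl.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

static int perf_fd;
static volatile sig_atomic_t overflows;

static void sigio_handler(int sig)
{
	overflows++;
	/* Re-arm so the next overflow also raises a signal */
	ioctl(perf_fd, PERF_EVENT_IOC_REFRESH, 1);
}

int main(void)
{
	struct perf_event_attr attr;
	volatile unsigned long i, sum = 0;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_INSTRUCTIONS;
	attr.sample_period = 1000000;	/* overflow roughly every 1M instructions */
	attr.sample_type = 0;		/* note: PERF_SAMPLE_IP not requested */
	attr.exclude_kernel = 1;
	attr.disabled = 1;

	perf_fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (perf_fd < 0)
		return 1;

	signal(SIGIO, sigio_handler);
	fcntl(perf_fd, F_SETFL, O_ASYNC);
	fcntl(perf_fd, F_SETSIG, SIGIO);
	fcntl(perf_fd, F_SETOWN, getpid());

	/* Enable and allow one overflow before the event is disabled again */
	ioctl(perf_fd, PERF_EVENT_IOC_REFRESH, 1);

	for (i = 0; i < 100000000UL; i++)
		sum += i;

	printf("overflow notifications: %d\n", (int)overflows);
	close(perf_fd);
	return 0;
}
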
Suggested-by: Michael Ellerman 
Signed-off-by: Athira Rajeev 
---
Changes in v2:
-- Changed the approach to include PERF_SAMPLE_IP
   condition while checking siar_valid as Suggested by
   Michael Ellerman.

 arch/powerpc/perf/core-book3s.c | 19 +++
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 28206b1fe172..0ddbe33798ce 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2149,7 +2149,17 @@ static void record_and_restart(struct perf_event *event, 
unsigned long val,
left += period;
if (left <= 0)
left = period;
-   record = siar_valid(regs);
+
+   /*
+* If address is not requested in the sample
+* via PERF_SAMPLE_IP, just record that sample
+* irrespective of SIAR valid check.
+*/
+   if (event->attr.sample_type & PERF_SAMPLE_IP)
+   record = siar_valid(regs);
+   else
+   record = 1;
+
event->hw.last_period = event->hw.sample_period;
}
if (left < 0x80000000LL)
@@ -2167,9 +2177,10 @@ static void record_and_restart(struct perf_event *event, 
unsigned long val,
 * MMCR2. Check attr.exclude_kernel and address to drop the sample in
 * these cases.
 */
-   if (event->attr.exclude_kernel && record)
-   if (is_kernel_addr(mfspr(SPRN_SIAR)))
-   record = 0;
+   if (event->attr.exclude_kernel &&
+   (event->attr.sample_type & PERF_SAMPLE_IP) &&
+   is_kernel_addr(mfspr(SPRN_SIAR)))
+   record = 0;
 
/*
 * Finally record data if requested.
-- 
1.8.3.1



Re: [PATCH] powerpc/perf: Record counter overflow always if SAMPLE_IP is unset

2021-02-04 Thread Athira Rajeev



> On 04-Feb-2021, at 8:25 AM, Michael Ellerman  wrote:
> 
> Athira Rajeev  writes:
>> While sampling for marked events, currently we record the sample only
>> if the SIAR valid bit of Sampled Instruction Event Register (SIER) is
>> set. SIAR_VALID bit is used for fetching the instruction address from
>> Sampled Instruction Address Register(SIAR). But there are some usecases,
>> where the user is interested only in the PMU stats at each counter
>> overflow and the exact IP of the overflow event is not required.
>> Dropping SIAR invalid samples will fail to record some of the counter
>> overflows in such cases.
>> 
>> Example of such usecase is dumping the PMU stats (event counts)
>> after some regular amount of instructions/events from the userspace
>> (ex: via ptrace). Here counter overflow is indicated to userspace via
>> signal handler, and captured by monitoring and enabling I/O
>> signaling on the event file descriptor. In these cases, we expect to
>> get sample/overflow indication after each specified sample_period.
>> 
>> Perf event attribute will not have PERF_SAMPLE_IP set in the
>> sample_type if exact IP of the overflow event is not requested. So
>> while profiling if SAMPLE_IP is not set, just record the counter overflow
>> irrespective of SIAR_VALID check.
>> 
>> Signed-off-by: Athira Rajeev 
>> ---
>> arch/powerpc/perf/core-book3s.c | 10 --
>> 1 file changed, 8 insertions(+), 2 deletions(-)
>> 
>> diff --git a/arch/powerpc/perf/core-book3s.c 
>> b/arch/powerpc/perf/core-book3s.c
>> index 28206b1fe172..bb4828a05e4d 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -2166,10 +2166,16 @@ static void record_and_restart(struct perf_event 
>> *event, unsigned long val,
>>   * address even when freeze on supervisor state (kernel) is set in
>>   * MMCR2. Check attr.exclude_kernel and address to drop the sample in
>>   * these cases.
>> + *
>> + * If address is not requested in the sample
>> + * via PERF_SAMPLE_IP, just record that sample
>> + * irrespective of SIAR valid check.
>>   */
>> -if (event->attr.exclude_kernel && record)
>> -if (is_kernel_addr(mfspr(SPRN_SIAR)))
>> +if (event->attr.exclude_kernel && record) {
>> +if (is_kernel_addr(mfspr(SPRN_SIAR)) && 
>> (event->attr.sample_type & PERF_SAMPLE_IP))
>>  record = 0;
>> +} else if (!record && !(event->attr.sample_type & PERF_SAMPLE_IP))
>> +record = 1;
> 
> This seems wrong, you're assuming that record was set previously to
> = siar_valid(), but it may be that record is still 0 from the
> initialisation and we weren't going to record.
> 
> Don't we need something more like this?

Hi Michael,

Thanks for checking the patch and sharing the suggestion.

Yes, the below change looks good and tested with my scenario. 
I will send a V2 with new change.

Thanks
Athira
> 
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 9fd06010e8b6..e4e8a017d339 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -2136,7 +2136,12 @@ static void record_and_restart(struct perf_event 
> *event, unsigned long val,
>   left += period;
>   if (left <= 0)
>   left = period;
> - record = siar_valid(regs);
> +
> + if (event->attr.sample_type & PERF_SAMPLE_IP)
> + record = siar_valid(regs);
> + else
> + record = 1;
> +
>   event->hw.last_period = event->hw.sample_period;
>   }
>   if (left < 0x80000000LL)
> @@ -2154,9 +2159,10 @@ static void record_and_restart(struct perf_event 
> *event, unsigned long val,
>* MMCR2. Check attr.exclude_kernel and address to drop the sample in
>* these cases.
>*/
> - if (event->attr.exclude_kernel && record)
> - if (is_kernel_addr(mfspr(SPRN_SIAR)))
> - record = 0;
> + if (event->attr.exclude_kernel &&
> + (event->attr.sample_type & PERF_SAMPLE_IP) &&
> + is_kernel_addr(mfspr(SPRN_SIAR)))
> + record = 0;
> 
>   /*
>* Finally record data if requested.
> 
> 
> 
> cheers



Re: [PATCH 3/3] tools/perf: Add perf tools support to expose Performance Monitor Counter SPRs as part of extended regs

2021-02-04 Thread Athira Rajeev



> On 03-Feb-2021, at 9:55 PM, Arnaldo Carvalho de Melo  wrote:
> 
> Em Wed, Feb 03, 2021 at 01:55:37AM -0500, Athira Rajeev escreveu:
>> To enable presenting of Performance Monitor Counter Registers
>> (PMC1 to PMC6) as part of extended regsiters, patch adds these
>> to sample_reg_mask in the tool side (to use with -I? option).
>> 
>> Simplified the PERF_REG_PMU_MASK_300/31 definition. Excluded the
>> unsupported SPRs (MMCR3, SIER2, SIER3) from extended mask value for
>> CPU_FTR_ARCH_300.
> 
> Applied just 3/3, the tooling part, to my local branch, please holler if
> I should wait a bit more.
> 
> - Arnaldo
> 

Thanks Arnaldo for taking the tool side changes.

Athira.

>> Signed-off-by: Athira Rajeev 
>> ---
>> tools/arch/powerpc/include/uapi/asm/perf_regs.h | 28 
>> +++--
>> tools/perf/arch/powerpc/include/perf_regs.h |  6 ++
>> tools/perf/arch/powerpc/util/perf_regs.c|  6 ++
>> 3 files changed, 34 insertions(+), 6 deletions(-)
>> 
>> diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h 
>> b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
>> index bdf5f10f8b9f..578b3ee86105 100644
>> --- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h
>> +++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
>> @@ -55,17 +55,33 @@ enum perf_event_powerpc_regs {
>>  PERF_REG_POWERPC_MMCR3,
>>  PERF_REG_POWERPC_SIER2,
>>  PERF_REG_POWERPC_SIER3,
>> +PERF_REG_POWERPC_PMC1,
>> +PERF_REG_POWERPC_PMC2,
>> +PERF_REG_POWERPC_PMC3,
>> +PERF_REG_POWERPC_PMC4,
>> +PERF_REG_POWERPC_PMC5,
>> +PERF_REG_POWERPC_PMC6,
>>  /* Max regs without the extended regs */
>>  PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
>> };
>> 
>> #define PERF_REG_PMU_MASK((1ULL << PERF_REG_POWERPC_MAX) - 1)
>> 
>> -/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
>> -#define PERF_REG_PMU_MASK_300   (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 
>> 1) - PERF_REG_PMU_MASK)
>> -/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */
>> -#define PERF_REG_PMU_MASK_31   (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 
>> 1) - PERF_REG_PMU_MASK)
>> +/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */
>> +#define PERF_EXCLUDE_REG_EXT_300(7ULL << PERF_REG_POWERPC_MMCR3)
>> 
>> -#define PERF_REG_MAX_ISA_300   (PERF_REG_POWERPC_MMCR2 + 1)
>> -#define PERF_REG_MAX_ISA_31(PERF_REG_POWERPC_SIER3 + 1)
>> +/*
>> + * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300
>> + * includes 9 SPRS from MMCR0 to PMC6 excluding the
>> + * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300.
>> + */
>> +#define PERF_REG_PMU_MASK_300   ((0xfffULL << PERF_REG_POWERPC_MMCR0) - 
>> PERF_EXCLUDE_REG_EXT_300)
>> +
>> +/*
>> + * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31
>> + * includes 12 SPRs from MMCR0 to PMC6.
>> + */
>> +#define PERF_REG_PMU_MASK_31   (0xfffULL << PERF_REG_POWERPC_MMCR0)
>> +
>> +#define PERF_REG_EXTENDED_MAX  (PERF_REG_POWERPC_PMC6 + 1)
>> #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
>> diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
>> b/tools/perf/arch/powerpc/include/perf_regs.h
>> index 63f3ac91049f..98b6f9eabfc3 100644
>> --- a/tools/perf/arch/powerpc/include/perf_regs.h
>> +++ b/tools/perf/arch/powerpc/include/perf_regs.h
>> @@ -71,6 +71,12 @@
>>  [PERF_REG_POWERPC_MMCR3] = "mmcr3",
>>  [PERF_REG_POWERPC_SIER2] = "sier2",
>>  [PERF_REG_POWERPC_SIER3] = "sier3",
>> +[PERF_REG_POWERPC_PMC1] = "pmc1",
>> +[PERF_REG_POWERPC_PMC2] = "pmc2",
>> +[PERF_REG_POWERPC_PMC3] = "pmc3",
>> +[PERF_REG_POWERPC_PMC4] = "pmc4",
>> +[PERF_REG_POWERPC_PMC5] = "pmc5",
>> +[PERF_REG_POWERPC_PMC6] = "pmc6",
>> };
>> 
>> static inline const char *perf_reg_name(int id)
>> diff --git a/tools/perf/arch/powerpc/util/perf_regs.c 
>> b/tools/perf/arch/powerpc/util/perf_regs.c
>> index 2b6d4704e3aa..8116a253f91f 100644
>> --- a/tools/perf/arch/powerpc/util/perf_regs.c
>> +++ b/tools/perf/arch/powerpc/util/perf_regs.c
>> @@ -68,6 +68,12 @@
>>  SMPL_REG(mmcr3, PERF_REG_POWERPC_MMCR3),
>>  SMPL_REG(sier2, PERF_REG_POWERPC_SIER2),
>>  SMPL_REG(sier3, PERF_REG_POWERPC_SIER3),
>> +SMPL_REG(pmc1, PERF_REG_POWERPC_PMC1),
>> +SMPL_REG(pmc2, PERF_REG_POWERPC_PMC2),
>> +SMPL_REG(pmc3, PERF_REG_POWERPC_PMC3),
>> +SMPL_REG(pmc4, PERF_REG_POWERPC_PMC4),
>> +SMPL_REG(pmc5, PERF_REG_POWERPC_PMC5),
>> +SMPL_REG(pmc6, PERF_REG_POWERPC_PMC6),
>>  SMPL_REG_END
>> };
>> 
>> -- 
>> 1.8.3.1
>> 
> 
> -- 
> 
> - Arnaldo



Re: [PATCH] tools/perf: Fix powerpc gap between kernel end and module start

2021-02-04 Thread Athira Rajeev



> On 03-Feb-2021, at 9:01 PM, Arnaldo Carvalho de Melo  wrote:
> 
> Thanks, collected the Tested-by from Kajol and the Acked-by from Jiri
> and applied to my local tree for testing, then up to my perf/core
> branch.
> 
> - Arnaldo

Thanks Arnaldo for taking this fix.





[PATCH] powerpc/perf: Record counter overflow always if SAMPLE_IP is unset

2021-02-03 Thread Athira Rajeev
While sampling for marked events, currently we record the sample only
if the SIAR valid bit of Sampled Instruction Event Register (SIER) is
set. SIAR_VALID bit is used for fetching the instruction address from
Sampled Instruction Address Register (SIAR). But there are some use cases
where the user is interested only in the PMU stats at each counter
overflow and the exact IP of the overflow event is not required.
Dropping SIAR-invalid samples will fail to record some of the counter
overflows in such cases.

An example of such a use case is dumping the PMU stats (event counts)
after some regular amount of instructions/events from userspace
(ex: via ptrace). Here counter overflow is indicated to userspace via a
signal handler, set up by monitoring and enabling I/O signaling on the
event file descriptor. In these cases, we expect to get a sample/overflow
indication after each specified sample_period.

Perf event attribute will not have PERF_SAMPLE_IP set in the
sample_type if exact IP of the overflow event is not requested. So
while profiling if SAMPLE_IP is not set, just record the counter overflow
irrespective of SIAR_VALID check.

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/core-book3s.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 28206b1fe172..bb4828a05e4d 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2166,10 +2166,16 @@ static void record_and_restart(struct perf_event 
*event, unsigned long val,
 * address even when freeze on supervisor state (kernel) is set in
 * MMCR2. Check attr.exclude_kernel and address to drop the sample in
 * these cases.
+*
+* If address is not requested in the sample
+* via PERF_SAMPLE_IP, just record that sample
+* irrespective of SIAR valid check.
 */
-   if (event->attr.exclude_kernel && record)
-   if (is_kernel_addr(mfspr(SPRN_SIAR)))
+   if (event->attr.exclude_kernel && record) {
+   if (is_kernel_addr(mfspr(SPRN_SIAR)) && 
(event->attr.sample_type & PERF_SAMPLE_IP))
record = 0;
+   } else if (!record && !(event->attr.sample_type & PERF_SAMPLE_IP))
+   record = 1;
 
/*
 * Finally record data if requested.
-- 
1.8.3.1



[PATCH 3/3] tools/perf: Add perf tools support to expose Performance Monitor Counter SPRs as part of extended regs

2021-02-02 Thread Athira Rajeev
To enable presenting of Performance Monitor Counter Registers
(PMC1 to PMC6) as part of extended registers, this patch adds these
to sample_reg_mask on the tool side (to use with the -I? option).

Simplified the PERF_REG_PMU_MASK_300/31 definition. Excluded the
unsupported SPRs (MMCR3, SIER2, SIER3) from extended mask value for
CPU_FTR_ARCH_300.

Signed-off-by: Athira Rajeev 
---
 tools/arch/powerpc/include/uapi/asm/perf_regs.h | 28 +++--
 tools/perf/arch/powerpc/include/perf_regs.h |  6 ++
 tools/perf/arch/powerpc/util/perf_regs.c|  6 ++
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h 
b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
index bdf5f10f8b9f..578b3ee86105 100644
--- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -55,17 +55,33 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_MMCR3,
PERF_REG_POWERPC_SIER2,
PERF_REG_POWERPC_SIER3,
+   PERF_REG_POWERPC_PMC1,
+   PERF_REG_POWERPC_PMC2,
+   PERF_REG_POWERPC_PMC3,
+   PERF_REG_POWERPC_PMC4,
+   PERF_REG_POWERPC_PMC5,
+   PERF_REG_POWERPC_PMC6,
/* Max regs without the extended regs */
PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
 };
 
 #define PERF_REG_PMU_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
 
-/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
-#define PERF_REG_PMU_MASK_300   (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) 
- PERF_REG_PMU_MASK)
-/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */
-#define PERF_REG_PMU_MASK_31   (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - 
PERF_REG_PMU_MASK)
+/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */
+#define PERF_EXCLUDE_REG_EXT_300   (7ULL << PERF_REG_POWERPC_MMCR3)
 
-#define PERF_REG_MAX_ISA_300   (PERF_REG_POWERPC_MMCR2 + 1)
-#define PERF_REG_MAX_ISA_31(PERF_REG_POWERPC_SIER3 + 1)
+/*
+ * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300
+ * includes 9 SPRS from MMCR0 to PMC6 excluding the
+ * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300.
+ */
+#define PERF_REG_PMU_MASK_300   ((0xfffULL << PERF_REG_POWERPC_MMCR0) - 
PERF_EXCLUDE_REG_EXT_300)
+
+/*
+ * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31
+ * includes 12 SPRs from MMCR0 to PMC6.
+ */
+#define PERF_REG_PMU_MASK_31   (0xfffULL << PERF_REG_POWERPC_MMCR0)
+
+#define PERF_REG_EXTENDED_MAX  (PERF_REG_POWERPC_PMC6 + 1)
 #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h 
b/tools/perf/arch/powerpc/include/perf_regs.h
index 63f3ac91049f..98b6f9eabfc3 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -71,6 +71,12 @@
[PERF_REG_POWERPC_MMCR3] = "mmcr3",
[PERF_REG_POWERPC_SIER2] = "sier2",
[PERF_REG_POWERPC_SIER3] = "sier3",
+   [PERF_REG_POWERPC_PMC1] = "pmc1",
+   [PERF_REG_POWERPC_PMC2] = "pmc2",
+   [PERF_REG_POWERPC_PMC3] = "pmc3",
+   [PERF_REG_POWERPC_PMC4] = "pmc4",
+   [PERF_REG_POWERPC_PMC5] = "pmc5",
+   [PERF_REG_POWERPC_PMC6] = "pmc6",
 };
 
 static inline const char *perf_reg_name(int id)
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c 
b/tools/perf/arch/powerpc/util/perf_regs.c
index 2b6d4704e3aa..8116a253f91f 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -68,6 +68,12 @@
SMPL_REG(mmcr3, PERF_REG_POWERPC_MMCR3),
SMPL_REG(sier2, PERF_REG_POWERPC_SIER2),
SMPL_REG(sier3, PERF_REG_POWERPC_SIER3),
+   SMPL_REG(pmc1, PERF_REG_POWERPC_PMC1),
+   SMPL_REG(pmc2, PERF_REG_POWERPC_PMC2),
+   SMPL_REG(pmc3, PERF_REG_POWERPC_PMC3),
+   SMPL_REG(pmc4, PERF_REG_POWERPC_PMC4),
+   SMPL_REG(pmc5, PERF_REG_POWERPC_PMC5),
+   SMPL_REG(pmc6, PERF_REG_POWERPC_PMC6),
SMPL_REG_END
 };
 
-- 
1.8.3.1



[PATCH 2/3] powerpc/perf: Expose Performance Monitor Counter SPR's as part of extended regs

2021-02-02 Thread Athira Rajeev
Currently the Monitor Mode Control Registers and sampling registers are
part of extended regs. This patch adds support to include the Performance
Monitor Counter Registers (PMC1 to PMC6) as part of extended registers.

PMCs are saved in the perf interrupt handler as part of
per-cpu array 'pmcs' in struct cpu_hw_events. While capturing
the register values for extended regs, fetch these saved PMC values.

Simplified the PERF_REG_PMU_MASK_300/31 definition to include PMU
SPRs MMCR0 to PMC6. Exclude the unsupported SPRs (MMCR3, SIER2, SIER3)
from extended mask value for CPU_FTR_ARCH_300 in the new definition.

PERF_REG_EXTENDED_MAX is used to check if any index beyond the extended
registers is requested in the sample. Have one PERF_REG_EXTENDED_MAX
for CPU_FTR_ARCH_300/CPU_FTR_ARCH_31 since perf_reg_validate function
already checks the extended mask for the presence of any unsupported
register.

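For illustration, a userspace sketch of how a consumer could request the new
PMC registers once this change is in place: set PERF_SAMPLE_REGS_INTR and the
corresponding bits in sample_regs_intr (the same mechanism the perf tool's -I
option uses). It assumes the build machine's headers already carry the new
PERF_REG_POWERPC_PMC* values added by this series.

/* Request PMC1/PMC2 as interrupt-time registers in each sample (powerpc). */
#include <linux/perf_event.h>
#include <asm/perf_regs.h>	/* provides PERF_REG_POWERPC_* after this series */
#include <sys/syscall.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_REGS_INTR;
	attr.sample_regs_intr = (1ULL << PERF_REG_POWERPC_PMC1) |
				(1ULL << PERF_REG_POWERPC_PMC2);

	fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	/* Samples written to the mmap ring buffer will now carry pmc1/pmc2. */
	printf("event opened, fd=%d\n", fd);
	close(fd);
	return 0;
}
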
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/include/asm/perf_event.h |  2 ++
 arch/powerpc/include/uapi/asm/perf_regs.h | 28 ++--
 arch/powerpc/perf/core-book3s.c   | 11 +++
 arch/powerpc/perf/perf_regs.c | 13 -
 4 files changed, 39 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/perf_event.h 
b/arch/powerpc/include/asm/perf_event.h
index daec64d41b44..164e910bf654 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -14,6 +14,7 @@
 #include 
 #else
 static inline bool is_sier_available(void) { return false; }
+static inline unsigned long get_pmcs_ext_regs(int idx) { return 0; }
 #endif
 
 #ifdef CONFIG_FSL_EMB_PERF_EVENT
@@ -40,6 +41,7 @@
 
 /* To support perf_regs sier update */
 extern bool is_sier_available(void);
+extern unsigned long get_pmcs_ext_regs(int idx);
 /* To define perf extended regs mask value */
 extern u64 PERF_REG_EXTENDED_MASK;
 #define PERF_REG_EXTENDED_MASK PERF_REG_EXTENDED_MASK
diff --git a/arch/powerpc/include/uapi/asm/perf_regs.h 
b/arch/powerpc/include/uapi/asm/perf_regs.h
index bdf5f10f8b9f..578b3ee86105 100644
--- a/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -55,17 +55,33 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_MMCR3,
PERF_REG_POWERPC_SIER2,
PERF_REG_POWERPC_SIER3,
+   PERF_REG_POWERPC_PMC1,
+   PERF_REG_POWERPC_PMC2,
+   PERF_REG_POWERPC_PMC3,
+   PERF_REG_POWERPC_PMC4,
+   PERF_REG_POWERPC_PMC5,
+   PERF_REG_POWERPC_PMC6,
/* Max regs without the extended regs */
PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
 };
 
 #define PERF_REG_PMU_MASK  ((1ULL << PERF_REG_POWERPC_MAX) - 1)
 
-/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
-#define PERF_REG_PMU_MASK_300   (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) 
- PERF_REG_PMU_MASK)
-/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */
-#define PERF_REG_PMU_MASK_31   (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - 
PERF_REG_PMU_MASK)
+/* Exclude MMCR3, SIER2, SIER3 for CPU_FTR_ARCH_300 */
+#define PERF_EXCLUDE_REG_EXT_300   (7ULL << PERF_REG_POWERPC_MMCR3)
 
-#define PERF_REG_MAX_ISA_300   (PERF_REG_POWERPC_MMCR2 + 1)
-#define PERF_REG_MAX_ISA_31(PERF_REG_POWERPC_SIER3 + 1)
+/*
+ * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300
+ * includes 9 SPRS from MMCR0 to PMC6 excluding the
+ * unsupported SPRS in PERF_EXCLUDE_REG_EXT_300.
+ */
+#define PERF_REG_PMU_MASK_300   ((0xfffULL << PERF_REG_POWERPC_MMCR0) - 
PERF_EXCLUDE_REG_EXT_300)
+
+/*
+ * PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31
+ * includes 12 SPRs from MMCR0 to PMC6.
+ */
+#define PERF_REG_PMU_MASK_31   (0xfffULL << PERF_REG_POWERPC_MMCR0)
+
+#define PERF_REG_EXTENDED_MAX  (PERF_REG_POWERPC_PMC6 + 1)
 #endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 436af496e3aa..6ffc18b7e80b 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -150,6 +150,17 @@ bool is_sier_available(void)
return false;
 }
 
+/*
+ * Return PMC value corresponding to the
+ * index passed.
+ */
+unsigned long get_pmcs_ext_regs(int idx)
+{
+   struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
+
+   return cpuhw->pmcs[idx];
+}
+
 static bool regs_use_siar(struct pt_regs *regs)
 {
/*
diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c
index 6f681b105eec..b931eed482c9 100644
--- a/arch/powerpc/perf/perf_regs.c
+++ b/arch/powerpc/perf/perf_regs.c
@@ -75,6 +75,8 @@
 static u64 get_ext_regs_value(int idx)
 {
switch (idx) {
+   case PERF_REG_POWERPC_PMC1 ... PERF_REG_POWERPC_PMC6:
+   return get_pmcs_ext_regs(idx - PERF_REG_POWERPC_PMC1);
case PERF_REG_POWERPC_MMCR0:
return mfspr(SPRN_MMCR0);
case PERF_REG_POWERPC_MMCR1:
@@ -95,13 +97,6 @@ static u64 get_ext_regs_value(int idx)
 
 

[PATCH 1/3] powerpc/perf: Include PMCs as part of per-cpu cpuhw_events struct

2021-02-02 Thread Athira Rajeev
To support capturing of PMCs as part of extended registers, the
value of SPRs PMC1 to PMC6 has to be saved at the start of the PMI
interrupt handler. This is needed since we are resetting the
overflown PMC before creating the sample, and hence directly reading
SPRN_PMCx in 'perf_reg_value' would capture the modified value.

To solve this, add a per-cpu array as part of structure cpu_hw_events
and use this array to capture PMC values in the perf interrupt handler.
The patch also refactors the interrupt handler code to use this per-cpu
array instead of the current local array.

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/core-book3s.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 28206b1fe172..436af496e3aa 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -54,6 +54,9 @@ struct cpu_hw_events {
struct  perf_branch_stack   bhrb_stack;
struct  perf_branch_entry   bhrb_entries[BHRB_MAX_ENTRIES];
u64 ic_init;
+
+   /* Store the PMC values */
+   unsigned long pmcs[MAX_HWEVENTS];
 };
 
 static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
@@ -2277,7 +2280,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
int i, j;
struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events);
struct perf_event *event;
-   unsigned long val[8];
int found, active;
int nmi;
 
@@ -2301,12 +2303,12 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 
/* Read all the PMCs since we'll need them a bunch of times */
for (i = 0; i < ppmu->n_counter; ++i)
-   val[i] = read_pmc(i + 1);
+   cpuhw->pmcs[i] = read_pmc(i + 1);
 
/* Try to find what caused the IRQ */
found = 0;
for (i = 0; i < ppmu->n_counter; ++i) {
-   if (!pmc_overflow(val[i]))
+   if (!pmc_overflow(cpuhw->pmcs[i]))
continue;
if (is_limited_pmc(i + 1))
continue; /* these won't generate IRQs */
@@ -2321,7 +2323,7 @@ static void __perf_event_interrupt(struct pt_regs *regs)
event = cpuhw->event[j];
if (event->hw.idx == (i + 1)) {
active = 1;
-   record_and_restart(event, val[i], regs);
+   record_and_restart(event, cpuhw->pmcs[i], regs);
break;
}
}
@@ -2335,11 +2337,11 @@ static void __perf_event_interrupt(struct pt_regs *regs)
event = cpuhw->event[i];
if (!event->hw.idx || is_limited_pmc(event->hw.idx))
continue;
-   if (pmc_overflow_power7(val[event->hw.idx - 1])) {
+   if (pmc_overflow_power7(cpuhw->pmcs[event->hw.idx - 
1])) {
/* event has overflowed in a buggy way*/
found = 1;
record_and_restart(event,
-  val[event->hw.idx - 1],
+  cpuhw->pmcs[event->hw.idx - 
1],
   regs);
}
}
@@ -2356,6 +2358,9 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 */
write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
 
+   /* Clear the cpuhw->pmcs */
+   memset(&cpuhw->pmcs, 0, sizeof(cpuhw->pmcs));
+
if (nmi)
nmi_exit();
else
-- 
1.8.3.1



[PATCH 0/3] powerpc/perf: Add Performance Monitor Counters to extended regs

2021-02-02 Thread Athira Rajeev
Patch set to add Performance Monitor Counter SPR's as
part of extended regs in powerpc.

Patch 1/3 saves the PMC values in the perf interrupt
handler as part of per-cpu array.
Patch 2/3 adds PMC1 to PMC6 as part of the extended
regs mask.
Patch 3/3 includes perf tools side changes to add
PMC1 to PMC6 to sample_reg_mask to use with -I? option.

Athira Rajeev (3):
  powerpc/perf: Include PMCs as part of per-cpu cpuhw_events struct
  powerpc/perf: Expose Performance Monitor Counter SPR's as part of
extended regs
  tools/perf: Add perf tools support to expose Performance Monitor
Counter SPRs as part of extended regs

 arch/powerpc/include/asm/perf_event.h   |  2 ++
 arch/powerpc/include/uapi/asm/perf_regs.h   | 28 +++--
 arch/powerpc/perf/core-book3s.c | 28 +++--
 arch/powerpc/perf/perf_regs.c   | 13 
 tools/arch/powerpc/include/uapi/asm/perf_regs.h | 28 +++--
 tools/perf/arch/powerpc/include/perf_regs.h |  6 ++
 tools/perf/arch/powerpc/util/perf_regs.c|  6 ++
 7 files changed, 84 insertions(+), 27 deletions(-)

-- 
1.8.3.1



Re: [PATCH] tools/perf: Fix powerpc gap between kernel end and module start

2021-02-02 Thread Athira Rajeev
On 18-Jan-2021, at 3:51 PM, kajoljain  wrote:
On 1/12/21 3:08 PM, Jiri Olsa wrote:
On Mon, Dec 28, 2020 at 09:14:14PM -0500, Athira Rajeev wrote:

SNIP

c2799370 b backtrace_flag
c2799378 B radix_tree_node_cachep
c2799380 B __bss_stop
c27a B _end
c0080389 t icmp_checkentry  [ip_tables]
c00803890038 t ipt_alloc_initial_table  [ip_tables]
c00803890468 T ipt_do_table [ip_tables]
c00803890de8 T ipt_unregister_table_pre_exit    [ip_tables]
...

Perf calls function symbols__fixup_end() which sets the end of symbol
to 0xc0080389, which is the next address and this is the start
address of first module (icmp_checkentry in above) which will make the
huge symbol size of 0x8010f.

After symbols__fixup_end:
symbols__fixup_end: sym->name: _end, sym->start: 0xc27a,
sym->end: 0xc0080389

On powerpc, kernel text segment is located at 0xc000
whereas the modules are located at very high memory addresses,
0xc0080xxx. Since the gap between end of kernel text segment
and beginning of first module's address is high, histogram allocation
using calloc fails.

Fix this by detecting the kernel's last symbol and limiting
the range of last kernel symbol to pagesize.

Patch looks good to me.

Tested-By: Kajol Jain

Thanks,
Kajol Jain

Signed-off-by: Athira Rajeev

I can't test, but since the same approach works for arm and s390,
this also looks ok

Acked-by: Jiri Olsa 

thanks,
jirka

Hi Arnaldo,

Can you please help review this patch and merge if this looks good..

Thanks
Athira
---
 tools/perf/arch/powerpc/util/Build |  1 +
 tools/perf/arch/powerpc/util/machine.c | 24 
 2 files changed, 25 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/machine.c

diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index e86e210bf514..b7945e5a543b 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,4 +1,5 @@
 perf-y += header.o
+perf-y += machine.o
 perf-y += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += mem-events.o

diff --git a/tools/perf/arch/powerpc/util/machine.c b/tools/perf/arch/powerpc/util/machine.c
new file mode 100644
index ..c30e5cc88c16
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/machine.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+#include 
+#include  // page_size
+#include "debug.h"
+#include "symbol.h"
+
+/* On powerpc kernel text segment start at memory addresses, 0xc000
+ * whereas the modules are located at very high memory addresses,
+ * for example 0xc0080xxx. The gap between end of kernel text segment
+ * and beginning of first module's text segment is very high.
+ * Therefore do not fill this gap and do not assign it to the kernel dso map.
+ */
+
+void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+{
+	if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
+		/* Limit the range of last kernel symbol */
+		p->end += page_size;
+	else
+		p->end = c->start;
+	pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
+}
-- 
1.8.3.1

[PATCH] powerpc/perf: Fix the guest crash issue with trace-imc

2021-02-01 Thread Athira Rajeev
When perf kvm record with the trace_imc event is attached to a guest
pid (with the -p option), the qemu process gets killed with a permission
issue. This happens because the trace_imc event requires admin privileges
to monitor the process. If qemu creates threads, by default the
child tasks also inherit the counters, and if there is no permission
to monitor the qemu threads, we return permission denied (EACCES).

Fix this by returning EACCES only if there is no CAP_SYS_ADMIN and the
event doesn’t have inheritance.

Fixes: 012ae244845f ("powerpc/perf: Trace imc PMU functions")
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/imc-pmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index e106909ff9c3..cc5679bfd28b 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1429,7 +1429,7 @@ static int trace_imc_event_init(struct perf_event *event)
if (event->attr.type != event->pmu->type)
return -ENOENT;
 
-   if (!perfmon_capable())
+   if (!perfmon_capable() && !event->attr.inherit)
return -EACCES;
 
/* Return if this is a couting event */
-- 
1.8.3.1



Re: [PATCH v7 19/42] powerpc/perf: move perf irq/nmi handling details into traps.c

2021-01-31 Thread Athira Rajeev
On 30-Jan-2021, at 6:38 PM, Nicholas Piggin  wrote:

This is required in order to allow more significant differences between
NMI type interrupt handlers and regular asynchronous handlers.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/traps.c  | 31 +++-
 arch/powerpc/perf/core-book3s.c  | 35 ++--
 arch/powerpc/perf/core-fsl-emb.c | 25 ---
 3 files changed, 32 insertions(+), 59 deletions(-)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 4349b25807cf..6da3a3642dfb 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1892,11 +1892,40 @@ void vsx_unavailable_tm(struct pt_regs *regs)
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
-void performance_monitor_exception(struct pt_regs *regs)
+static void performance_monitor_exception_nmi(struct pt_regs *regs)
+{
+	nmi_enter();
+
+	__this_cpu_inc(irq_stat.pmu_irqs);
+
+	perf_irq(regs);
+
+	nmi_exit();
+}
+
+static void performance_monitor_exception_async(struct pt_regs *regs)
 {
+	irq_enter();
+
 	__this_cpu_inc(irq_stat.pmu_irqs);
 
 	perf_irq(regs);
+
+	irq_exit();
+}
+
+void performance_monitor_exception(struct pt_regs *regs)
+{
+	/*
+	 * On 64-bit, if perf interrupts hit in a local_irq_disable
+	 * (soft-masked) region, we consider them as NMIs. This is required to
+	 * prevent hash faults on user addresses when reading callchains (and
+	 * looks better from an irq tracing perspective).
+	 */
+	if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
+		performance_monitor_exception_nmi(regs);
+	else
+		performance_monitor_exception_async(regs);
 }

Hi Nick,

Thanks for the change in tracing of PMI interrupts.
I tested with this patch that moves perf irq/nmi handling details into traps.c
and verified the patch works fine for NMI and asynchronous cases.

Test scenario1: My test kernel module tries to create one of performance monitor
counter overflow between local_irq_save/local_irq_restore which should be delivered
as an NMI. Verified it calls NMI exception handler from my ftrace logs below

<<>>
dummy_perf <-performance_monitor_exception_nmi
<<>>

Test scenario2: My test kernel module tries to create one of performance monitor counter
overflow between powerpc_local_irq_pmu_save/restore which should be delivered as an
asynchronous interrupt ( replayed PMI ). Verified it calls correct handler from my ftrace logs:

<<>>
replay_soft_interrupts <-arch_local_irq_restore
irq_enter <-performance_monitor_exception_async
irq_enter_rcu <-performance_monitor_exception_async
dummy_perf <-performance_monitor_exception_async
irq_exit <-performance_monitor_exception_async
<<>>

Reviewed-and-Tested-by:  Athira Rajeev 

 #ifdef CONFIG_PPC_ADV_DEBUG_REGS

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 28206b1fe172..9fd06010e8b6 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -110,10 +110,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
 {
 	regs->result = 0;
 }
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
-	return 0;
-}
 
 static inline int siar_valid(struct pt_regs *regs)
 {
@@ -353,15 +349,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
 	regs->result = use_siar;
 }
 
-/*
- * If interrupts were soft-disabled when a PMU interrupt occurs, treat
- * it as an NMI.
- */
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
-	return (regs->softe & IRQS_DISABLED);
-}
-
 /*
  * On processors like P7+ that have the SIAR-Valid bit, marked instructions
  * must be sampled only if the SIAR-valid bit is set.
@@ -2279,7 +2266,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 	struct perf_event *event;
 	unsigned long val[8];
 	int found, active;
-	int nmi;
 
 	if (cpuhw->n_limited)
 		freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
@@ -2287,18 +2273,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 
 	perf_read_regs(regs);
 
-	/*
-	 * If perf interrupts hit in a local_irq_disable (soft-masked) region,
-	 * we consider them as NMIs. This is required to prevent hash faults on
-	 * user addresses when reading callchains. See the NMI test in
-	 * do_hash_page.
-	 */
-	nmi = perf_intr_is_nmi(regs);
-	if (nmi)
-		nmi_enter();
-	else
-		irq_enter();
-
 	/* Read all the PMCs since we'll need them a bunch of times */
 	for (i = 0; i < ppmu->n_counter; ++i)
 		val[i] = read_pmc(i + 1);
@@ -2344,8 +2318,8 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 			}
 		}
 	}
-	if (!found && !nmi && printk_ratelimit())
-		printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
+	if (unlikely(!found) && !arch_irq_disabled_regs(regs))
+		printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
 
 	/*
 	 * Reset MMCR0 to its normal value.  This will set PMXE and
@@ -2355,11 +2329,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 	 * we get back out of this interrupt.
 	 */
 	write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
-
-	if (nmi)
-		nmi

Re: [PATCH v6 14/39] powerpc/perf: move perf irq/nmi handling details into traps.c

2021-01-26 Thread Athira Rajeev
On 20-Jan-2021, at 8:39 AM, Nicholas Piggin  wrote:
Excerpts from Athira Rajeev's message of January 19, 2021 8:24 pm:
On 15-Jan-2021, at 10:19 PM, Nicholas Piggin  wrote:

This is required in order to allow more significant differences between
NMI type interrupt handlers and regular asynchronous handlers.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/traps.c  | 31 +++-
 arch/powerpc/perf/core-book3s.c  | 35 ++--
 arch/powerpc/perf/core-fsl-emb.c | 25 ---
 3 files changed, 32 insertions(+), 59 deletions(-)

Hi Nick,

Reviewed this perf patch which moves the nmi_enter/irq_enter to traps.c and code-wise changes
for perf looks fine to me. Further, I was trying to test this by picking the whole Patch series on top
of 5.11.0-rc3 kernel and using below test scenario:

Intention of testcase is to check whether the perf nmi and asynchronous interrupts are getting
captured as expected. My test kernel module below tries to create one of performance monitor
counter ( PMC6 ) overflow between local_irq_save/local_irq_restore.
[ Here interrupts are disabled and has IRQS_DISABLED as regs->softe ].
I am expecting the PMI to come as an NMI in this case. I am also configuring ftrace so that I
can confirm whether it comes as an NMI or a replayed interrupt from the trace.

Environment :One CPU online
prerequisite for ftrace:
# cd /sys/kernel/debug/tracing
# echo 100 > buffer_percent
# echo 20 > buffer_size_kb
# echo ppc-tb > trace_clock
# echo function > current_tracer

Part of sample kernel test module to trigger a PMI between local_irq_save and local_irq_restore:

<<>>
static ulong delay = 1;
static void busy_wait(ulong time)
{
   udelay(delay);
}
static __always_inline void irq_test(void)
{
   unsigned long flags;
   local_irq_save(flags);
   trace_printk("IN IRQ TEST\n");
   mtspr(SPRN_MMCR0, 0x8000);
   mtspr(SPRN_PMC6, 0x8000 - 100);
   mtspr(SPRN_MMCR0, 0x6004000);
   busy_wait(delay);
   trace_printk("IN IRQ TEST DONE\n");
   local_irq_restore(flags);
   mtspr(SPRN_MMCR0, 0x8000);
   mtspr(SPRN_PMC6, 0);
}
<<>>

But this resulted in soft lockup, Adding a snippet of call-trace below:

I'm not getting problems with your test case, but I am testing in a VM
so may not be getting device interrupts so much (your 0xea0 interrupt).
I'll try test on bare metal next. Does it reproduce easily, and
unpatched kernel definitely does not have the problem?

A different issue, after my series, I don't see the perf "NMI" interrupt
in any traces under local_irq_disable, because it's disabling ftrace the
same as the other NMI interrupts, so your test wouldn't see them.

I don't know if this is exactly right. Can tracing cope with such NMIs
okay even if it's interrupted in the middle of the tracing code? Machine
check at least has to disable tracing because it's in real-mode, machine
check and sreset also want to disable tracing because something is going
wrong and we don't want to make it worse (e.g., to get a cleaner crash).

Should we still permit tracing of perf NMIs?

Hi Nick,

Having tracing of perf NMI's enabled is actually helpful for debugging PMU issues.
Especially since for perf, we decide at runtime whether PMI is going to be delivered
as an NMI or an asynchronous interrupt. So having the PMI captured in trace will be good.
Also while debugging interrupt/overflow issues captured with testsuites like perf fuzzer,
where lot of test combinations are run, having the PMI's ( nmi and async ) in traces will
help in debug which otherwise will need to be analysed by adding printk's etc.

Thanks
Athira

[  883.900762] watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [swapper/0:0]
[  883.901381] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G   OE 5.11.0-rc3+ #34
--
[  883.901999] NIP [c00168d0] replay_soft_interrupts+0x70/0x2f0
[  883.902032] LR [c003b2b8] interrupt_exit_kernel_prepare+0x1e8/0x240
[  883.902063] Call Trace:
[  883.902085] [c1c96f50] [c003b2b8] interrupt_exit_kernel_prepare+0x1e8/0x240 (unreliable)
[  883.902139] [c1c96fb0] [c000fd88] interrupt_return+0x158/0x200
[  883.902185] --- interrupt: ea0 at __rb_reserve_next+0xc0/0x5b0
[  883.902224] NIP:  c02d8980 LR: c02d897c CTR: c01aad90
[  883.902262] REGS: c1c97020 TRAP: 0ea0   Tainted: G   OE  (5.11.0-rc3+)
[  883.902301] MSR:  90009033   CR: 28000484  XER: 2004
[  883.902387] CFAR: c000fe00 IRQMASK: 0
--
[  883.902757] NIP [c02d8980] __rb_reserve_next+0xc0/0x5b0
[  883.902786] LR [c02d897c] __rb_reserve_next+0xbc/0x5b0
[  883.902824] --- interrupt: ea0
[  883.902848] [c1c97360] [c02d8fcc] ring_buffer_lock_reserve+0x15c/0x580
[  883.902894] [c1c973f0] [c02e82fc] trace_function+0x4c/0x1c0
[  883.902930] [c1c97440] [c02f6f50] function_trace_call+0x140/0x190
[  883.902976] [c1c97470] [c007d6f8] ftrace_call+0x4/0x44
[  883.903021]

Re: [PATCH] powerpc/64: prevent replayed interrupt handlers from running softirqs

2021-01-20 Thread Athira Rajeev
On 20-Jan-2021, at 1:20 PM, Nicholas Piggin  wrote:

Running softirqs enables interrupts, which can then end up recursing
into the irq soft-mask code we're adjusting, including replaying
interrupts itself, which might be theoretically unbounded.

This abridged trace shows how this can occur:

! NIP replay_soft_interrupts
 LR  interrupt_exit_kernel_prepare
 Call Trace:
   interrupt_exit_kernel_prepare (unreliable)
   interrupt_return
 --- interrupt: ea0 at __rb_reserve_next
 NIP __rb_reserve_next
 LR __rb_reserve_next
 Call Trace:
   ring_buffer_lock_reserve
   trace_function
   function_trace_call
   ftrace_call
   __do_softirq
   irq_exit
   timer_interrupt
!   replay_soft_interrupts
   interrupt_exit_kernel_prepare
   interrupt_return
 --- interrupt: ea0 at arch_local_irq_restore

Fix this by disabling bhs (softirqs) around the interrupt replay.

I don't know that commit 3282a3da25bd ("powerpc/64: Implement soft
interrupt replay in C") actually introduced the problem. Prior to that
change, the interrupt replay seems like it should still be subect to
this recusion, however it's done after all the irq state has been fixed
up at the end of the replay, so it seems reasonable to fix back to this
commit.

Fixes: 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in C")
Signed-off-by: Nicholas Piggin 

Thanks for the fix Nick.

Tested this below scenario where previously it was resulting in soft lockup's
with the trace described in the commit message. With the patch, I don't see
soft lockup's.

Test scenario: My test kernel module below tries to create one of performance monitor
counter ( PMC6 ) overflow between local_irq_save/local_irq_restore. I am also
configuring ftrace.

Environment :One CPU online and Bare Metal system
prerequisite for ftrace:
# cd /sys/kernel/debug/tracing
# echo 100 > buffer_percent
# echo 20 > buffer_size_kb
# echo ppc-tb > trace_clock
# echo function > current_tracer

Part of sample kernel test module to trigger a PMI between local_irq_save and local_irq_restore:

<<>>
static ulong delay = 1;
static void busy_wait(ulong time)
{
   udelay(delay);
}
static __always_inline void irq_test(void)
{
   unsigned long flags;
   local_irq_save(flags);
   trace_printk("IN IRQ TEST\n");
   mtspr(SPRN_MMCR0, 0x8000);
   mtspr(SPRN_PMC6, 0x8000 - 100);
   mtspr(SPRN_MMCR0, 0x6004000);
   busy_wait(delay);
   trace_printk("IN IRQ TEST DONE\n");
   local_irq_restore(flags);
   mtspr(SPRN_MMCR0, 0x8000);
   mtspr(SPRN_PMC6, 0);
}
<<>>

With the patch, there is no soft lockup's.

Tested-by: Athira Rajeev 

---
 arch/powerpc/kernel/irq.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 6b1eca53e36c..7064135f9dc3 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -188,6 +188,18 @@ void replay_soft_interrupts(void)
 	unsigned char happened = local_paca->irq_happened;
 	struct pt_regs regs;
 
+	/*
+	 * Prevent softirqs from being run when an interrupt handler returns
+	 * and calls irq_exit(), because softirq processing enables interrupts.
+	 * If an interrupt is taken, it may then call replay_soft_interrupts
+	 * on its way out, which gets messy and recursive.
+	 *
+	 * softirqs created by replayed interrupts will be run at the end of
+	 * this function when bhs are enabled (if they were enabled in our
+	 * caller).
+	 */
+	local_bh_disable();
+
 	ppc_save_regs(&regs);
 
 	regs.softe = IRQS_ENABLED;
@@ -263,6 +275,8 @@ void replay_soft_interrupts(void)
 		trace_hardirqs_off();
 		goto again;
 	}
+
+	local_bh_enable();
 }
 
 notrace void arch_local_irq_restore(unsigned long mask)
-- 
2.23.0

Re: [PATCH v6 14/39] powerpc/perf: move perf irq/nmi handling details into traps.c

2021-01-19 Thread Athira Rajeev



> On 15-Jan-2021, at 10:19 PM, Nicholas Piggin  wrote:
> 
> This is required in order to allow more significant differences between
> NMI type interrupt handlers and regular asynchronous handlers.
> 
> Signed-off-by: Nicholas Piggin 
> ---
> arch/powerpc/kernel/traps.c  | 31 +++-
> arch/powerpc/perf/core-book3s.c  | 35 ++--
> arch/powerpc/perf/core-fsl-emb.c | 25 ---
> 3 files changed, 32 insertions(+), 59 deletions(-)

Hi Nick,

Reviewed this perf patch which moves the nmi_enter/irq_enter handling into
traps.c, and the code-wise changes for perf look fine to me. Further, I was
trying to test this by picking the whole patch series on top of the
5.11.0-rc3 kernel and using the below test scenario:

The intention of the testcase is to check whether the perf NMI and
asynchronous interrupts are getting captured as expected. My test kernel
module below tries to create a performance monitor counter (PMC6) overflow
between local_irq_save/local_irq_restore.
[Here interrupts are disabled and regs->softe has IRQS_DISABLED set.]
I am expecting the PMI to come as an NMI in this case. I am also configuring
ftrace so that I can confirm whether it comes as an NMI or a replayed
interrupt from the trace.

Environment :One CPU online
prerequisite for ftrace:
# cd /sys/kernel/debug/tracing
# echo 100 > buffer_percent
# echo 20 > buffer_size_kb 
# echo ppc-tb > trace_clock
# echo function > current_tracer

Part of sample kernel test module to trigger a PMI between 
local_irq_save and local_irq_restore:

<<>>
static ulong delay = 1;
static void busy_wait(ulong time)
{
udelay(delay);
}
static __always_inline void irq_test(void)
{
unsigned long flags;
local_irq_save(flags);
trace_printk("IN IRQ TEST\n");
mtspr(SPRN_MMCR0, 0x8000);
mtspr(SPRN_PMC6, 0x8000 - 100);
mtspr(SPRN_MMCR0, 0x6004000);
busy_wait(delay);
trace_printk("IN IRQ TEST DONE\n");
local_irq_restore(flags);
mtspr(SPRN_MMCR0, 0x8000);
mtspr(SPRN_PMC6, 0);
}
<<>>

But this resulted in soft lockup, Adding a snippet of call-trace below:

[  883.900762] watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [swapper/0:0]
[  883.901381] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G   OE 
5.11.0-rc3+ #34
--
[  883.901999] NIP [c00168d0] replay_soft_interrupts+0x70/0x2f0
[  883.902032] LR [c003b2b8] interrupt_exit_kernel_prepare+0x1e8/0x240
[  883.902063] Call Trace:
[  883.902085] [c1c96f50] [c003b2b8] 
interrupt_exit_kernel_prepare+0x1e8/0x240 (unreliable)
[  883.902139] [c1c96fb0] [c000fd88] 
interrupt_return+0x158/0x200
[  883.902185] --- interrupt: ea0 at __rb_reserve_next+0xc0/0x5b0
[  883.902224] NIP:  c02d8980 LR: c02d897c CTR: c01aad90
[  883.902262] REGS: c1c97020 TRAP: 0ea0   Tainted: G   OE  
(5.11.0-rc3+)
[  883.902301] MSR:  90009033   CR: 28000484  
XER: 2004
[  883.902387] CFAR: c000fe00 IRQMASK: 0 
--
[  883.902757] NIP [c02d8980] __rb_reserve_next+0xc0/0x5b0
[  883.902786] LR [c02d897c] __rb_reserve_next+0xbc/0x5b0
[  883.902824] --- interrupt: ea0
[  883.902848] [c1c97360] [c02d8fcc] 
ring_buffer_lock_reserve+0x15c/0x580
[  883.902894] [c1c973f0] [c02e82fc] trace_function+0x4c/0x1c0
[  883.902930] [c1c97440] [c02f6f50] 
function_trace_call+0x140/0x190
[  883.902976] [c1c97470] [c007d6f8] ftrace_call+0x4/0x44
[  883.903021] [c1c97660] [c0dcf70c] __do_softirq+0x15c/0x3d4
[  883.903066] [c1c97750] [c015fc68] irq_exit+0x198/0x1b0
[  883.903102] [c1c97780] [c0dc1790] timer_interrupt+0x170/0x3b0
[  883.903148] [c1c977e0] [c0016994] 
replay_soft_interrupts+0x134/0x2f0
[  883.903193] [c1c979d0] [c003b2b8] 
interrupt_exit_kernel_prepare+0x1e8/0x240
[  883.903240] [c1c97a30] [c000fd88] 
interrupt_return+0x158/0x200
[  883.903276] --- interrupt: ea0 at arch_local_irq_restore+0x70/0xc0

Thanks
Athira
> 
> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index 738370519937..bd55f201115b 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -1892,11 +1892,40 @@ void vsx_unavailable_tm(struct pt_regs *regs)
> }
> #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
> 
> -void performance_monitor_exception(struct pt_regs *regs)
> +static void performance_monitor_exception_nmi(struct pt_regs *regs)
> +{
> + nmi_enter();
> +
> + __this_cpu_inc(irq_stat.pmu_irqs);
> +
> + perf_irq(regs);
> +
> + nmi_exit();
> +}
> +
> +static void performance_monitor_exception_async(struct pt_regs *regs)
> {
> + irq_enter();
> +
>   __this_cpu_inc(irq_stat.pmu_irqs);
> 
>   perf_irq(regs);
> +
> + irq_exit();
> +}
> +
> +void performance_monitor_exception(struct 

Re: [PATCH] tools/perf: Fix powerpc gap between kernel end and module start

2021-01-12 Thread Athira Rajeev



> On 12-Jan-2021, at 3:08 PM, Jiri Olsa  wrote:
> 
> On Mon, Dec 28, 2020 at 09:14:14PM -0500, Athira Rajeev wrote:
> 
> SNIP
> 
>> c2799370 b backtrace_flag
>> c2799378 B radix_tree_node_cachep
>> c2799380 B __bss_stop
>> c27a B _end
>> c0080389 t icmp_checkentry  [ip_tables]
>> c00803890038 t ipt_alloc_initial_table  [ip_tables]
>> c00803890468 T ipt_do_table [ip_tables]
>> c00803890de8 T ipt_unregister_table_pre_exit[ip_tables]
>> ...
>> 
>> Perf calls function symbols__fixup_end() which sets the end of symbol
>> to 0xc0080389, which is the next address and this is the start
>> address of first module (icmp_checkentry in above) which will make the
>> huge symbol size of 0x8010f.
>> 
>> After symbols__fixup_end:
>> symbols__fixup_end: sym->name: _end, sym->start: 0xc27a,
>> sym->end: 0xc0080389
>> 
>> On powerpc, kernel text segment is located at 0xc000
>> whereas the modules are located at very high memory addresses,
>> 0xc0080xxx. Since the gap between end of kernel text segment
>> and beginning of first module's address is high, histogram allocation
>> using calloc fails.
>> 
>> Fix this by detecting the kernel's last symbol and limiting
>> the range of last kernel symbol to pagesize.
>> 
>> Signed-off-by: Athira Rajeev
> 
> I can't test, but since the same approach works for arm and s390,
> this also looks ok
> 
> Acked-by: Jiri Olsa 
> 
> thanks,
> jirka

Thanks Jiri for reviewing the patch,

Athira
> 
>> ---
>> tools/perf/arch/powerpc/util/Build |  1 +
>> tools/perf/arch/powerpc/util/machine.c | 24 
>> 2 files changed, 25 insertions(+)
>> create mode 100644 tools/perf/arch/powerpc/util/machine.c
>> 
>> diff --git a/tools/perf/arch/powerpc/util/Build 
>> b/tools/perf/arch/powerpc/util/Build
>> index e86e210bf514..b7945e5a543b 100644
>> --- a/tools/perf/arch/powerpc/util/Build
>> +++ b/tools/perf/arch/powerpc/util/Build
>> @@ -1,4 +1,5 @@
>> perf-y += header.o
>> +perf-y += machine.o
>> perf-y += kvm-stat.o
>> perf-y += perf_regs.o
>> perf-y += mem-events.o
>> diff --git a/tools/perf/arch/powerpc/util/machine.c 
>> b/tools/perf/arch/powerpc/util/machine.c
>> new file mode 100644
>> index ..c30e5cc88c16
>> --- /dev/null
>> +++ b/tools/perf/arch/powerpc/util/machine.c
>> @@ -0,0 +1,24 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +#include 
>> +#include 
>> +#include  // page_size
>> +#include "debug.h"
>> +#include "symbol.h"
>> +
>> +/* On powerpc kernel text segment start at memory addresses, 
>> 0xc000
>> + * whereas the modules are located at very high memory addresses,
>> + * for example 0xc0080xxx. The gap between end of kernel text 
>> segment
>> + * and beginning of first module's text segment is very high.
>> + * Therefore do not fill this gap and do not assign it to the kernel dso 
>> map.
>> + */
>> +
>> +void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
>> +{
>> +if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
>> +/* Limit the range of last kernel symbol */
>> +p->end += page_size;
>> +else
>> +p->end = c->start;
>> +pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
>> +}
>> -- 
>> 1.8.3.1



[PATCH] tools/perf: Fix powerpc gap between kernel end and module start

2020-12-28 Thread Athira Rajeev
Running "perf mem report" in TUI mode fails with ENOMEM message
in powerpc:

failed to process sample

Running with the debug and verbose options shows that the issue is in
allocating memory for the sample histograms.

The error path is:
symbol__inc_addr_samples -> __symbol__inc_addr_samples
-> annotated_source__histogram

symbol__inc_addr_samples calls annotated_source__alloc_histograms
to allocate memory for sample histograms using calloc. Here calloc fails
since the size of symbol is huge. The size of a symbol is calculated as
difference between its start and end address.

Example histogram allocation that fails is:
sym->name is _end, sym->start is 0xc27a, sym->end is
0xc0080389, symbol__size(sym) is 0x8010f

In above case, difference between sym->start (0xc27a)
and sym->end (0xc0080389) is huge.

This is same problem as in s390 and arm64 which are fixed in commits:
'commit b9c0a64901d5 ("perf annotate: Fix s390 gap between kernel end
and module start")'
'commit 78886f3ed37e ("perf symbols: Fix arm64 gap between kernel start
and module end")'

When this symbol was first read, its start and end addresses were set to
the address that matches the data from /proc/kallsyms.

After symbol__new:
symbol__new: _end 0xc27a-0xc27a

From /proc/kallsyms:
...
c2799370 b backtrace_flag
c2799378 B radix_tree_node_cachep
c2799380 B __bss_stop
c27a B _end
c0080389 t icmp_checkentry  [ip_tables]
c00803890038 t ipt_alloc_initial_table  [ip_tables]
c00803890468 T ipt_do_table [ip_tables]
c00803890de8 T ipt_unregister_table_pre_exit[ip_tables]
...

Perf calls symbols__fixup_end(), which sets the end of the symbol
to 0xc0080389. That is the next address, i.e. the start
address of the first module (icmp_checkentry above), which results in the
huge symbol size of 0x8010f.

After symbols__fixup_end:
symbols__fixup_end: sym->name: _end, sym->start: 0xc27a,
sym->end: 0xc0080389

On powerpc, kernel text segment is located at 0xc000
whereas the modules are located at very high memory addresses,
0xc0080xxx. Since the gap between end of kernel text segment
and beginning of first module's address is high, histogram allocation
using calloc fails.

Fix this by detecting the kernel's last symbol and limiting
the range of last kernel symbol to pagesize.
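
As a worked illustration with the kallsyms pair above (values taken from the
example; this only shows the effect of the new hook, it is not tool output):

<<>>
prev = _end                          /* kernel symbol, name has no "[module]" */
curr = icmp_checkentry [ip_tables]   /* first module symbol */

without the arch hook: prev->end = curr->start  -> _end spans the whole gap
with the arch hook:    prev->end += page_size   -> _end stays within one page
<<>>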

Signed-off-by: Athira Rajeev
---
 tools/perf/arch/powerpc/util/Build |  1 +
 tools/perf/arch/powerpc/util/machine.c | 24 
 2 files changed, 25 insertions(+)
 create mode 100644 tools/perf/arch/powerpc/util/machine.c

diff --git a/tools/perf/arch/powerpc/util/Build 
b/tools/perf/arch/powerpc/util/Build
index e86e210bf514..b7945e5a543b 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,4 +1,5 @@
 perf-y += header.o
+perf-y += machine.o
 perf-y += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += mem-events.o
diff --git a/tools/perf/arch/powerpc/util/machine.c 
b/tools/perf/arch/powerpc/util/machine.c
new file mode 100644
index ..c30e5cc88c16
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/machine.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include 
+#include 
+#include  // page_size
+#include "debug.h"
+#include "symbol.h"
+
+/* On powerpc kernel text segment start at memory addresses, 0xc000
+ * whereas the modules are located at very high memory addresses,
+ * for example 0xc0080xxx. The gap between end of kernel text segment
+ * and beginning of first module's text segment is very high.
+ * Therefore do not fill this gap and do not assign it to the kernel dso map.
+ */
+
+void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+{
+   if (strchr(p->name, '[') == NULL && strchr(c->name, '['))
+   /* Limit the range of last kernel symbol */
+   p->end += page_size;
+   else
+   p->end = c->start;
+   pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
+}
-- 
1.8.3.1



Re: [PATCH v3 04/19] powerpc/perf: move perf irq/nmi handling details into traps.c

2020-12-16 Thread Athira Rajeev
On 28-Nov-2020, at 8:10 PM, Nicholas Piggin  wrote:

This is required in order to allow more significant differences between
NMI type interrupt handlers and regular asynchronous handlers.

Signed-off-by: Nicholas Piggin 

Reviewed this patch and the changes look good to me.

Reviewed-by: Athira Rajeev 

Thanks
Athira

---
arch/powerpc/kernel/traps.c  | 31 +++-
arch/powerpc/perf/core-book3s.c  | 35 ++--
arch/powerpc/perf/core-fsl-emb.c | 25 ---
3 files changed, 32 insertions(+), 59 deletions(-)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 902fcbd1a778..7dda72eb97cc 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1919,11 +1919,40 @@ void vsx_unavailable_tm(struct pt_regs *regs)
}
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */

-void performance_monitor_exception(struct pt_regs *regs)
+static void performance_monitor_exception_nmi(struct pt_regs *regs)
+{
+	nmi_enter();
+
+	__this_cpu_inc(irq_stat.pmu_irqs);
+
+	perf_irq(regs);
+
+	nmi_exit();
+}
+
+static void performance_monitor_exception_async(struct pt_regs *regs)
{
+	irq_enter();
+
	__this_cpu_inc(irq_stat.pmu_irqs);

	perf_irq(regs);
+
+	irq_exit();
+}
+
+void performance_monitor_exception(struct pt_regs *regs)
+{
+	/*
+	 * On 64-bit, if perf interrupts hit in a local_irq_disable
+	 * (soft-masked) region, we consider them as NMIs. This is required to
+	 * prevent hash faults on user addresses when reading callchains (and
+	 * looks better from an irq tracing perspective).
+	 */
+	if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
+		performance_monitor_exception_nmi(regs);
+	else
+		performance_monitor_exception_async(regs);
}

#ifdef CONFIG_PPC_ADV_DEBUG_REGS
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 08643cba1494..9fd8cae09218 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -109,10 +109,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
{
	regs->result = 0;
}
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
-	return 0;
-}

static inline int siar_valid(struct pt_regs *regs)
{
@@ -328,15 +324,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
	regs->result = use_siar;
}

-/*
- * If interrupts were soft-disabled when a PMU interrupt occurs, treat
- * it as an NMI.
- */
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
-	return (regs->softe & IRQS_DISABLED);
-}
-
/*
 * On processors like P7+ that have the SIAR-Valid bit, marked instructions
 * must be sampled only if the SIAR-valid bit is set.
@@ -2224,7 +2211,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
	struct perf_event *event;
	unsigned long val[8];
	int found, active;
-	int nmi;

	if (cpuhw->n_limited)
		freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
@@ -2232,18 +2218,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)

	perf_read_regs(regs);

-	/*
-	 * If perf interrupts hit in a local_irq_disable (soft-masked) region,
-	 * we consider them as NMIs. This is required to prevent hash faults on
-	 * user addresses when reading callchains. See the NMI test in
-	 * do_hash_page.
-	 */
-	nmi = perf_intr_is_nmi(regs);
-	if (nmi)
-		nmi_enter();
-	else
-		irq_enter();
-
	/* Read all the PMCs since we'll need them a bunch of times */
	for (i = 0; i < ppmu->n_counter; ++i)
		val[i] = read_pmc(i + 1);
@@ -2289,8 +2263,8 @@ static void __perf_event_interrupt(struct pt_regs *regs)
			}
		}
	}
-	if (!found && !nmi && printk_ratelimit())
-		printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
+	if (unlikely(!found) && !arch_irq_disabled_regs(regs))
+		printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");

	/*
	 * Reset MMCR0 to its normal value.  This will set PMXE and
@@ -2300,11 +2274,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
	 * we get back out of this interrupt.
	 */
	write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
-
-	if (nmi)
-		nmi_exit();
-	else
-		irq_exit();
}

static void perf_event_interrupt(struct pt_regs *regs)
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index e0e7e276bfd2..ee721f420a7b 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -31,19 +31,6 @@ static atomic_t num_events;

/* Used to avoid races in calling reserve/release_pmc_hardware */
static DEFINE_MUTEX(pmc_reserve_mutex);

-/*
- * If interrupts were soft-disabled when a PMU interrupt occurs, treat
- * it as an NMI.
- */
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
-#ifdef __powerpc64__
-	return (regs->softe & IRQS_DISABLED);
-#else
-	return 0;
-#endif
-}
-
static void perf_event_interrupt(struct pt_regs *regs);

/*
@@ -659,13 +646,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
	struct perf_event *event;
	unsigned long val;
	int found = 0;
-	int nmi;
-
-	nmi = perf_intr_is_nmi(regs);
-	if (nmi)
-		nmi_enter();
-	else
-		irq_enter();

	for (i = 0; i < ppmu->n_counter; ++i) {
		event = cpuhw-&

[PATCH V3] powerpc/perf: Fix Threshold Event Counter Multiplier width for P10

2020-12-15 Thread Athira Rajeev
From: Madhavan Srinivasan 

Threshold Event Counter Multiplier (TECM) is part of Monitor Mode
Control Register A (MMCRA). This field along with Threshold Event
Counter Exponent (TECE) is used to get the threshold counter value.
In Power10, this is an 8-bit field, so the patch fixes the
current code by modifying the MMCRA[TECM] extraction macro to
handle this change. ISA v3.1 says this is a 7-bit field, but on
POWER10 it is actually 8 bits, which will hopefully be fixed
in an ISA v3.1 update.
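
For reference, a minimal sketch of how the two MMCRA fields combine into the
threshold counter value (macro names follow isa207-common.h; treat this as an
illustration rather than the exact kernel code):

<<>>
static unsigned long thresh_counter_value(u64 mmcra)
{
	unsigned long mantissa, exponent;

	if (cpu_has_feature(CPU_FTR_ARCH_31))
		mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);	/* 8-bit TECM on power10 */
	else
		mantissa = MMCRA_THR_CTR_MANT(mmcra);		/* 7-bit TECM otherwise */
	exponent = MMCRA_THR_CTR_EXP(mmcra);

	/* counter value = mantissa scaled up by the exponent field */
	return mantissa << (2 * exponent);
}
<<>>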

Fixes: 170a315f41c64 ('powerpc/perf: Support to export MMCRA[TEC*] field to 
userspace')
Signed-off-by: Madhavan Srinivasan 
Signed-off-by: Athira Rajeev 
---
Changelog v2 -> v3:
- Since in POWER10 it's actually 8 bits which will hopefully be
  fixed in ISA3.1 update, bring back the cpu feature check which
  is the appropriate one.

Changelog v1 -> v2:
- Fixed the commit message
- Fixed the condition check to use PVR check for power10
  instead of ISA v3.1 cpu feature check.

 arch/powerpc/perf/isa207-common.c | 3 +++
 arch/powerpc/perf/isa207-common.h | 4 
 2 files changed, 7 insertions(+)

diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index 24f0a90..6ab5b27 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -247,6 +247,9 @@ void isa207_get_mem_weight(u64 *weight)
u64 sier = mfspr(SPRN_SIER);
u64 val = (sier & ISA207_SIER_TYPE_MASK) >> ISA207_SIER_TYPE_SHIFT;
 
+   if (cpu_has_feature(CPU_FTR_ARCH_31))
+   mantissa = P10_MMCRA_THR_CTR_MANT(mmcra);
+
if (val == 0 || val == 7)
*weight = 0;
else
diff --git a/arch/powerpc/perf/isa207-common.h 
b/arch/powerpc/perf/isa207-common.h
index 4208764..454b32c 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -231,6 +231,10 @@
 #define MMCRA_THR_CTR_EXP(v)   (((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\
MMCRA_THR_CTR_EXP_MASK)
 
#define P10_MMCRA_THR_CTR_MANT_MASK    0xFFul
+#define P10_MMCRA_THR_CTR_MANT(v)  (((v) >> MMCRA_THR_CTR_MANT_SHIFT) &\
+   P10_MMCRA_THR_CTR_MANT_MASK)
+
 /* MMCRA Threshold Compare bit constant for power9 */
 #define p9_MMCRA_THR_CMP_SHIFT 45
 
-- 
1.8.3.1



[PATCH] powerpc/perf: Invoke per-CPU variable access with disabled interrupts

2020-12-01 Thread Athira Rajeev
The power_pmu_event_init() callback accesses the per-cpu variable
(cpu_hw_events) to check for event constraints and Branch Stack
(BHRB). The current usage is to disable preemption when accessing the
per-cpu variable, but this does not prevent a timer callback from
interrupting event_init. Fix this by using local_irq_save/restore
to make sure the code path is invoked with disabled interrupts.

This change is tested in mambo simulator to ensure that, if a timer
interrupt comes in during the per-cpu access in event_init, it will be
soft masked and replayed later. For testing purposes, a udelay() was
introduced in power_pmu_event_init() to make sure a timer interrupt arrives
while in the per-cpu variable access code between local_irq_save/restore.
As expected the timer interrupt was replayed later during local_irq_restore
called from power_pmu_event_init. This was confirmed by adding
breakpoint in mambo and checking the backtrace when timer_interrupt
was hit.

Reported-by: Sebastian Andrzej Siewior 
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/core-book3s.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 3c8c6ce..e38648f0 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -1909,7 +1909,7 @@ static bool is_event_blacklisted(u64 ev)
 static int power_pmu_event_init(struct perf_event *event)
 {
u64 ev;
-   unsigned long flags;
+   unsigned long flags, irq_flags;
struct perf_event *ctrs[MAX_HWEVENTS];
u64 events[MAX_HWEVENTS];
unsigned int cflags[MAX_HWEVENTS];
@@ -2017,7 +2017,9 @@ static int power_pmu_event_init(struct perf_event *event)
if (check_excludes(ctrs, cflags, n, 1))
return -EINVAL;
 
-   cpuhw = &get_cpu_var(cpu_hw_events);
+   local_irq_save(irq_flags);
+   cpuhw = this_cpu_ptr(&cpu_hw_events);
+
err = power_check_constraints(cpuhw, events, cflags, n + 1);
 
if (has_branch_stack(event)) {
@@ -2028,13 +2030,13 @@ static int power_pmu_event_init(struct perf_event 
*event)
event->attr.branch_sample_type);
 
if (bhrb_filter == -1) {
-   put_cpu_var(cpu_hw_events);
+   local_irq_restore(irq_flags);
return -EOPNOTSUPP;
}
cpuhw->bhrb_filter = bhrb_filter;
}
 
-   put_cpu_var(cpu_hw_events);
+   local_irq_restore(irq_flags);
if (err)
return -EINVAL;
 
-- 
1.8.3.1



[PATCH V2 4/7] powerpc/perf: Add generic and cache event list for power10 DD1

2020-11-26 Thread Athira Rajeev
There are event code updates for some of the generic events
and cache events for power10. In order to keep the current
event codes working with DD1 as well, create a new array of generic_events,
cache_events and pmu_attr_groups with the suffix _dd1, for example,
power10_events_attr_dd1, so that further updates to event codes
can be made in the original list, i.e. power10_events_attr. Update the
power10 PMU init code to pick the dd1 list while registering
the power PMU, based on the PVR (Processor Version Register) value.
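
As an illustration of the registration-time selection described above (the
DD1 check is shown as a stand-in helper, not the exact code in this patch):

<<>>
/* Sketch only: pick the DD1 tables when registering the power10 PMU.
 * is_power10_dd1() stands in for whatever PVR revision check is used. */
if (is_power10_dd1()) {
	power10_pmu.attr_groups    = power10_pmu_attr_groups_dd1;
	power10_pmu.generic_events = power10_generic_events_dd1;
	power10_pmu.cache_events   = &power10_cache_events_dd1;
}
<<>>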

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/power10-pmu.c | 152 
 1 file changed, 152 insertions(+)

diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index 88c5430..bc3d4dd 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -129,6 +129,31 @@ static int power10_get_alternatives(u64 event, unsigned 
int flags, u64 alt[])
 CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
 CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
 
+static struct attribute *power10_events_attr_dd1[] = {
+   GENERIC_EVENT_PTR(PM_RUN_CYC),
+   GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
+   GENERIC_EVENT_PTR(PM_BR_CMPL),
+   GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+   GENERIC_EVENT_PTR(PM_LD_REF_L1),
+   GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+   GENERIC_EVENT_PTR(MEM_LOADS),
+   GENERIC_EVENT_PTR(MEM_STORES),
+   CACHE_EVENT_PTR(PM_LD_MISS_L1),
+   CACHE_EVENT_PTR(PM_LD_REF_L1),
+   CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS),
+   CACHE_EVENT_PTR(PM_ST_MISS_L1),
+   CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+   CACHE_EVENT_PTR(PM_INST_FROM_L1),
+   CACHE_EVENT_PTR(PM_IC_PREF_REQ),
+   CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+   CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+   CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+   CACHE_EVENT_PTR(PM_BR_CMPL),
+   CACHE_EVENT_PTR(PM_DTLB_MISS),
+   CACHE_EVENT_PTR(PM_ITLB_MISS),
+   NULL
+};
+
 static struct attribute *power10_events_attr[] = {
GENERIC_EVENT_PTR(PM_RUN_CYC),
GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
@@ -154,6 +179,11 @@ static int power10_get_alternatives(u64 event, unsigned 
int flags, u64 alt[])
NULL
 };
 
+static struct attribute_group power10_pmu_events_group_dd1 = {
+   .name = "events",
+   .attrs = power10_events_attr_dd1,
+};
+
 static struct attribute_group power10_pmu_events_group = {
.name = "events",
.attrs = power10_events_attr,
@@ -205,12 +235,27 @@ static int power10_get_alternatives(u64 event, unsigned 
int flags, u64 alt[])
.attrs = power10_pmu_format_attr,
 };
 
+static const struct attribute_group *power10_pmu_attr_groups_dd1[] = {
+   &power10_pmu_format_group,
+   &power10_pmu_events_group_dd1,
+   NULL,
+};
+
 static const struct attribute_group *power10_pmu_attr_groups[] = {
	&power10_pmu_format_group,
	&power10_pmu_events_group,
NULL,
 };
 
+static int power10_generic_events_dd1[] = {
+   [PERF_COUNT_HW_CPU_CYCLES] =PM_RUN_CYC,
+   [PERF_COUNT_HW_INSTRUCTIONS] =  PM_RUN_INST_CMPL,
+   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =   PM_BR_CMPL,
+   [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
+   [PERF_COUNT_HW_CACHE_REFERENCES] =  PM_LD_REF_L1,
+   [PERF_COUNT_HW_CACHE_MISSES] =  PM_LD_MISS_L1,
+};
+
 static int power10_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] =PM_RUN_CYC,
[PERF_COUNT_HW_INSTRUCTIONS] =  PM_RUN_INST_CMPL,
@@ -276,6 +321,107 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
  * 0 means not supported, -1 means nonsensical, other values
  * are event codes.
  */
+static u64 power10_cache_events_dd1[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+   [C(L1D)] = {
+   [C(OP_READ)] = {
+   [C(RESULT_ACCESS)] = PM_LD_REF_L1,
+   [C(RESULT_MISS)] = PM_LD_MISS_L1,
+   },
+   [C(OP_WRITE)] = {
+   [C(RESULT_ACCESS)] = 0,
+   [C(RESULT_MISS)] = PM_ST_MISS_L1,
+   },
+   [C(OP_PREFETCH)] = {
+   [C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS,
+   [C(RESULT_MISS)] = 0,
+   },
+   },
+   [C(L1I)] = {
+   [C(OP_READ)] = {
+   [C(RESULT_ACCESS)] = PM_INST_FROM_L1,
+   [C(RESULT_MISS)] = PM_L1_ICACHE_MISS,
+   },
+   [C(OP_WRITE)] = {
+   [C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS,
+   [C(RESULT_MISS)] = -1,
+   },
+   [C(OP_PREFETCH)] = {
+   [C(RESULT_ACCESS)] = PM_IC_PREF_REQ,
+   [C(RESULT_MISS)] = 0,
+   },
+   

[PATCH V2 7/7] powerpc/perf: MMCR0 control for PMU registers under PMCC=00

2020-11-26 Thread Athira Rajeev
PowerISA v3.1 introduces a new control bit (PMCCEXT) for restricting
access to group B PMU registers in problem state when
MMCR0 PMCC=0b00. In problem state and when MMCR0 PMCC=0b00,
setting the Monitor Mode Control Register bit 54 (MMCR0 PMCCEXT),
will restrict read permission on Group B Performance Monitor
Registers (SIER, SIAR, SDAR and MMCR1). When this bit is set to zero,
group B registers will be readable. In other platforms (like power9),
the older behaviour is retained where group B PMU SPRs are readable.

The patch adds support for the MMCR0 PMCCEXT bit in power10 by enabling
this bit during boot and during the PMU event enable/disable callback
functions.
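
For reference, a minimal sketch of the access rule this bit encodes (the
helper name is made up for illustration; the real check is done in hardware):

<<>>
/* Group B SPRs: SIER, SIAR, SDAR and MMCR1 */
static bool group_b_sprs_readable(unsigned long mmcr0, bool problem_state)
{
	if (!problem_state)
		return true;			/* no restriction outside problem state */
	if (mmcr0 & MMCR0_PMCC)
		return true;			/* restriction only applies when PMCC=0b00 */
	return !(mmcr0 & MMCR0_PMCCEXT);	/* PMCCEXT=1 blocks group B reads */
}
<<>>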

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/include/asm/reg.h| 1 +
 arch/powerpc/kernel/cpu_setup_power.c | 1 +
 arch/powerpc/kernel/dt_cpu_ftrs.c | 1 +
 arch/powerpc/perf/core-book3s.c   | 4 
 arch/powerpc/perf/isa207-common.c | 8 
 5 files changed, 15 insertions(+)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index f877a57..cba9965 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -864,6 +864,7 @@
 #define   MMCR0_BHRBA  0x0020UL /* BHRB Access allowed in userspace */
 #define   MMCR0_EBE0x0010UL /* Event based branch enable */
 #define   MMCR0_PMCC   0x000cUL /* PMC control */
#define   MMCR0_PMCCEXT    ASM_CONST(0x0200) /* PMCCEXT control */
 #define   MMCR0_PMCC_U60x0008UL /* PMC1-6 are R/W by user (PR) */
 #define   MMCR0_PMC1CE 0x8000UL /* PMC1 count enable*/
 #define   MMCR0_PMCjCE ASM_CONST(0x4000) /* PMCj count enable*/
diff --git a/arch/powerpc/kernel/cpu_setup_power.c 
b/arch/powerpc/kernel/cpu_setup_power.c
index 0c2191e..3cca88e 100644
--- a/arch/powerpc/kernel/cpu_setup_power.c
+++ b/arch/powerpc/kernel/cpu_setup_power.c
@@ -123,6 +123,7 @@ static void init_PMU_ISA31(void)
 {
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+   mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
 }
 
 /*
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c 
b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 1098863..9d07965 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -454,6 +454,7 @@ static void init_pmu_power10(void)
 
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+   mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
 }
 
 static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 3c8c6ce..35cf93c 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -95,6 +95,7 @@ struct cpu_hw_events {
 #define SPRN_SIER3 0
#define MMCRA_SAMPLE_ENABLE    0
 #define MMCRA_BHRB_DISABLE 0
+#define MMCR0_PMCCEXT  0
 
 static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
 {
@@ -1270,6 +1271,9 @@ static void power_pmu_disable(struct pmu *pmu)
val |= MMCR0_FC;
val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO |
 MMCR0_FC56);
+   /* Set mmcr0 PMCCEXT for p10 */
+   if (ppmu->flags & PPMU_ARCH_31)
+   val |= MMCR0_PMCCEXT;
 
/*
 * The barrier is to make sure the mtspr has been
diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index 0f4983e..24f0a90 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -558,6 +558,14 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
if (!(pmc_inuse & 0x60))
mmcr->mmcr0 |= MMCR0_FC56;
 
+   /*
+* Set mmcr0 (PMCCEXT) for p10 which
+* will restrict access to group B registers
+* when MMCR0 PMCC=0b00.
+*/
+   if (cpu_has_feature(CPU_FTR_ARCH_31))
+   mmcr->mmcr0 |= MMCR0_PMCCEXT;
+
mmcr->mmcr1 = mmcr1;
mmcr->mmcra = mmcra;
mmcr->mmcr2 = mmcr2;
-- 
1.8.3.1



[PATCH V2 6/7] powerpc/perf: Fix to update cache events with l2l3 events in power10

2020-11-26 Thread Athira Rajeev
Export l2l3 events (PM_L2_ST_MISS and PM_L2_ST) and LLC-prefetches
(PM_L3_PF_MISS_L3) via sysfs, and also add these to list of
cache_events.

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/power10-events-list.h |  6 ++
 arch/powerpc/perf/power10-pmu.c | 12 +---
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/perf/power10-events-list.h 
b/arch/powerpc/perf/power10-events-list.h
index abd778f..e45dafe 100644
--- a/arch/powerpc/perf/power10-events-list.h
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -39,6 +39,12 @@
 EVENT(PM_DATA_FROM_L3, 0x0134001c040);
 /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
 EVENT(PM_DATA_FROM_L3MISS, 0x300fe);
+/* All successful D-side store dispatches for this thread */
+EVENT(PM_L2_ST,0x01046080);
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+EVENT(PM_L2_ST_MISS,   0x26880);
+/* Total HW L3 prefetches(Load+store) */
+EVENT(PM_L3_PF_MISS_L3,0x10016080);
 /* Data PTEG reload */
 EVENT(PM_DTLB_MISS,0x300fc);
 /* ITLB Reloaded */
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index a02da69..79e0206 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -127,6 +127,9 @@ static int power10_get_alternatives(u64 event, unsigned int 
flags, u64 alt[])
 CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ);
 CACHE_EVENT_ATTR(LLC-load-misses,  PM_DATA_FROM_L3MISS);
 CACHE_EVENT_ATTR(LLC-loads,PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(LLC-prefetches,   PM_L3_PF_MISS_L3);
+CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
+CACHE_EVENT_ATTR(LLC-stores,   PM_L2_ST);
 CACHE_EVENT_ATTR(branch-load-misses,   PM_BR_MPRED_CMPL);
 CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL);
 CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
@@ -175,6 +178,9 @@ static int power10_get_alternatives(u64 event, unsigned int 
flags, u64 alt[])
CACHE_EVENT_PTR(PM_IC_PREF_REQ),
CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+   CACHE_EVENT_PTR(PM_L3_PF_MISS_L3),
+   CACHE_EVENT_PTR(PM_L2_ST_MISS),
+   CACHE_EVENT_PTR(PM_L2_ST),
CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
CACHE_EVENT_PTR(PM_BR_CMPL),
CACHE_EVENT_PTR(PM_DTLB_MISS),
@@ -460,11 +466,11 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
[C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
},
[C(OP_WRITE)] = {
-   [C(RESULT_ACCESS)] = -1,
-   [C(RESULT_MISS)] = -1,
+   [C(RESULT_ACCESS)] = PM_L2_ST,
+   [C(RESULT_MISS)] = PM_L2_ST_MISS,
},
[C(OP_PREFETCH)] = {
-   [C(RESULT_ACCESS)] = -1,
+   [C(RESULT_ACCESS)] = PM_L3_PF_MISS_L3,
[C(RESULT_MISS)] = 0,
},
},
-- 
1.8.3.1



[PATCH V2 5/7] powerpc/perf: Fix to update generic event codes for power10

2020-11-26 Thread Athira Rajeev
Fix the event code for events: branch-instructions (to PM_BR_FIN),
branch-misses (to PM_MPRED_BR_FIN) and cache-misses (to
PM_LD_DEMAND_MISS_L1_FIN) for power10 PMU. Update the
list of generic events with this modified event code.

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/power10-events-list.h |  3 +++
 arch/powerpc/perf/power10-pmu.c | 15 +--
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/perf/power10-events-list.h 
b/arch/powerpc/perf/power10-events-list.h
index 60c1b81..abd778f 100644
--- a/arch/powerpc/perf/power10-events-list.h
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -15,6 +15,9 @@
 EVENT(PM_RUN_INST_CMPL,0x500fa);
 EVENT(PM_BR_CMPL,   0x4d05e);
 EVENT(PM_BR_MPRED_CMPL, 0x400f6);
+EVENT(PM_BR_FIN,   0x2f04a);
+EVENT(PM_MPRED_BR_FIN, 0x3e098);
+EVENT(PM_LD_DEMAND_MISS_L1_FIN,0x400f0);
 
 /* All L1 D cache load references counted at finish, gated by reject */
 EVENT(PM_LD_REF_L1,0x100fc);
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index bc3d4dd..a02da69 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -114,6 +114,9 @@ static int power10_get_alternatives(u64 event, unsigned int 
flags, u64 alt[])
 GENERIC_EVENT_ATTR(cache-misses,   PM_LD_MISS_L1);
 GENERIC_EVENT_ATTR(mem-loads,  MEM_LOADS);
 GENERIC_EVENT_ATTR(mem-stores, MEM_STORES);
+GENERIC_EVENT_ATTR(branch-instructions,PM_BR_FIN);
+GENERIC_EVENT_ATTR(branch-misses,  PM_MPRED_BR_FIN);
+GENERIC_EVENT_ATTR(cache-misses,   PM_LD_DEMAND_MISS_L1_FIN);
 
 CACHE_EVENT_ATTR(L1-dcache-load-misses,PM_LD_MISS_L1);
 CACHE_EVENT_ATTR(L1-dcache-loads,  PM_LD_REF_L1);
@@ -157,10 +160,10 @@ static int power10_get_alternatives(u64 event, unsigned 
int flags, u64 alt[])
 static struct attribute *power10_events_attr[] = {
GENERIC_EVENT_PTR(PM_RUN_CYC),
GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
-   GENERIC_EVENT_PTR(PM_BR_CMPL),
-   GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+   GENERIC_EVENT_PTR(PM_BR_FIN),
+   GENERIC_EVENT_PTR(PM_MPRED_BR_FIN),
GENERIC_EVENT_PTR(PM_LD_REF_L1),
-   GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+   GENERIC_EVENT_PTR(PM_LD_DEMAND_MISS_L1_FIN),
GENERIC_EVENT_PTR(MEM_LOADS),
GENERIC_EVENT_PTR(MEM_STORES),
CACHE_EVENT_PTR(PM_LD_MISS_L1),
@@ -259,10 +262,10 @@ static int power10_get_alternatives(u64 event, unsigned 
int flags, u64 alt[])
 static int power10_generic_events[] = {
[PERF_COUNT_HW_CPU_CYCLES] =PM_RUN_CYC,
[PERF_COUNT_HW_INSTRUCTIONS] =  PM_RUN_INST_CMPL,
-   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =   PM_BR_CMPL,
-   [PERF_COUNT_HW_BRANCH_MISSES] = PM_BR_MPRED_CMPL,
+   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =   PM_BR_FIN,
+   [PERF_COUNT_HW_BRANCH_MISSES] = PM_MPRED_BR_FIN,
[PERF_COUNT_HW_CACHE_REFERENCES] =  PM_LD_REF_L1,
-   [PERF_COUNT_HW_CACHE_MISSES] =  PM_LD_MISS_L1,
+   [PERF_COUNT_HW_CACHE_MISSES] =  
PM_LD_DEMAND_MISS_L1_FIN,
 };
 
 static u64 power10_bhrb_filter_map(u64 branch_sample_type)
-- 
1.8.3.1



[PATCH V2 3/7] powerpc/perf: Fix the PMU group constraints for threshold events in power10

2020-11-26 Thread Athira Rajeev
The PMU group constraints mask for threshold events covers
all thresholding bits, which include the threshold control value
(start/stop), the select value, as well as the thresh_cmp value
(MMCRA[9:18]). In power9, the thresh_cmp bits were part of the event code.
But in case of power10, the thresh_cmp bits are not part of the event code
due to the inclusion of MMCR3 bits. Hence thresh_cmp is not valid for
group constraints for power10.

Fix the PMU group constraints checking for threshold events in
power10 by using constraint mask and value for only threshold control
and select bits.
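
For intuition, a simplified sketch of what the constraint mask/value pair is
used for when grouping events (the real scheduler in core-book3s does more
than this pairwise check):

<<>>
/*
 * Two events may share a group only if every constraint field both of them
 * claim (via their masks) is programmed to the same value. Keeping the
 * thresh_cmp bits in the power10 mask would therefore reject valid groups,
 * since thresh_cmp no longer comes from the event code there.
 */
static bool constraints_compatible(u64 mask_a, u64 val_a, u64 mask_b, u64 val_b)
{
	u64 common = mask_a & mask_b;

	return (val_a & common) == (val_b & common);
}
<<>>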

Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/isa207-common.c | 7 ++-
 arch/powerpc/perf/isa207-common.h | 3 +++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index 38ed450c..0f4983e 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -351,7 +351,12 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, 
unsigned long *valp)
value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
}
 
-   if (cpu_has_feature(CPU_FTR_ARCH_300))  {
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   if (event_is_threshold(event)) {
+   mask  |= CNST_THRESH_CTL_SEL_MASK;
+   value |= CNST_THRESH_CTL_SEL_VAL(event >> 
EVENT_THRESH_SHIFT);
+   }
+   } else if (cpu_has_feature(CPU_FTR_ARCH_300))  {
if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
mask  |= CNST_THRESH_MASK;
value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
diff --git a/arch/powerpc/perf/isa207-common.h 
b/arch/powerpc/perf/isa207-common.h
index dc9c3d2..4208764 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -149,6 +149,9 @@
 #define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32)
 #define CNST_THRESH_MASK   CNST_THRESH_VAL(EVENT_THRESH_MASK)
 
+#define CNST_THRESH_CTL_SEL_VAL(v) (((v) & 0x7ffull) << 32)
+#define CNST_THRESH_CTL_SEL_MASK   CNST_THRESH_CTL_SEL_VAL(0x7ff)
+
 #define CNST_EBB_VAL(v)(((v) & EVENT_EBB_MASK) << 24)
 #define CNST_EBB_MASK  CNST_EBB_VAL(EVENT_EBB_MASK)
 
-- 
1.8.3.1



[PATCH V2 0/7] powerpc/perf: Fixes for power10 PMU

2020-11-26 Thread Athira Rajeev
Patchset contains PMU fixes for power10.

This patchset contains 7 patches.
Patch1 includes fix to update event code with radix_scope_qual
bit in power10.
Patch2 and Patch3 updates the event group constraints for L2/L3
and threshold events in power10.
Patch4, patch5 and patch6 includes the event code changes for
l2/l3 events and some of the generic events.
Patch7 adds fixes for PMCCEXT bit in power10.

Changelog:
Changes from v1 -> v2
- Addressed Michael Ellerman's comments in the patchset.
  Split patch 2 to address l2l3 and threshold events
  group constraints fixes separately.
  Split Patch 3 also to address event code updates
  separately for generic and cache events.
  Fixed commit messages and also PMCCEXT bit setting
  during event enable.

Athira Rajeev (7):
  powerpc/perf: Fix to update radix_scope_qual in power10
  powerpc/perf: Update the PMU group constraints for l2l3 events in
power10
  powerpc/perf: Fix the PMU group constraints for threshold events in
power10
  powerpc/perf: Add generic and cache event list for power10 DD1
  powerpc/perf: Fix to update generic event codes for power10
  powerpc/perf: Fix to update cache events with l2l3 events in power10
  powerpc/perf: MMCR0 control for PMU registers under PMCC=00

 arch/powerpc/include/asm/reg.h  |   1 +
 arch/powerpc/kernel/cpu_setup_power.c   |   1 +
 arch/powerpc/kernel/dt_cpu_ftrs.c   |   1 +
 arch/powerpc/perf/core-book3s.c |   4 +
 arch/powerpc/perf/isa207-common.c   |  35 ++-
 arch/powerpc/perf/isa207-common.h   |  16 ++-
 arch/powerpc/perf/power10-events-list.h |   9 ++
 arch/powerpc/perf/power10-pmu.c | 178 ++--
 8 files changed, 231 insertions(+), 14 deletions(-)

-- 
1.8.3.1



[PATCH V2 2/7] powerpc/perf: Update the PMU group constraints for l2l3 events in power10

2020-11-26 Thread Athira Rajeev
In Power9, L2/L3 bus events are always available as a
"bank" of 4 events. To obtain the counts for any of the
l2/l3 bus events in a given bank, the user will have to
program PMC4 with corresponding l2/l3 bus event for that
bank.

Commit 59029136d750 ("powerpc/perf: Add constraints for power9 l2/l3 bus 
events")
enforced this rule in Power9. But this is not valid for
Power10, since in Power10 the Monitor Mode Control Register 2
(MMCR2) has bits to configure the l2/l3 event bits. Hence remove
this PMC4 constraint check from power10.

Since the l2/l3 bits in MMCR2 are not per-PMC, the patch handles
group constraint checks for the l2/l3 bits in MMCR2.

Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/isa207-common.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index f57f54f..38ed450c 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -311,9 +311,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, 
unsigned long *valp)
}
 
if (unit >= 6 && unit <= 9) {
-   if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
-   mask |= CNST_L2L3_GROUP_MASK;
-   value |= CNST_L2L3_GROUP_VAL(event >> 
p10_L2L3_EVENT_SHIFT);
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   if (unit == 6) {
+   mask |= CNST_L2L3_GROUP_MASK;
+   value |= CNST_L2L3_GROUP_VAL(event >> 
p10_L2L3_EVENT_SHIFT);
+   }
} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
mask  |= CNST_CACHE_GROUP_MASK;
value |= CNST_CACHE_GROUP_VAL(event & 0xff);
-- 
1.8.3.1



[PATCH V2 1/7] powerpc/perf: Fix to update radix_scope_qual in power10

2020-11-26 Thread Athira Rajeev
power10 uses bit 9 of the raw event code as RADIX_SCOPE_QUAL.
This bit is used for enabling the radix process events.
Patch fixes the PMU counter support functions to program bit
18 of MMCR1 ( Monitor Mode Control Register1 ) with the
RADIX_SCOPE_QUAL bit value. Since this field is not per-pmc,
add this to PMU group constraints to make sure events in a
group will have same bit value for this field. Use bit 21 as
constraint bit field for radix_scope_qual. Patch also updates
the power10 raw event encoding layout information, format field
and constraints bit layout to include the radix_scope_qual bit.

Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/isa207-common.c | 12 
 arch/powerpc/perf/isa207-common.h | 13 ++---
 arch/powerpc/perf/power10-pmu.c   | 11 +++
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index 2848904..f57f54f 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -339,6 +339,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, 
unsigned long *valp)
value |= CNST_L1_QUAL_VAL(cache);
}
 
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   mask |= CNST_RADIX_SCOPE_GROUP_MASK;
+   value |= CNST_RADIX_SCOPE_GROUP_VAL(event >> 
p10_EVENT_RADIX_SCOPE_QUAL_SHIFT);
+   }
+
if (is_event_marked(event)) {
mask  |= CNST_SAMPLE_MASK;
value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
@@ -456,6 +461,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
}
}
 
+   /* Set RADIX_SCOPE_QUAL bit */
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   val = (event[i] >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) &
+   p10_EVENT_RADIX_SCOPE_QUAL_MASK;
+   mmcr1 |= val << p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT;
+   }
+
if (is_event_marked(event[i])) {
mmcra |= MMCRA_SAMPLE_ENABLE;
 
diff --git a/arch/powerpc/perf/isa207-common.h 
b/arch/powerpc/perf/isa207-common.h
index 7025de5..dc9c3d2 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -101,6 +101,9 @@
 #define p10_EVENT_CACHE_SEL_MASK   0x3ull
 #define p10_EVENT_MMCR3_MASK   0x7fffull
 #define p10_EVENT_MMCR3_SHIFT  45
+#define p10_EVENT_RADIX_SCOPE_QUAL_SHIFT   9
#define p10_EVENT_RADIX_SCOPE_QUAL_MASK    0x1
+#define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT   45
 
 #define p10_EVENT_VALID_MASK   \
((p10_SDAR_MODE_MASK   << p10_SDAR_MODE_SHIFT   |   \
@@ -112,6 +115,7 @@
(p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT)   |   \
(p10_EVENT_MMCR3_MASK  << p10_EVENT_MMCR3_SHIFT)|   \
(EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)   |   \
+   (p10_EVENT_RADIX_SCOPE_QUAL_MASK << p10_EVENT_RADIX_SCOPE_QUAL_SHIFT)   
|   \
 EVENT_LINUX_MASK   |   \
EVENT_PSEL_MASK))
 /*
@@ -125,9 +129,9 @@
  *
  *2824201612 8 4   
  0
  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - 
- - |
- *   [ ] |   [ ]   [  sample ]   [ ]   [6] [5]   [4] [3]   [2] 
[1]
- *|  || |
- *  BHRB IFM -*  || |  Count of events for 
each PMC.
+ *   [ ] |   [ ] |  [  sample ]   [ ]   [6] [5]   [4] [3]   
[2] [1]
+ *|  ||  |  |
+ *  BHRB IFM -*  ||  |*radix_scope  |  Count of events for 
each PMC.
  *  EBB -*| |p1, p2, p3, p4, p5, 
p6.
  *  L1 I/D qualifier -* |
  * nc - number of counters -*
@@ -165,6 +169,9 @@
 #define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55)
 #define CNST_L2L3_GROUP_MASK   CNST_L2L3_GROUP_VAL(0x1f)
 
+#define CNST_RADIX_SCOPE_GROUP_VAL(v)  (((v) & 0x1ull) << 21)
#define CNST_RADIX_SCOPE_GROUP_MASK    CNST_RADIX_SCOPE_GROUP_VAL(1)
+
 /*
  * For NC we are counting up to 4 events. This requires three bits, and we need
  * the fifth event to overflow and set the 4th bit. To achieve that we bias the
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index a01e87f..88c5430 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -23,10 +23,10 @@
  *
  *2824201612 8 4   
  0
  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | -

[PATCH V2] powerpc/perf: Exclude kernel samples while counting events in user space.

2020-11-24 Thread Athira Rajeev
The perf event attribute supports the exclude_kernel flag
to avoid sampling/profiling in supervisor state (kernel).
Based on this event attr flag, the Monitor Mode Control Register
bit is set to freeze on supervisor state. But sometimes (due
to a hardware limitation), the Sampled Instruction Address
Register (SIAR) locks on to a kernel address even when
freeze on supervisor state is set. The patch here adds a check to
drop those samples.
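
As a usage illustration, a user-space-only session that relies on this check
(the :u event modifier sets the exclude_kernel attribute; the workload name
is just a placeholder):

# perf record -e cycles:u -- ./workload
# perf report    # samples whose SIAR resolved to a kernel address are dropped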

Signed-off-by: Athira Rajeev 
---
Changes in v2:
- Initial patch was sent along with series:
  https://patchwork.ozlabs.org/project/linuxppc-dev/list/?series=209195
  Moving this patch as separate since this change is applicable
  for all PMU platforms.

 arch/powerpc/perf/core-book3s.c | 11 +++
 1 file changed, 11 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 08643cb..40aa117 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2122,6 +2122,17 @@ static void record_and_restart(struct perf_event *event, 
unsigned long val,
perf_event_update_userpage(event);
 
/*
+* Due to hardware limitation, sometimes SIAR could
+* lock on to kernel address even with freeze on
+* supervisor state (kernel) is set in MMCR2.
+* Check attr.exclude_kernel and address
+* to drop the sample in these cases.
+*/
+   if (event->attr.exclude_kernel && record)
+   if (is_kernel_addr(mfspr(SPRN_SIAR)))
+   record = 0;
+
+   /*
 * Finally record data if requested.
 */
if (record) {
-- 
1.8.3.1



[PATCH V2] powerpc/perf: Fix crash with is_sier_available when pmu is not set

2020-11-23 Thread Athira Rajeev
On systems without any specific PMU driver support registered, running
'perf record' with --intr-regs will crash (perf record -I).

The relevant portion from crash logs and Call Trace:

Unable to handle kernel paging request for data at address 0x0068
Faulting instruction address: 0xc013eb18
Oops: Kernel access of bad area, sig: 11 [#1]
CPU: 2 PID: 13435 Comm: kill Kdump: loaded Not tainted 4.18.0-193.el8.ppc64le #1
NIP:  c013eb18 LR: c0139f2c CTR: c0393d80
REGS: c004a07ab4f0 TRAP: 0300   Not tainted  (4.18.0-193.el8.ppc64le)
NIP [c013eb18] is_sier_available+0x18/0x30
LR [c0139f2c] perf_reg_value+0x6c/0xb0
Call Trace:
[c004a07ab770] [c004a07ab7c8] 0xc004a07ab7c8 (unreliable)
[c004a07ab7a0] [c03aa77c] perf_output_sample+0x60c/0xac0
[c004a07ab840] [c03ab3f0] perf_event_output_forward+0x70/0xb0
[c004a07ab8c0] [c039e208] __perf_event_overflow+0x88/0x1a0
[c004a07ab910] [c039e42c] perf_swevent_hrtimer+0x10c/0x1d0
[c004a07abc50] [c0228b9c] __hrtimer_run_queues+0x17c/0x480
[c004a07abcf0] [c022aaf4] hrtimer_interrupt+0x144/0x520
[c004a07abdd0] [c002a864] timer_interrupt+0x104/0x2f0
[c004a07abe30] [c00091c4] decrementer_common+0x114/0x120

When perf record session is started with "-I" option, capturing registers
on each sample calls is_sier_available() to check for the
SIER (Sample Instruction Event Register) availability in the platform.
This function in core-book3s accesses 'ppmu->flags'. If a platform specific
PMU driver is not registered, ppmu is set to NULL and accessing its
members results in a crash. Fix the crash by returning false in
is_sier_available() if ppmu is not set.

Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include SIER")
Reported-by: Sachin Sant 
Signed-off-by: Athira Rajeev 
---
Changes in v2:
- Corrected the commit message as suggested by
  Michael Ellerman.

 arch/powerpc/perf/core-book3s.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 08643cb..1de4770 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { }
 
 bool is_sier_available(void)
 {
+   if (!ppmu)
+   return false;
+
if (ppmu->flags & PPMU_HAS_SIER)
return true;
 
-- 
1.8.3.1



Re: [PATCH v2 04/19] powerpc/perf: move perf irq/nmi handling details into traps.c

2020-11-23 Thread Athira Rajeev



> On 11-Nov-2020, at 3:13 PM, Nicholas Piggin  wrote:
> 
> This is required in order to allow more significant differences between
> NMI type interrupt handlers and regular asynchronous handlers.
> 
> Signed-off-by: Nicholas Piggin 
> ---
> arch/powerpc/kernel/traps.c  | 31 +++-
> arch/powerpc/perf/core-book3s.c  | 35 ++--
> arch/powerpc/perf/core-fsl-emb.c | 25 ---
> 3 files changed, 32 insertions(+), 59 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
> index 902fcbd1a778..7dda72eb97cc 100644
> --- a/arch/powerpc/kernel/traps.c
> +++ b/arch/powerpc/kernel/traps.c
> @@ -1919,11 +1919,40 @@ void vsx_unavailable_tm(struct pt_regs *regs)
> }
> #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
> 
> -void performance_monitor_exception(struct pt_regs *regs)
> +static void performance_monitor_exception_nmi(struct pt_regs *regs)
> +{
> + nmi_enter();
> +
> + __this_cpu_inc(irq_stat.pmu_irqs);
> +
> + perf_irq(regs);
> +
> + nmi_exit();
> +}
> +
> +static void performance_monitor_exception_async(struct pt_regs *regs)
> {
> + irq_enter();
> +
>   __this_cpu_inc(irq_stat.pmu_irqs);
> 
>   perf_irq(regs);
> +
> + irq_exit();
> +}
> +
> +void performance_monitor_exception(struct pt_regs *regs)
> +{
> + /*
> +  * On 64-bit, if perf interrupts hit in a local_irq_disable
> +  * (soft-masked) region, we consider them as NMIs. This is required to
> +  * prevent hash faults on user addresses when reading callchains (and
> +  * looks better from an irq tracing perspective).
> +  */
> + if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
> + performance_monitor_exception_nmi(regs);
> + else
> + performance_monitor_exception_async(regs);
> }
> 
> #ifdef CONFIG_PPC_ADV_DEBUG_REGS
> diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
> index 08643cba1494..9fd8cae09218 100644
> --- a/arch/powerpc/perf/core-book3s.c
> +++ b/arch/powerpc/perf/core-book3s.c
> @@ -109,10 +109,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
> {
>   regs->result = 0;
> }
> -static inline int perf_intr_is_nmi(struct pt_regs *regs)
> -{
> - return 0;
> -}
> 
> static inline int siar_valid(struct pt_regs *regs)
> {
> @@ -328,15 +324,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
>   regs->result = use_siar;
> }
> 
> -/*
> - * If interrupts were soft-disabled when a PMU interrupt occurs, treat
> - * it as an NMI.
> - */
> -static inline int perf_intr_is_nmi(struct pt_regs *regs)
> -{
> - return (regs->softe & IRQS_DISABLED);
> -}
> -

Hi Nick,

arch_irq_disabled_regs() checks whether the regs->softe value has
IRQS_DISABLED set. core-book3s also uses the same logic in perf_intr_is_nmi
to check whether it is an NMI. With the changes in this patch, if I
understood correctly, we will do the irq/nmi handling in traps.c rather
than in the PMI interrupt handler. But can you please help me understand
better what the perf weirdness (sometimes NMI, sometimes not) mentioned in
the cover letter is, which we are fixing with this change?

Thanks
Athira

> /*
>  * On processors like P7+ that have the SIAR-Valid bit, marked instructions
>  * must be sampled only if the SIAR-valid bit is set.
> @@ -2224,7 +2211,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
>   struct perf_event *event;
>   unsigned long val[8];
>   int found, active;
> - int nmi;
> 
>   if (cpuhw->n_limited)
>   freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
> @@ -2232,18 +2218,6 @@ static void __perf_event_interrupt(struct pt_regs 
> *regs)
> 
>   perf_read_regs(regs);
> 
> - /*
> -  * If perf interrupts hit in a local_irq_disable (soft-masked) region,
> -  * we consider them as NMIs. This is required to prevent hash faults on
> -  * user addresses when reading callchains. See the NMI test in
> -  * do_hash_page.
> -  */
> - nmi = perf_intr_is_nmi(regs);
> - if (nmi)
> - nmi_enter();
> - else
> - irq_enter();
> -
>   /* Read all the PMCs since we'll need them a bunch of times */
>   for (i = 0; i < ppmu->n_counter; ++i)
>   val[i] = read_pmc(i + 1);
> @@ -2289,8 +2263,8 @@ static void __perf_event_interrupt(struct pt_regs *regs)
>   }
>   }
>   }
> - if (!found && !nmi && printk_ratelimit())
> - printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
> + if (unlikely(!found) && !arch_irq_disabled_regs(regs))
> + printk_ratelimited(KERN_WARNING "Can't find PMC that caused 
> IRQ\n");
> 
>   /*
>* Reset MMCR0 to its normal value.  This will set PMXE and
> @@ -2300,11 +2274,6 @@ static void __perf_event_interrupt(struct pt_regs 
> *regs)
>* we get back out of this interrupt.
>*/
>   

Re: [PATCH] powerpc/perf: Fix crash with 'is_sier_available' when pmu is not set

2020-11-23 Thread Athira Rajeev



> On 23-Nov-2020, at 4:49 PM, Michael Ellerman  wrote:
> 
> Hi Athira,
> 
> Athira Rajeev  writes:
>> On systems without any platform specific PMU driver support registered or
>> Generic Compat PMU support registered,
> 
> The compat PMU is registered just like other PMUs, so I don't see how we
> can crash like this if the compat PMU is active?
> 
> ie. if we're using the compat PMU then ppmu will be non-NULL and point
> to generic_compat_pmu.

Hi Michael,

Thanks for checking the patch.

The crash happens on systems which have neither compat PMU support
registered nor a platform-specific PMU driver. This happens when the distro
does not have either the PMU driver support for that platform or the generic
"compat-mode" performance monitoring driver support.

So in such cases, since the compat PMU is inactive, ppmu is not set, which
results in the crash. Sorry for the confusion with my first line. I will
correct it.

> 
>> running 'perf record' with
>> —intr-regs  will crash ( perf record -I  ).
>> 
>> The relevant portion from crash logs and Call Trace:
>> 
>> Unable to handle kernel paging request for data at address 0x0068
>> Faulting instruction address: 0xc013eb18
>> Oops: Kernel access of bad area, sig: 11 [#1]
>> CPU: 2 PID: 13435 Comm: kill Kdump: loaded Not tainted 
>> 4.18.0-193.el8.ppc64le #1
>> NIP:  c013eb18 LR: c0139f2c CTR: c0393d80
>> REGS: c004a07ab4f0 TRAP: 0300   Not tainted  (4.18.0-193.el8.ppc64le)
>> NIP [c013eb18] is_sier_available+0x18/0x30
>> LR [c0139f2c] perf_reg_value+0x6c/0xb0
>> Call Trace:
>> [c004a07ab770] [c004a07ab7c8] 0xc004a07ab7c8 (unreliable)
>> [c004a07ab7a0] [c03aa77c] perf_output_sample+0x60c/0xac0
>> [c004a07ab840] [c03ab3f0] perf_event_output_forward+0x70/0xb0
>> [c004a07ab8c0] [c039e208] __perf_event_overflow+0x88/0x1a0
>> [c004a07ab910] [c039e42c] perf_swevent_hrtimer+0x10c/0x1d0
>> [c004a07abc50] [c0228b9c] __hrtimer_run_queues+0x17c/0x480
>> [c004a07abcf0] [c022aaf4] hrtimer_interrupt+0x144/0x520
>> [c004a07abdd0] [c002a864] timer_interrupt+0x104/0x2f0
>> [c004a07abe30] [c00091c4] decrementer_common+0x114/0x120
>> 
>> When perf record session started with "-I" option, capture registers
>  ^
>  is
> 
>> via intr-regs,
> 
> "intr-regs" is just the full name for the -I option, so that kind of
> repeats itself.
> 
>> on each sample ‘is_sier_available()'i is called to check
>  ^
>  extra i
> 
> The single quotes around is_sier_available() aren't necessary IMO.
> 
>> for the SIER ( Sample Instruction Event Register) availability in the
>^
>stray space
>> platform. This function in core-book3s access 'ppmu->flags'. If platform
>   ^ ^
>   esa
>> specific pmu driver is not registered, ppmu is set to null and accessing
>   ^^
>   PMU  NULL
>> its members results in crash. Patch fixes this by returning false in
>^
>a
>> 'is_sier_available()' if 'ppmu' is not set.
> 
> Use the imperative mood for the last sentence which says what the patch
> does:
> 
>  Fix the crash by returning false in is_sier_available() if ppmu is not set.

Sure, I will make all these changes as suggested.

Thanks
Athira
> 
> 
>> Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include 
>> SIER")
>> Reported-by: Sachin Sant 
>> Signed-off-by: Athira Rajeev 
>> ---
>> arch/powerpc/perf/core-book3s.c | 3 +++
>> 1 file changed, 3 insertions(+)
>> 
>> diff --git a/arch/powerpc/perf/core-book3s.c 
>> b/arch/powerpc/perf/core-book3s.c
>> index 08643cb..1de4770 100644
>> --- a/arch/powerpc/perf/core-book3s.c
>> +++ b/arch/powerpc/perf/core-book3s.c
>> @@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { }
>> 
>> bool is_sier_available(void)
>> {
>> +if (!ppmu)
>> +return false;
>> +
>>  if (ppmu->flags & PPMU_HAS_SIER)
>>  return true;
>> 
>> -- 
>> 1.8.3.1
> 
> 
> cheers



[PATCH] powerpc/perf: Fix crash with 'is_sier_available' when pmu is not set

2020-11-23 Thread Athira Rajeev
On systems with neither a platform-specific PMU driver nor the generic
compat PMU support registered, running 'perf record' with
--intr-regs will crash ( perf record -I ).

The relevant portion from crash logs and Call Trace:

Unable to handle kernel paging request for data at address 0x0068
Faulting instruction address: 0xc013eb18
Oops: Kernel access of bad area, sig: 11 [#1]
CPU: 2 PID: 13435 Comm: kill Kdump: loaded Not tainted 4.18.0-193.el8.ppc64le #1
NIP:  c013eb18 LR: c0139f2c CTR: c0393d80
REGS: c004a07ab4f0 TRAP: 0300   Not tainted  (4.18.0-193.el8.ppc64le)
NIP [c013eb18] is_sier_available+0x18/0x30
LR [c0139f2c] perf_reg_value+0x6c/0xb0
Call Trace:
[c004a07ab770] [c004a07ab7c8] 0xc004a07ab7c8 (unreliable)
[c004a07ab7a0] [c03aa77c] perf_output_sample+0x60c/0xac0
[c004a07ab840] [c03ab3f0] perf_event_output_forward+0x70/0xb0
[c004a07ab8c0] [c039e208] __perf_event_overflow+0x88/0x1a0
[c004a07ab910] [c039e42c] perf_swevent_hrtimer+0x10c/0x1d0
[c004a07abc50] [c0228b9c] __hrtimer_run_queues+0x17c/0x480
[c004a07abcf0] [c022aaf4] hrtimer_interrupt+0x144/0x520
[c004a07abdd0] [c002a864] timer_interrupt+0x104/0x2f0
[c004a07abe30] [c00091c4] decrementer_common+0x114/0x120

When a perf record session is started with the "-I" option, on each sample
is_sier_available() is called to check for SIER (Sample Instruction Event
Register) availability in the platform. This function in core-book3s accesses
'ppmu->flags'. If a platform-specific PMU driver is not registered, ppmu is
set to NULL and accessing its members results in a crash. Fix the crash by
returning false in is_sier_available() if ppmu is not set.

Fixes: 333804dc3b7a ("powerpc/perf: Update perf_regs structure to include SIER")
Reported-by: Sachin Sant 
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/core-book3s.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 08643cb..1de4770 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -137,6 +137,9 @@ static void pmao_restore_workaround(bool ebb) { }
 
 bool is_sier_available(void)
 {
+   if (!ppmu)
+   return false;
+
if (ppmu->flags & PPMU_HAS_SIER)
return true;
 
-- 
1.8.3.1



Re: [PATCH 3/4] powerpc/perf: Fix to update l2l3 events and generic event codes for power10

2020-11-17 Thread Athira Rajeev



> On 18-Nov-2020, at 10:06 AM, Michael Ellerman  wrote:
> 
> Athira Rajeev  writes:
>> Fix the event code for events: branch-instructions (to PM_BR_FIN),
>> branch-misses (to PM_BR_MPRED_FIN) and cache-misses (to
>> PM_LD_DEMAND_MISS_L1_FIN) for power10 PMU. Update the
>> list of generic events with this modified event code.
> 
> That should be one patch.

Ok, 
> 
>> Export l2l3 events (PM_L2_ST_MISS and PM_L2_ST) and LLC-prefetches
>> (PM_L3_PF_MISS_L3) via sysfs, and also add these to cache_events.
> 
> That should be another patch.

Ok, 
> 
>> To maintain the current event code work with DD1, rename
>> existing array of generic_events, cache_events and pmu_attr_groups
>> with suffix _dd1. Update the power10 pmu init code to pick the
>> dd1 list while registering the power PMU, based on the pvr
>> (Processor Version Register) value.
> 
> And that should be a third patch.
> 

Ok, I will make these changes in the next version.

Thanks
Athira
> cheers
> 
>> diff --git a/arch/powerpc/perf/power10-events-list.h 
>> b/arch/powerpc/perf/power10-events-list.h
>> index 60c1b81..9e0b3c9 100644
>> --- a/arch/powerpc/perf/power10-events-list.h
>> +++ b/arch/powerpc/perf/power10-events-list.h
>> @@ -15,6 +15,9 @@
>> EVENT(PM_RUN_INST_CMPL,  0x500fa);
>> EVENT(PM_BR_CMPL,   0x4d05e);
>> EVENT(PM_BR_MPRED_CMPL, 0x400f6);
>> +EVENT(PM_BR_FIN,0x2f04a);
>> +EVENT(PM_BR_MPRED_FIN,  0x35884);
>> +EVENT(PM_LD_DEMAND_MISS_L1_FIN, 0x400f0);
>> 
>> /* All L1 D cache load references counted at finish, gated by reject */
>> EVENT(PM_LD_REF_L1,  0x100fc);
>> @@ -36,6 +39,12 @@
>> EVENT(PM_DATA_FROM_L3,   0x0134001c040);
>> /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
>> EVENT(PM_DATA_FROM_L3MISS,   0x300fe);
>> +/* All successful D-side store dispatches for this thread */
>> +EVENT(PM_L2_ST, 0x01046080);
>> +/* All successful D-side store dispatches for this thread that were L2 Miss */
>> +EVENT(PM_L2_ST_MISS,0x26880);
>> +/* Total HW L3 prefetches(Load+store) */
>> +EVENT(PM_L3_PF_MISS_L3, 0x10016080);
>> /* Data PTEG reload */
>> EVENT(PM_DTLB_MISS,  0x300fc);
>> /* ITLB Reloaded */
>> diff --git a/arch/powerpc/perf/power10-pmu.c 
>> b/arch/powerpc/perf/power10-pmu.c
>> index cf44fb7..86665ad 100644
>> --- a/arch/powerpc/perf/power10-pmu.c
>> +++ b/arch/powerpc/perf/power10-pmu.c
>> @@ -114,6 +114,9 @@ static int power10_get_alternatives(u64 event, unsigned 
>> int flags, u64 alt[])
>> GENERIC_EVENT_ATTR(cache-misses, PM_LD_MISS_L1);
>> GENERIC_EVENT_ATTR(mem-loads,MEM_LOADS);
>> GENERIC_EVENT_ATTR(mem-stores,   MEM_STORES);
>> +GENERIC_EVENT_ATTR(branch-instructions, PM_BR_FIN);
>> +GENERIC_EVENT_ATTR(branch-misses,   PM_BR_MPRED_FIN);
>> +GENERIC_EVENT_ATTR(cache-misses,PM_LD_DEMAND_MISS_L1_FIN);
>> 
>> CACHE_EVENT_ATTR(L1-dcache-load-misses,  PM_LD_MISS_L1);
>> CACHE_EVENT_ATTR(L1-dcache-loads,PM_LD_REF_L1);
>> @@ -124,12 +127,15 @@ static int power10_get_alternatives(u64 event, 
>> unsigned int flags, u64 alt[])
>> CACHE_EVENT_ATTR(L1-icache-prefetches,   PM_IC_PREF_REQ);
>> CACHE_EVENT_ATTR(LLC-load-misses,PM_DATA_FROM_L3MISS);
>> CACHE_EVENT_ATTR(LLC-loads,  PM_DATA_FROM_L3);
>> +CACHE_EVENT_ATTR(LLC-prefetches,PM_L3_PF_MISS_L3);
>> +CACHE_EVENT_ATTR(LLC-store-misses,  PM_L2_ST_MISS);
>> +CACHE_EVENT_ATTR(LLC-stores,PM_L2_ST);
>> CACHE_EVENT_ATTR(branch-load-misses, PM_BR_MPRED_CMPL);
>> CACHE_EVENT_ATTR(branch-loads,   PM_BR_CMPL);
>> CACHE_EVENT_ATTR(dTLB-load-misses,   PM_DTLB_MISS);
>> CACHE_EVENT_ATTR(iTLB-load-misses,   PM_ITLB_MISS);
>> 
>> -static struct attribute *power10_events_attr[] = {
>> +static struct attribute *power10_events_attr_dd1[] = {
>>  GENERIC_EVENT_PTR(PM_RUN_CYC),
>>  GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
>>  GENERIC_EVENT_PTR(PM_BR_CMPL),
>> @@ -154,11 +160,44 @@ static int power10_get_alternatives(u64 event, 
>> unsigned int flags, u64 alt[])
>>  NULL
>> };
>> 
>> 

Re: [PATCH 2/4] powerpc/perf: Update the PMU group constraints for l2l3 and threshold events in power10

2020-11-17 Thread Athira Rajeev



> On 18-Nov-2020, at 10:02 AM, Michael Ellerman  wrote:
> 
> Athira Rajeev  writes:
>> In Power9, L2/L3 bus events are always available as a
>> "bank" of 4 events. To obtain the counts for any of the
>> l2/l3 bus events in a given bank, the user will have to
>> program PMC4 with corresponding l2/l3 bus event for that
>> bank.
>> 
>> Commit 59029136d750 ("powerpc/perf: Add constraints for power9 l2/l3 bus 
>> events")
>> enforced this rule in Power9. But this is not valid for
>> Power10, since in Power10 Monitor Mode Control Register2
>> (MMCR2) has bits to configure l2/l3 event bits. Hence remove
>> this PMC4 constraint check from power10.
>> 
>> Since the l2/l3 bits in MMCR2 are not per-pmc, patch handles
>> group constraints checks for l2/l3 bits in MMCR2.
> 
>> Patch also updates constraints for threshold events in power10.
> 
> That should be done in a separate patch please.

Thanks mpe for checking the patch set.

Sure, I will make the threshold constraint changes as a separate patch and
send the next version.


> 
> cheers



[PATCH 2/4] powerpc/perf: Update the PMU group constraints for l2l3 and threshold events in power10

2020-11-10 Thread Athira Rajeev
In Power9, L2/L3 bus events are always available as a
"bank" of 4 events. To obtain the counts for any of the
l2/l3 bus events in a given bank, the user will have to
program PMC4 with corresponding l2/l3 bus event for that
bank.

Commit 59029136d750 ("powerpc/perf: Add constraints for power9 l2/l3 bus 
events")
enforced this rule in Power9. But this is not valid for
Power10, since in Power10 the Monitor Mode Control Register 2
(MMCR2) has bits to configure the l2/l3 event selection. Hence
remove this PMC4 constraint check for power10.

Since the l2/l3 bits in MMCR2 are not per-PMC, handle the
group constraint checks for the l2/l3 bits in MMCR2.
Also update the constraints for threshold events in power10.
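
As a rough, standalone illustration of the mask/value style of group
constraint used here (this is not the isa207 scheduling code; the field
position mirrors the CNST_L2L3_GROUP_VAL shift of 55 from the patch, but the
event values are made up), two events can share a group only when every
constraint field they both use is programmed identically:

    #include <stdbool.h>
    #include <stdio.h>

    /* Each event contributes a (mask, value) pair: mask marks the constraint
     * fields the event cares about, value holds the required field contents. */
    struct cnst { unsigned long long mask, value; };

    static bool compatible(struct cnst a, struct cnst b)
    {
            /* Fields used by both events (e.g. the shared, not per-PMC,
             * l2/l3 bits that end up in MMCR2) must carry the same value. */
            return ((a.value ^ b.value) & (a.mask & b.mask)) == 0;
    }

    int main(void)
    {
            struct cnst ld    = { .mask = 0x1fULL << 55, .value = 0x3ULL << 55 };
            struct cnst st    = { .mask = 0x1fULL << 55, .value = 0x3ULL << 55 };
            struct cnst other = { .mask = 0x1fULL << 55, .value = 0x5ULL << 55 };

            printf("same l2/l3 config, can group: %d\n", compatible(ld, st));
            printf("different l2/l3 config, can group: %d\n", compatible(ld, other));
            return 0;
    }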

Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/isa207-common.c | 15 +++
 arch/powerpc/perf/isa207-common.h |  3 +++
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index f57f54f..0f4983e 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -311,9 +311,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, 
unsigned long *valp)
}
 
if (unit >= 6 && unit <= 9) {
-   if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
-   mask |= CNST_L2L3_GROUP_MASK;
-   value |= CNST_L2L3_GROUP_VAL(event >> 
p10_L2L3_EVENT_SHIFT);
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   if (unit == 6) {
+   mask |= CNST_L2L3_GROUP_MASK;
+   value |= CNST_L2L3_GROUP_VAL(event >> 
p10_L2L3_EVENT_SHIFT);
+   }
} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
mask  |= CNST_CACHE_GROUP_MASK;
value |= CNST_CACHE_GROUP_VAL(event & 0xff);
@@ -349,7 +351,12 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, 
unsigned long *valp)
value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
}
 
-   if (cpu_has_feature(CPU_FTR_ARCH_300))  {
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   if (event_is_threshold(event)) {
+   mask  |= CNST_THRESH_CTL_SEL_MASK;
+   value |= CNST_THRESH_CTL_SEL_VAL(event >> 
EVENT_THRESH_SHIFT);
+   }
+   } else if (cpu_has_feature(CPU_FTR_ARCH_300))  {
if (event_is_threshold(event) && is_thresh_cmp_valid(event)) {
mask  |= CNST_THRESH_MASK;
value |= CNST_THRESH_VAL(event >> EVENT_THRESH_SHIFT);
diff --git a/arch/powerpc/perf/isa207-common.h 
b/arch/powerpc/perf/isa207-common.h
index dc9c3d2..4208764 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -149,6 +149,9 @@
 #define CNST_THRESH_VAL(v) (((v) & EVENT_THRESH_MASK) << 32)
 #define CNST_THRESH_MASK   CNST_THRESH_VAL(EVENT_THRESH_MASK)
 
+#define CNST_THRESH_CTL_SEL_VAL(v) (((v) & 0x7ffull) << 32)
+#define CNST_THRESH_CTL_SEL_MASK   CNST_THRESH_CTL_SEL_VAL(0x7ff)
+
 #define CNST_EBB_VAL(v)(((v) & EVENT_EBB_MASK) << 24)
 #define CNST_EBB_MASK  CNST_EBB_VAL(EVENT_EBB_MASK)
 
-- 
1.8.3.1



[PATCH 3/4] powerpc/perf: Fix to update l2l3 events and generic event codes for power10

2020-11-10 Thread Athira Rajeev
Fix the event codes for the events branch-instructions (to PM_BR_FIN),
branch-misses (to PM_BR_MPRED_FIN) and cache-misses (to
PM_LD_DEMAND_MISS_L1_FIN) for the power10 PMU. Update the
list of generic events with these modified event codes.
Export the l2l3 events (PM_L2_ST_MISS and PM_L2_ST) and LLC-prefetches
(PM_L3_PF_MISS_L3) via sysfs, and also add these to cache_events.

To keep the current event codes working on DD1, rename the
existing generic_events, cache_events and pmu_attr_groups
arrays with a _dd1 suffix. Update the power10 PMU init code to pick the
dd1 list while registering the PMU, based on the PVR
(Processor Version Register) value.
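
A condensed, standalone sketch of the selection described above (not the
driver code; the PVR_CFG-style field extraction and the sample PVR value are
assumptions for illustration only):

    #include <stdio.h>

    /* Illustrative stand-ins: one generic-event table per chip revision. */
    static const char *power10_events_dd1[] = { "PM_BR_CMPL", "PM_BR_MPRED_CMPL", NULL };
    static const char *power10_events[]     = { "PM_BR_FIN", "PM_BR_MPRED_FIN", NULL };

    /* Assumed field extraction, for illustration only. */
    static unsigned int pvr_cfg(unsigned int pvr) { return (pvr >> 8) & 0xf; }

    int main(void)
    {
            unsigned int pvr = 0x00800100;  /* pretend power10 DD1 part */
            const char **events = (pvr_cfg(pvr) == 1) ? power10_events_dd1
                                                      : power10_events;

            for (int i = 0; events[i]; i++)
                    printf("%s\n", events[i]);
            return 0;
    }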

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/power10-events-list.h |   9 ++
 arch/powerpc/perf/power10-pmu.c | 166 +++-
 2 files changed, 173 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/power10-events-list.h 
b/arch/powerpc/perf/power10-events-list.h
index 60c1b81..9e0b3c9 100644
--- a/arch/powerpc/perf/power10-events-list.h
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -15,6 +15,9 @@
 EVENT(PM_RUN_INST_CMPL,0x500fa);
 EVENT(PM_BR_CMPL,   0x4d05e);
 EVENT(PM_BR_MPRED_CMPL, 0x400f6);
+EVENT(PM_BR_FIN,   0x2f04a);
+EVENT(PM_BR_MPRED_FIN, 0x35884);
+EVENT(PM_LD_DEMAND_MISS_L1_FIN,0x400f0);
 
 /* All L1 D cache load references counted at finish, gated by reject */
 EVENT(PM_LD_REF_L1,0x100fc);
@@ -36,6 +39,12 @@
 EVENT(PM_DATA_FROM_L3, 0x0134001c040);
 /* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
 EVENT(PM_DATA_FROM_L3MISS, 0x300fe);
+/* All successful D-side store dispatches for this thread */
+EVENT(PM_L2_ST,0x01046080);
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+EVENT(PM_L2_ST_MISS,   0x26880);
+/* Total HW L3 prefetches(Load+store) */
+EVENT(PM_L3_PF_MISS_L3,0x10016080);
 /* Data PTEG reload */
 EVENT(PM_DTLB_MISS,0x300fc);
 /* ITLB Reloaded */
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index cf44fb7..86665ad 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -114,6 +114,9 @@ static int power10_get_alternatives(u64 event, unsigned int 
flags, u64 alt[])
 GENERIC_EVENT_ATTR(cache-misses,   PM_LD_MISS_L1);
 GENERIC_EVENT_ATTR(mem-loads,  MEM_LOADS);
 GENERIC_EVENT_ATTR(mem-stores, MEM_STORES);
+GENERIC_EVENT_ATTR(branch-instructions, PM_BR_FIN);
+GENERIC_EVENT_ATTR(branch-misses,   PM_BR_MPRED_FIN);
+GENERIC_EVENT_ATTR(cache-misses,   PM_LD_DEMAND_MISS_L1_FIN);
 
 CACHE_EVENT_ATTR(L1-dcache-load-misses,PM_LD_MISS_L1);
 CACHE_EVENT_ATTR(L1-dcache-loads,  PM_LD_REF_L1);
@@ -124,12 +127,15 @@ static int power10_get_alternatives(u64 event, unsigned 
int flags, u64 alt[])
 CACHE_EVENT_ATTR(L1-icache-prefetches, PM_IC_PREF_REQ);
 CACHE_EVENT_ATTR(LLC-load-misses,  PM_DATA_FROM_L3MISS);
 CACHE_EVENT_ATTR(LLC-loads,PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(LLC-prefetches,   PM_L3_PF_MISS_L3);
+CACHE_EVENT_ATTR(LLC-store-misses, PM_L2_ST_MISS);
+CACHE_EVENT_ATTR(LLC-stores,   PM_L2_ST);
 CACHE_EVENT_ATTR(branch-load-misses,   PM_BR_MPRED_CMPL);
 CACHE_EVENT_ATTR(branch-loads, PM_BR_CMPL);
 CACHE_EVENT_ATTR(dTLB-load-misses, PM_DTLB_MISS);
 CACHE_EVENT_ATTR(iTLB-load-misses, PM_ITLB_MISS);
 
-static struct attribute *power10_events_attr[] = {
+static struct attribute *power10_events_attr_dd1[] = {
GENERIC_EVENT_PTR(PM_RUN_CYC),
GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
GENERIC_EVENT_PTR(PM_BR_CMPL),
@@ -154,11 +160,44 @@ static int power10_get_alternatives(u64 event, unsigned 
int flags, u64 alt[])
NULL
 };
 
+static struct attribute *power10_events_attr[] = {
+   GENERIC_EVENT_PTR(PM_RUN_CYC),
+   GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
+   GENERIC_EVENT_PTR(PM_BR_FIN),
+   GENERIC_EVENT_PTR(PM_BR_MPRED_FIN),
+   GENERIC_EVENT_PTR(PM_LD_REF_L1),
+   GENERIC_EVENT_PTR(PM_LD_DEMAND_MISS_L1_FIN),
+   GENERIC_EVENT_PTR(MEM_LOADS),
+   GENERIC_EVENT_PTR(MEM_STORES),
+   CACHE_EVENT_PTR(PM_LD_MISS_L1),
+   CACHE_EVENT_PTR(PM_LD_REF_L1),
+   CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS),
+   CACHE_EVENT_PTR(PM_ST_MISS_L1),
+   CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+   CACHE_EVENT_PTR(PM_INST_FROM_L1),
+   CACHE_EVENT_PTR(PM_IC_PREF_REQ),
+   CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+   CACHE_EVENT_PTR(PM_DATA_FROM_L3

[PATCH 1/4] powerpc/perf: Fix to update radix_scope_qual in power10

2020-11-10 Thread Athira Rajeev
power10 uses bit 9 of the raw event code as RADIX_SCOPE_QUAL.
This bit is used for enabling the radix process events.
Fix the PMU counter support functions to program bit
18 of MMCR1 (Monitor Mode Control Register 1) with the
RADIX_SCOPE_QUAL bit value. Since this field is not per-PMC,
add it to the PMU group constraints to make sure events in a
group will have the same value for this field. Use bit 21 as
the constraint bit field for radix_scope_qual. Also update
the power10 raw event encoding layout information, format field
and constraint bit layout to include the radix_scope_qual bit.
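
To make the bit numbering concrete (a standalone sketch, not the kernel code):
the ISA numbers SPR bits with bit 0 as the most significant, so "MMCR1 bit 18"
on a 64-bit register is shift 63 - 18 = 45 in the LSB-0 convention the code
uses, which is why the patch defines the MMCR1 shift for radix_scope_qual
as 45:

    #include <stdio.h>

    int main(void)
    {
            unsigned long event = 1UL << 9;                 /* raw event with RADIX_SCOPE_QUAL (bit 9) set */
            unsigned long radix_scope = (event >> 9) & 0x1;
            unsigned long mmcr1 = radix_scope << (63 - 18); /* ISA bit 18 == shift 45 */

            printf("mmcr1 = 0x%016lx\n", mmcr1);            /* 0x0000200000000000 */
            return 0;
    }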

Fixes: a64e697cef23 ("powerpc/perf: power10 Performance Monitoring support")
Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/isa207-common.c | 12 
 arch/powerpc/perf/isa207-common.h | 13 ++---
 arch/powerpc/perf/power10-pmu.c   | 11 +++
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/perf/isa207-common.c 
b/arch/powerpc/perf/isa207-common.c
index 2848904..f57f54f 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -339,6 +339,11 @@ int isa207_get_constraint(u64 event, unsigned long *maskp, 
unsigned long *valp)
value |= CNST_L1_QUAL_VAL(cache);
}
 
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   mask |= CNST_RADIX_SCOPE_GROUP_MASK;
+   value |= CNST_RADIX_SCOPE_GROUP_VAL(event >> 
p10_EVENT_RADIX_SCOPE_QUAL_SHIFT);
+   }
+
if (is_event_marked(event)) {
mask  |= CNST_SAMPLE_MASK;
value |= CNST_SAMPLE_VAL(event >> EVENT_SAMPLE_SHIFT);
@@ -456,6 +461,13 @@ int isa207_compute_mmcr(u64 event[], int n_ev,
}
}
 
+   /* Set RADIX_SCOPE_QUAL bit */
+   if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+   val = (event[i] >> p10_EVENT_RADIX_SCOPE_QUAL_SHIFT) &
+   p10_EVENT_RADIX_SCOPE_QUAL_MASK;
+   mmcr1 |= val << p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT;
+   }
+
if (is_event_marked(event[i])) {
mmcra |= MMCRA_SAMPLE_ENABLE;
 
diff --git a/arch/powerpc/perf/isa207-common.h 
b/arch/powerpc/perf/isa207-common.h
index 7025de5..dc9c3d2 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -101,6 +101,9 @@
 #define p10_EVENT_CACHE_SEL_MASK   0x3ull
 #define p10_EVENT_MMCR3_MASK   0x7fffull
 #define p10_EVENT_MMCR3_SHIFT  45
+#define p10_EVENT_RADIX_SCOPE_QUAL_SHIFT   9
+#define p10_EVENT_RADIX_SCOPE_QUAL_MASK0x1
+#define p10_MMCR1_RADIX_SCOPE_QUAL_SHIFT   45
 
 #define p10_EVENT_VALID_MASK   \
((p10_SDAR_MODE_MASK   << p10_SDAR_MODE_SHIFT   |   \
@@ -112,6 +115,7 @@
(p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT)   |   \
(p10_EVENT_MMCR3_MASK  << p10_EVENT_MMCR3_SHIFT)|   \
(EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)   |   \
+   (p10_EVENT_RADIX_SCOPE_QUAL_MASK << p10_EVENT_RADIX_SCOPE_QUAL_SHIFT)   
|   \
 EVENT_LINUX_MASK   |   \
EVENT_PSEL_MASK))
 /*
@@ -125,9 +129,9 @@
  *
  *2824201612 8 4   
  0
  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - 
- - |
- *   [ ] |   [ ]   [  sample ]   [ ]   [6] [5]   [4] [3]   [2] 
[1]
- *|  || |
- *  BHRB IFM -*  || |  Count of events for 
each PMC.
+ *   [ ] |   [ ] |  [  sample ]   [ ]   [6] [5]   [4] [3]   
[2] [1]
+ *|  ||  |  |
+ *  BHRB IFM -*  ||  |*radix_scope  |  Count of events for 
each PMC.
  *  EBB -*| |p1, p2, p3, p4, p5, 
p6.
  *  L1 I/D qualifier -* |
  * nc - number of counters -*
@@ -165,6 +169,9 @@
 #define CNST_L2L3_GROUP_VAL(v) (((v) & 0x1full) << 55)
 #define CNST_L2L3_GROUP_MASK   CNST_L2L3_GROUP_VAL(0x1f)
 
+#define CNST_RADIX_SCOPE_GROUP_VAL(v)  (((v) & 0x1ull) << 21)
+#define CNST_RADIX_SCOPE_GROUP_MASKCNST_RADIX_SCOPE_GROUP_VAL(1)
+
 /*
  * For NC we are counting up to 4 events. This requires three bits, and we need
  * the fifth event to overflow and set the 4th bit. To achieve that we bias the
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index 9dbe8f9..cf44fb7 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -23,10 +23,10 @@
  *
  *2824201612 8 4   
  0
  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | -

[PATCH 4/4] powerpc/perf: MMCR0 control for PMU registers under PMCC=00

2020-11-10 Thread Athira Rajeev
PowerISA v3.1 introduces a new control bit (PMCCEXT) for enabling
secure access to group B PMU registers in problem state when
MMCR0 PMCC=0b00. Add support for the MMCR0 PMCCEXT bit
in power10 by enabling this bit during boot and during the PMU
event enable/disable operations when MMCR0 PMCC=0b00.

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/include/asm/reg.h|  1 +
 arch/powerpc/kernel/cpu_setup_power.S |  2 ++
 arch/powerpc/kernel/dt_cpu_ftrs.c |  1 +
 arch/powerpc/perf/core-book3s.c   | 16 
 4 files changed, 20 insertions(+)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index f877a57..cba9965 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -864,6 +864,7 @@
 #define   MMCR0_BHRBA  0x0020UL /* BHRB Access allowed in userspace */
 #define   MMCR0_EBE0x0010UL /* Event based branch enable */
 #define   MMCR0_PMCC   0x000cUL /* PMC control */
+#define   MMCR0_PMCCEXTASM_CONST(0x0200) /* PMCCEXT control */
 #define   MMCR0_PMCC_U60x0008UL /* PMC1-6 are R/W by user (PR) */
 #define   MMCR0_PMC1CE 0x8000UL /* PMC1 count enable*/
 #define   MMCR0_PMCjCE ASM_CONST(0x4000) /* PMCj count enable*/
diff --git a/arch/powerpc/kernel/cpu_setup_power.S 
b/arch/powerpc/kernel/cpu_setup_power.S
index 704e8b9..8fc8b72 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -249,4 +249,6 @@ __init_PMU_ISA31:
mtspr   SPRN_MMCR3,r5
LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE)
mtspr   SPRN_MMCRA,r5
+   LOAD_REG_IMMEDIATE(r5, MMCR0_PMCCEXT)
+   mtspr   SPRN_MMCR0,r5
blr
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c 
b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 1098863..9d07965 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -454,6 +454,7 @@ static void init_pmu_power10(void)
 
mtspr(SPRN_MMCR3, 0);
mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+   mtspr(SPRN_MMCR0, MMCR0_PMCCEXT);
 }
 
 static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 08643cb..f328bc0 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -95,6 +95,7 @@ struct cpu_hw_events {
 #define SPRN_SIER3 0
 #define MMCRA_SAMPLE_ENABLE0
 #define MMCRA_BHRB_DISABLE 0
+#define MMCR0_PMCCEXT  0
 
 static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
 {
@@ -1242,6 +1243,9 @@ static void power_pmu_disable(struct pmu *pmu)
val |= MMCR0_FC;
val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO |
 MMCR0_FC56);
+   /* Set mmcr0 PMCCEXT for p10 */
+   if (ppmu->flags & PPMU_ARCH_31)
+   val |= MMCR0_PMCCEXT;
 
/*
 * The barrier is to make sure the mtspr has been
@@ -1449,6 +1453,18 @@ static void power_pmu_enable(struct pmu *pmu)
 
mmcr0 = ebb_switch_in(ebb, cpuhw);
 
+   /*
+* Set mmcr0 (PMCCEXT) for p10
+* if mmcr0 PMCC=0b00 to allow secure
+* mode of access to group B registers.
+*/
+   if (ppmu->flags & PPMU_ARCH_31) {
+   if (!(mmcr0 & MMCR0_PMCC)) {
+   cpuhw->mmcr.mmcr0 |= MMCR0_PMCCEXT;
+   mmcr0 |= MMCR0_PMCCEXT;
+   }
+   }
+
mb();
if (cpuhw->bhrb_users)
ppmu->config_bhrb(cpuhw->bhrb_filter);
-- 
1.8.3.1



[PATCH 0/4] powerpc/perf: Fixes for power10 PMU

2020-11-10 Thread Athira Rajeev
This patchset contains four PMU fixes for power10.

Patch 1 fixes the radix_scope_qual bit handling in the power10
event code.
Patch 2 updates the event group constraints for L2/L3 and threshold
events in power10.
Patch 3 includes the event code changes for l2/l3 events and
some of the generic events.
Patch 4 adds fixes for the PMCCEXT bit in power10.

Athira Rajeev (4):
  powerpc/perf: Fix to update radix_scope_qual in power10
  powerpc/perf: Update the PMU group constraints for l2l3 and threshold
events in power10
  powerpc/perf: Fix to update l2l3 events and generic event codes for
power10
  powerpc/perf: MMCR0 control for PMU registers under PMCC=00

 arch/powerpc/include/asm/reg.h  |   1 +
 arch/powerpc/kernel/cpu_setup_power.S   |   2 +
 arch/powerpc/kernel/dt_cpu_ftrs.c   |   1 +
 arch/powerpc/perf/core-book3s.c |  16 +++
 arch/powerpc/perf/isa207-common.c   |  27 -
 arch/powerpc/perf/isa207-common.h   |  16 ++-
 arch/powerpc/perf/power10-events-list.h |   9 ++
 arch/powerpc/perf/power10-pmu.c | 177 ++--
 8 files changed, 236 insertions(+), 13 deletions(-)

-- 
1.8.3.1



[PATCH 4/4] powerpc/perf: Exclude kernel samples while counting events in user space.

2020-10-08 Thread Athira Rajeev
By setting exclude_kernel for user space profiling, we set the
freeze bits in the Monitor Mode Control Register. Due to a hardware
limitation, the Sampled Instruction Address Register (SIAR) sometimes
captures a kernel address even when the counter freeze bits are set in
Monitor Mode Control Register 2 (MMCR2). Add a check to drop
these samples under such conditions.
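
A minimal standalone sketch of the decision being added (not the kernel
function itself; in record_and_restart() the same effect is achieved by
clearing 'record', which also keeps event throttling handled):

    #include <stdbool.h>
    #include <stdio.h>

    /* Should a sample be recorded, given the user asked for user-space-only
     * profiling but the sampled address (SIAR) still landed in the kernel? */
    static bool keep_sample(bool exclude_kernel, bool siar_is_kernel_addr)
    {
            if (exclude_kernel && siar_is_kernel_addr)
                    return false;   /* drop it, as the hunk below does */
            return true;
    }

    int main(void)
    {
            printf("kernel addr, exclude_kernel set, kept: %d\n", keep_sample(true, true));
            printf("user addr, exclude_kernel set, kept:   %d\n", keep_sample(true, false));
            return 0;
    }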

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/core-book3s.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index c018004..10a2d1f 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -2143,6 +2143,18 @@ static void record_and_restart(struct perf_event *event, 
unsigned long val,
perf_event_update_userpage(event);
 
/*
+* Setting exclude_kernel will only freeze the
+* Performance Monitor counters and we may have
+* kernel address captured in SIAR. Hence drop
+* the kernel sample captured during user space
+* profiling. Setting `record` to zero will also
+* make sure event throttling is handled.
+*/
+   if (event->attr.exclude_kernel && record)
+   if (is_kernel_addr(mfspr(SPRN_SIAR)))
+   record = 0;
+
+   /*
 * Finally record data if requested.
 */
if (record) {
-- 
1.8.3.1



[PATCH 2/4] powerpc/perf: Using SIER[CMPL] instead of SIER[SIAR_VALID]

2020-10-08 Thread Athira Rajeev
On power10 DD1, there is an issue that causes the SIAR_VALID
bit of the Sampled Instruction Event Register (SIER) not to be
set. But the SIAR_VALID bit is used for fetching the instruction
address from the Sampled Instruction Address Register (SIAR), and
marked events are sampled only if the SIAR_VALID bit is set.
So add a condition check for power10 DD1 to use the SIER[CMPL] bit
instead.

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/core-book3s.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 08643cb..d766090 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -350,7 +350,14 @@ static inline int siar_valid(struct pt_regs *regs)
int marked = mmcra & MMCRA_SAMPLE_ENABLE;
 
if (marked) {
-   if (ppmu->flags & PPMU_HAS_SIER)
+   /*
+* SIER[SIAR_VALID] is not set for some
+* marked events on power10 DD1, so use
+* SIER[CMPL] instead.
+*/
+   if (ppmu->flags & PPMU_P10_DD1)
+   return regs->dar & 0x1;
+   else if (ppmu->flags & PPMU_HAS_SIER)
return regs->dar & SIER_SIAR_VALID;
 
if (ppmu->flags & PPMU_SIAR_VALID)
-- 
1.8.3.1



[PATCH 3/4] powerpc/perf: Use the address from SIAR register to set cpumode flags

2020-10-08 Thread Athira Rajeev
While setting the processor mode for any sample, `perf_get_misc_flags`
expects the privilege level to differentiate between userspace and kernel
addresses. On power10 DD1, there is an issue that causes the [MSR_HV, MSR_PR]
bits of the Sampled Instruction Event Register (SIER) not to be set for marked
events. Hence add a check to use the address in the Sampled Instruction Address
Register (SIAR) to identify the privilege level.

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/perf/core-book3s.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index d766090..c018004 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -250,11 +250,25 @@ static inline u32 perf_flags_from_msr(struct pt_regs 
*regs)
 static inline u32 perf_get_misc_flags(struct pt_regs *regs)
 {
bool use_siar = regs_use_siar(regs);
+   unsigned long mmcra = regs->dsisr;
+   int marked = mmcra & MMCRA_SAMPLE_ENABLE;
 
if (!use_siar)
return perf_flags_from_msr(regs);
 
/*
+* Check the address in SIAR to identify the
+* privilege levels since the SIER[MSR_HV, MSR_PR]
+* bits are not set for marked events in power10
+* DD1.
+*/
+   if (marked && (ppmu->flags & PPMU_P10_DD1)) {
+   if (is_kernel_addr(mfspr(SPRN_SIAR)))
+   return PERF_RECORD_MISC_KERNEL;
+   return PERF_RECORD_MISC_USER;
+   }
+
+   /*
 * If we don't have flags in MMCRA, rather than using
 * the MSR, we intuit the flags from the address in
 * SIAR which should give slightly more reliable
-- 
1.8.3.1



[PATCH 1/4] powerpc/perf: Add new power pmu flag "PPMU_P10_DD1" for power10 DD1

2020-10-08 Thread Athira Rajeev
Add a new power PMU flag "PPMU_P10_DD1" which can be
used to conditionally add any code path for the power10 DD1 processor
version. Also modify the power10 PMU driver code to set this
flag only for DD1, based on the Processor Version Register (PVR)
value.

Signed-off-by: Athira Rajeev 
---
 arch/powerpc/include/asm/perf_event_server.h | 1 +
 arch/powerpc/perf/power10-pmu.c  | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/perf_event_server.h 
b/arch/powerpc/include/asm/perf_event_server.h
index f6acabb..3b7baba 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -82,6 +82,7 @@ struct power_pmu {
 #define PPMU_ARCH_207S 0x0080 /* PMC is architecture v2.07S */
 #define PPMU_NO_SIAR   0x0100 /* Do not use SIAR */
 #define PPMU_ARCH_31   0x0200 /* Has MMCR3, SIER2 and SIER3 */
+#define PPMU_P10_DD1   0x0400 /* Is power10 DD1 processor version 
*/
 
 /*
  * Values for flags to get_alternatives()
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
index 8314865..47d930a 100644
--- a/arch/powerpc/perf/power10-pmu.c
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -404,6 +404,7 @@ static void power10_config_bhrb(u64 pmu_bhrb_filter)
 
 int init_power10_pmu(void)
 {
+   unsigned int pvr;
int rc;
 
/* Comes from cpu_specs[] */
@@ -411,6 +412,11 @@ int init_power10_pmu(void)
strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10"))
return -ENODEV;
 
+   pvr = mfspr(SPRN_PVR);
+   /* Add the ppmu flag for power10 DD1 */
+   if ((PVR_CFG(pvr) == 1))
+   power10_pmu.flags |= PPMU_P10_DD1;
+
/* Set the PERF_REG_EXTENDED_MASK here */
PERF_REG_EXTENDED_MASK = PERF_REG_PMU_MASK_31;
 
-- 
1.8.3.1


