On Tue, Jul 10, 2018 at 09:58:04AM +0100, Suzuki K Poulose wrote:
> Add support for 64bit events by using chained event counters
> and 64bit cycle counters.
> 
> PMUv3 allows chaining a pair of adjacent 32-bit counters, effectively
> forming a 64-bit counter. The low/even counter is programmed to count
> the event of interest, and the high/odd counter is programmed to count
> the CHAIN event, taken when the low/even counter overflows.
> 
> For CPU cycles, when 64bit mode is requested, the cycle counter
> is used in 64bit mode. If the cycle counter is not available,
> we fall back to chaining.
> 
> Cc: Mark Rutland <[email protected]>
> Cc: Will Deacon <[email protected]>
> Signed-off-by: Suzuki K Poulose <[email protected]>
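For anyone reading along, the scheme above means the 64-bit value of a
chained pair is simply the odd (CHAIN) counter in the upper half and the
even counter in the lower half. A minimal illustration, not code from the
patch (the helper name is made up, using kernel u32/u64 types):

static u64 chained_pair_value(u32 even_lo, u32 odd_hi)
{
	/* The odd counter counts CHAIN events, i.e. overflows of the even half */
	return ((u64)odd_hi << 32) | even_lo;
}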
Acked-by: Mark Rutland <[email protected]>

Mark.

> ---
> Changes since v4:
> - Drop comment in cpu_pm_pmu_setup
> Changes since v3:
> - Rename format name from "bits64 => long"
> - Address other comments on style.
> ---
> arch/arm64/kernel/perf_event.c | 185 +++++++++++++++++++++++++++++++++++------
> drivers/perf/arm_pmu.c | 9 +-
> 2 files changed, 160 insertions(+), 34 deletions(-)
> 
> diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
> index b414d81..dfff5ed 100644
> --- a/arch/arm64/kernel/perf_event.c
> +++ b/arch/arm64/kernel/perf_event.c
> @@ -446,9 +446,16 @@ static struct attribute_group armv8_pmuv3_events_attr_group = {
> };
> 
> PMU_FORMAT_ATTR(event, "config:0-15");
> +PMU_FORMAT_ATTR(long, "config1:0");
> +
> +static inline bool armv8pmu_event_is_64bit(struct perf_event *event)
> +{
> +	return event->attr.config1 & 0x1;
> +}
> 
> static struct attribute *armv8_pmuv3_format_attrs[] = {
> 	&format_attr_event.attr,
> +	&format_attr_long.attr,
> 	NULL,
> };
> 
> @@ -466,6 +473,21 @@ static struct attribute_group armv8_pmuv3_format_attr_group = {
> 	(ARMV8_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1)
> 
> /*
> + * We must chain two programmable counters for 64 bit events,
> + * except when we have allocated the 64bit cycle counter (for CPU
> + * cycles event). This must be called only when the event has
> + * a counter allocated.
> + */
> +static inline bool armv8pmu_event_is_chained(struct perf_event *event)
> +{
> +	int idx = event->hw.idx;
> +
> +	return !WARN_ON(idx < 0) &&
> +	       armv8pmu_event_is_64bit(event) &&
> +	       (idx != ARMV8_IDX_CYCLE_COUNTER);
> +}
> +
> +/*
>  * ARMv8 low level PMU access
>  */
> 
> @@ -516,12 +538,23 @@ static inline u32 armv8pmu_read_evcntr(int idx)
> 	return read_sysreg(pmxevcntr_el0);
> }
> 
> +static inline u64 armv8pmu_read_hw_counter(struct perf_event *event)
> +{
> +	int idx = event->hw.idx;
> +	u64 val = 0;
> +
> +	val = armv8pmu_read_evcntr(idx);
> +	if (armv8pmu_event_is_chained(event))
> +		val = (val << 32) | armv8pmu_read_evcntr(idx - 1);
> +	return val;
> +}
> +
> static inline u64 armv8pmu_read_counter(struct perf_event *event)
> {
> 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
> 	struct hw_perf_event *hwc = &event->hw;
> 	int idx = hwc->idx;
> -	u32 value = 0;
> +	u64 value = 0;
> 
> 	if (!armv8pmu_counter_valid(cpu_pmu, idx))
> 		pr_err("CPU%u reading wrong counter %d\n",
> @@ -529,7 +562,7 @@ static inline u64 armv8pmu_read_counter(struct perf_event *event)
> 	else if (idx == ARMV8_IDX_CYCLE_COUNTER)
> 		value = read_sysreg(pmccntr_el0);
> 	else
> -		value = armv8pmu_read_evcntr(idx);
> +		value = armv8pmu_read_hw_counter(event);
> 
> 	return value;
> }
> @@ -540,6 +573,19 @@ static inline void armv8pmu_write_evcntr(int idx, u32 value)
> 	write_sysreg(value, pmxevcntr_el0);
> }
> 
> +static inline void armv8pmu_write_hw_counter(struct perf_event *event,
> +					     u64 value)
> +{
> +	int idx = event->hw.idx;
> +
> +	if (armv8pmu_event_is_chained(event)) {
> +		armv8pmu_write_evcntr(idx, upper_32_bits(value));
> +		armv8pmu_write_evcntr(idx - 1, lower_32_bits(value));
> +	} else {
> +		armv8pmu_write_evcntr(idx, value);
> +	}
> +}
> +
> static inline void armv8pmu_write_counter(struct perf_event *event, u64 value)
> {
> 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
> @@ -551,14 +597,16 @@ static inline void armv8pmu_write_counter(struct perf_event *event, u64 value)
> 			smp_processor_id(), idx);
> 	else if (idx == ARMV8_IDX_CYCLE_COUNTER) {
> 		/*
> -		 * Set the upper 32bits as this is a 64bit counter but we only
> -		 * count using the lower 32bits and we want an interrupt when
> -		 * it overflows.
> +		 * The cycles counter is really a 64-bit counter.
> +		 * When treating it as a 32-bit counter, we only count
> +		 * the lower 32 bits, and set the upper 32-bits so that
> +		 * we get an interrupt upon 32-bit overflow.
> 		 */
> -		value |= 0xffffffff00000000ULL;
> +		if (!armv8pmu_event_is_64bit(event))
> +			value |= 0xffffffff00000000ULL;
> 		write_sysreg(value, pmccntr_el0);
> 	} else
> -		armv8pmu_write_evcntr(idx, value);
> +		armv8pmu_write_hw_counter(event, value);
> }
> 
> static inline void armv8pmu_write_evtype(int idx, u32 val)
> @@ -568,6 +616,27 @@ static inline void armv8pmu_write_evtype(int idx, u32 val)
> 	write_sysreg(val, pmxevtyper_el0);
> }
> 
> +static inline void armv8pmu_write_event_type(struct perf_event *event)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	int idx = hwc->idx;
> +
> +	/*
> +	 * For chained events, the low counter is programmed to count
> +	 * the event of interest and the high counter is programmed
> +	 * with CHAIN event code with filters set to count at all ELs.
> +	 */
> +	if (armv8pmu_event_is_chained(event)) {
> +		u32 chain_evt = ARMV8_PMUV3_PERFCTR_CHAIN |
> +				ARMV8_PMU_INCLUDE_EL2;
> +
> +		armv8pmu_write_evtype(idx - 1, hwc->config_base);
> +		armv8pmu_write_evtype(idx, chain_evt);
> +	} else {
> +		armv8pmu_write_evtype(idx, hwc->config_base);
> +	}
> +}
> +
> static inline int armv8pmu_enable_counter(int idx)
> {
> 	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
> @@ -575,6 +644,16 @@ static inline int armv8pmu_enable_counter(int idx)
> 	return idx;
> }
> 
> +static inline void armv8pmu_enable_event_counter(struct perf_event *event)
> +{
> +	int idx = event->hw.idx;
> +
> +	armv8pmu_enable_counter(idx);
> +	if (armv8pmu_event_is_chained(event))
> +		armv8pmu_enable_counter(idx - 1);
> +	isb();
> +}
> +
> static inline int armv8pmu_disable_counter(int idx)
> {
> 	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
> @@ -582,6 +661,16 @@ static inline int armv8pmu_disable_counter(int idx)
> 	return idx;
> }
> 
> +static inline void armv8pmu_disable_event_counter(struct perf_event *event)
> +{
> +	struct hw_perf_event *hwc = &event->hw;
> +	int idx = hwc->idx;
> +
> +	if (armv8pmu_event_is_chained(event))
> +		armv8pmu_disable_counter(idx - 1);
> +	armv8pmu_disable_counter(idx);
> +}
> +
> static inline int armv8pmu_enable_intens(int idx)
> {
> 	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
> @@ -589,6 +678,11 @@ static inline int armv8pmu_enable_intens(int idx)
> 	return idx;
> }
> 
> +static inline int armv8pmu_enable_event_irq(struct perf_event *event)
> +{
> +	return armv8pmu_enable_intens(event->hw.idx);
> +}
> +
> static inline int armv8pmu_disable_intens(int idx)
> {
> 	u32 counter = ARMV8_IDX_TO_COUNTER(idx);
> @@ -601,6 +695,11 @@ static inline int armv8pmu_disable_intens(int idx)
> 	return idx;
> }
> 
> +static inline int armv8pmu_disable_event_irq(struct perf_event *event)
> +{
> +	return armv8pmu_disable_intens(event->hw.idx);
> +}
> +
> static inline u32 armv8pmu_getreset_flags(void)
> {
> 	u32 value;
> @@ -618,10 +717,8 @@ static inline u32 armv8pmu_getreset_flags(void)
> static void armv8pmu_enable_event(struct perf_event *event)
> {
> 	unsigned long flags;
> -	struct hw_perf_event *hwc = &event->hw;
> 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
> 	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
> -	int idx = hwc->idx;
> 
> 	/*
> 	 * Enable counter and interrupt, and set the counter to count
> @@ -632,22 +729,22 @@ static void armv8pmu_enable_event(struct perf_event *event)
> 	/*
> 	 * Disable counter
> 	 */
> -	armv8pmu_disable_counter(idx);
> +	armv8pmu_disable_event_counter(event);
> 
> 	/*
> 	 * Set event (if destined for PMNx counters).
> 	 */
> -	armv8pmu_write_evtype(idx, hwc->config_base);
> +	armv8pmu_write_event_type(event);
> 
> 	/*
> 	 * Enable interrupt for this counter
> 	 */
> -	armv8pmu_enable_intens(idx);
> +	armv8pmu_enable_event_irq(event);
> 
> 	/*
> 	 * Enable counter
> 	 */
> -	armv8pmu_enable_counter(idx);
> +	armv8pmu_enable_event_counter(event);
> 
> 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
> }
> @@ -655,10 +752,8 @@ static void armv8pmu_enable_event(struct perf_event *event)
> static void armv8pmu_disable_event(struct perf_event *event)
> {
> 	unsigned long flags;
> -	struct hw_perf_event *hwc = &event->hw;
> 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
> 	struct pmu_hw_events *events = this_cpu_ptr(cpu_pmu->hw_events);
> -	int idx = hwc->idx;
> 
> 	/*
> 	 * Disable counter and interrupt
> @@ -668,12 +763,12 @@ static void armv8pmu_disable_event(struct perf_event *event)
> 	/*
> 	 * Disable counter
> 	 */
> -	armv8pmu_disable_counter(idx);
> +	armv8pmu_disable_event_counter(event);
> 
> 	/*
> 	 * Disable interrupt for this counter
> 	 */
> -	armv8pmu_disable_intens(idx);
> +	armv8pmu_disable_event_irq(event);
> 
> 	raw_spin_unlock_irqrestore(&events->pmu_lock, flags);
> }
> @@ -767,10 +862,42 @@ static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu)
> 	return IRQ_HANDLED;
> }
> 
> +static int armv8pmu_get_single_idx(struct pmu_hw_events *cpuc,
> +				   struct arm_pmu *cpu_pmu)
> +{
> +	int idx;
> +
> +	for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; idx ++) {
> +		if (!test_and_set_bit(idx, cpuc->used_mask))
> +			return idx;
> +	}
> +	return -EAGAIN;
> +}
> +
> +static int armv8pmu_get_chain_idx(struct pmu_hw_events *cpuc,
> +				  struct arm_pmu *cpu_pmu)
> +{
> +	int idx;
> +
> +	/*
> +	 * Chaining requires two consecutive event counters, where
> +	 * the lower idx must be even.
> +	 */
> +	for (idx = ARMV8_IDX_COUNTER0 + 1; idx < cpu_pmu->num_events; idx += 2) {
> +		if (!test_and_set_bit(idx, cpuc->used_mask)) {
> +			/* Check if the preceding even counter is available */
> +			if (!test_and_set_bit(idx - 1, cpuc->used_mask))
> +				return idx;
> +			/* Release the Odd counter */
> +			clear_bit(idx, cpuc->used_mask);
> +		}
> +	}
> +	return -EAGAIN;
> +}
> +
> static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
> 				  struct perf_event *event)
> {
> -	int idx;
> 	struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu);
> 	struct hw_perf_event *hwc = &event->hw;
> 	unsigned long evtype = hwc->config_base & ARMV8_PMU_EVTYPE_EVENT;
> @@ -784,19 +911,20 @@ static int armv8pmu_get_event_idx(struct pmu_hw_events *cpuc,
> 	/*
> 	 * Otherwise use events counters
> 	 */
> -	for (idx = ARMV8_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) {
> -		if (!test_and_set_bit(idx, cpuc->used_mask))
> -			return idx;
> -	}
> -
> -	/* The counters are all in use. */
> -	return -EAGAIN;
> +	if (armv8pmu_event_is_64bit(event))
> +		return armv8pmu_get_chain_idx(cpuc, cpu_pmu);
> +	else
> +		return armv8pmu_get_single_idx(cpuc, cpu_pmu);
> }
> 
> static void armv8pmu_clear_event_idx(struct pmu_hw_events *cpuc,
> -				    struct perf_event *event)
> +				     struct perf_event *event)
> {
> -	clear_bit(event->hw.idx, cpuc->used_mask);
> +	int idx = event->hw.idx;
> +
> +	clear_bit(idx, cpuc->used_mask);
> +	if (armv8pmu_event_is_chained(event))
> +		clear_bit(idx - 1, cpuc->used_mask);
> }
> 
> /*
> @@ -871,6 +999,9 @@ static int __armv8_pmuv3_map_event(struct perf_event *event,
> 				 &armv8_pmuv3_perf_cache_map,
> 				 ARMV8_PMU_EVTYPE_EVENT);
> 
> +	if (armv8pmu_event_is_64bit(event))
> +		event->hw.flags |= ARMPMU_EVT_64BIT;
> +
> 	/* Only expose micro/arch events supported by this PMU */
> 	if ((hw_event_id > 0) && (hw_event_id < ARMV8_PMUV3_MAX_COMMON_EVENTS)
> 	    && test_bit(hw_event_id, armpmu->pmceid_bitmap)) {
> diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c
> index a288810..7f01f6f 100644
> --- a/drivers/perf/arm_pmu.c
> +++ b/drivers/perf/arm_pmu.c
> @@ -665,14 +665,9 @@ static void cpu_pm_pmu_setup(struct arm_pmu *armpmu, unsigned long cmd)
> 	int idx;
> 
> 	for (idx = 0; idx < armpmu->num_events; idx++) {
> -		/*
> -		 * If the counter is not used skip it, there is no
> -		 * need of stopping/restarting it.
> -		 */
> -		if (!test_bit(idx, hw_events->used_mask))
> -			continue;
> -
> 		event = hw_events->events[idx];
> +		if (!event)
> +			continue;
> 
> 		switch (cmd) {
> 		case CPU_PM_ENTER:
> --
> 2.7.4
> 
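One usage note, in case it helps anyone testing this (not part of the patch):
with the new "long" format attribute (config1:0), userspace asks for a 64-bit
counter by setting bit 0 of perf_event_attr::config1. A rough sketch follows;
the PMU type value is a placeholder that would normally be read from
/sys/bus/event_source/devices/armv8_pmuv3*/type, and the event code is just
an example:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

static int open_64bit_event(int pmu_type, unsigned long long event_code)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = pmu_type;		/* dynamic PMU type from sysfs */
	attr.config = event_code;	/* event number, e.g. 0x11 (CPU_CYCLES) */
	attr.config1 = 1;		/* bit 0 = "long": request a 64-bit counter */
	attr.disabled = 1;

	/* monitor the calling thread on any CPU */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}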

