On Tue, Feb 05, 2019 at 02:33:03PM +0000, Julien Thierry wrote:
> Hi Andrew,
>
> On 04/02/2019 16:53, Andrew Murray wrote:
> > Emulate chained PMU counters by creating a single 64 bit event counter
> > for a pair of chained KVM counters.
> >
> > Signed-off-by: Andrew Murray <[email protected]>
> > ---
> > include/kvm/arm_pmu.h | 1 +
> > virt/kvm/arm/pmu.c | 321
> > +++++++++++++++++++++++++++++++++++++++++---------
> > 2 files changed, 269 insertions(+), 53 deletions(-)
> >
> > diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
> > index b73f31b..8e691ee 100644
> > --- a/include/kvm/arm_pmu.h
> > +++ b/include/kvm/arm_pmu.h
> > @@ -29,6 +29,7 @@ struct kvm_pmc {
> > u8 idx; /* index into the pmu->pmc array */
> > struct perf_event *perf_event;
> > u64 bitmask;
> > + u64 overflow_count;
> > };
> >
> > struct kvm_pmu {
> > diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
> > index a64aeb2..9318130 100644
> > --- a/virt/kvm/arm/pmu.c
> > +++ b/virt/kvm/arm/pmu.c
> > @@ -24,9 +24,25 @@
> > #include <kvm/arm_pmu.h>
> > #include <kvm/arm_vgic.h>
> >
> > +#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E
>
> I find it a bit awkward to have this redefined here.
>
> Maybe we could define a helper in kvm_host.h:
> bool kvm_pmu_typer_is_chain(u64 typer);
>
> That would always return false for arm32?
>
> > +static void kvm_pmu_stop_release_perf_event_pair(struct kvm_vcpu *vcpu,
> > + u64 pair_low);
> > +static void kvm_pmu_stop_release_perf_event(struct kvm_vcpu *vcpu,
> > + u64 select_idx);
> > +static void kvm_pmu_sync_counter_enable_pair(struct kvm_vcpu *vcpu, u64
> > pair_low);
> > static void kvm_pmu_sync_counter_enable(struct kvm_vcpu *vcpu, u64
> > select_idx);
> > static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64
> > select_idx);
> > -static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc
> > *pmc);
> > +
> > +/**
> > + * kvm_pmu_counter_is_high_word - is select_idx high counter of 64bit event
> > + * @pmc: The PMU counter pointer
> > + * @select_idx: The counter index
> > + */
> > +static inline bool kvm_pmu_counter_is_high_word(struct kvm_pmc *pmc)
> > +{
> > + return ((pmc->perf_event->attr.config1 & 0x1)
> > + && (pmc->idx % 2));
> > +}
> >
> > /**
> > * kvm_pmu_get_counter_value - get PMU counter value
> > @@ -35,22 +51,70 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu,
> > struct kvm_pmc *pmc);
> > */
> > u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
> > {
> > - u64 counter, reg, enabled, running;
> > + u64 counter_idx, reg, enabled, running, incr;
> > struct kvm_pmu *pmu = &vcpu->arch.pmu;
> > struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> >
> > reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
> > ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
> > - counter = __vcpu_sys_reg(vcpu, reg);
> > + counter_idx = __vcpu_sys_reg(vcpu, reg);
>
> I'm not sure I understand the "_idx" suffix for this variable. This
> holds a counter value, not and index. Right?
Yes it holds a counter value. The reason for the '_idx' suffix was to indicate
that this holds the counter value as selected by 'select_idx'. Also in the
case of a chained counter, this value is only the high or low part and so I
reserve 'counter' for representing the entire chained counter value.
I'll change this such that 'counter_idx' becomes 'counter' again. And 'counter'
(introduced by this patch) becomes 'counter_full' - does that make it clearer?
>
>
> >
> > /* The real counter value is equal to the value of counter register plus
> > * the value perf event counts.
> > */
> > - if (pmc->perf_event)
> > - counter += perf_event_read_value(pmc->perf_event, &enabled,
> > + if (pmc->perf_event) {
> > + incr = perf_event_read_value(pmc->perf_event, &enabled,
> > &running);
> >
> > - return counter & pmc->bitmask;
> > + if (kvm_pmu_counter_is_high_word(pmc)) {
> > + u64 counter_low, counter;
> > +
> > + reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
> > + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx - 1;
> > + counter_low = __vcpu_sys_reg(vcpu, reg);
> > + counter = lower_32_bits(counter_low) | (counter_idx <<
> > 32);
> > + counter_idx = upper_32_bits(counter + incr);
> > + } else {
> > + counter_idx += incr;
> > + }
> > + }
> > +
> > + return counter_idx & pmc->bitmask;
> > +}
> > +
> > +/**
> > + * kvm_pmu_counter_is_enabled - is a counter active
> > + * @vcpu: The vcpu pointer
> > + * @select_idx: The counter index
> > + */
> > +static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64
> > select_idx)
> > +{
> > + u64 mask = kvm_pmu_valid_counter_mask(vcpu);
> > +
> > + return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
> > + (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask & BIT(select_idx));
> > +}
> > +
> > +/**
> > + * kvnm_pmu_event_is_chained - is a pair of counters chained and enabled
> > + * @vcpu: The vcpu pointer
> > + * @select_idx: The low counter index
> > + */
> > +static bool kvm_pmu_event_is_chained(struct kvm_vcpu *vcpu, u64 pair_low)
> > +{
> > + u64 eventsel, reg;
> > +
> > + reg = (pair_low + 1 == ARMV8_PMU_CYCLE_IDX)
> > + ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pair_low + 1;
> > + eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT;
> > + if (eventsel != ARMV8_PMUV3_PERFCTR_CHAIN)
> > + return false;
> > +
> > + if (kvm_pmu_counter_is_enabled(vcpu, pair_low) !=
> > + kvm_pmu_counter_is_enabled(vcpu, pair_low + 1))
> > + return false;
> > +
> > + return true;
> > }
> >
> > /**
> > @@ -61,29 +125,45 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu,
> > u64 select_idx)
> > */
> > void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64
> > val)
> > {
> > - u64 reg;
> > - struct kvm_pmu *pmu = &vcpu->arch.pmu;
> > - struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> > + u64 reg, pair_low;
> >
> > reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
> > ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
> > __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu,
> > select_idx);
> >
> > - kvm_pmu_stop_counter(vcpu, pmc);
> > - kvm_pmu_sync_counter_enable(vcpu, select_idx);
> > + pair_low = (select_idx % 2) ? select_idx - 1 : select_idx;
> > +
> > + /* Recreate the perf event to reflect the updated sample_period */
> > + if (kvm_pmu_event_is_chained(vcpu, pair_low)) {
> > + kvm_pmu_stop_release_perf_event_pair(vcpu, pair_low);
> > + kvm_pmu_sync_counter_enable_pair(vcpu, pair_low);
> > + } else {
> > + kvm_pmu_stop_release_perf_event(vcpu, select_idx);
> > + kvm_pmu_sync_counter_enable(vcpu, select_idx);
> > + }
> > }
> >
> > /**
> > * kvm_pmu_release_perf_event - remove the perf event
> > * @pmc: The PMU counter pointer
> > */
> > -static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
> > +static void kvm_pmu_release_perf_event(struct kvm_vcpu *vcpu,
> > + struct kvm_pmc *pmc)
> > {
> > - if (pmc->perf_event) {
> > - perf_event_disable(pmc->perf_event);
> > + struct kvm_pmu *pmu = &vcpu->arch.pmu;
> > + struct kvm_pmc *pmc_alt;
> > + u64 pair_alt;
> > +
> > + pair_alt = (pmc->idx % 2) ? pmc->idx - 1 : pmc->idx + 1;
> > + pmc_alt = &pmu->pmc[pair_alt];
> > +
> > + if (pmc->perf_event)
> > perf_event_release_kernel(pmc->perf_event);
> > - pmc->perf_event = NULL;
> > - }
> > +
> > + if (pmc->perf_event == pmc_alt->perf_event)
> > + pmc_alt->perf_event = NULL;
> > +
> > + pmc->perf_event = NULL;
> > }
> >
> > /**
> > @@ -91,22 +171,65 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc
> > *pmc)
> > * @vcpu: The vcpu pointer
> > * @pmc: The PMU counter pointer
> > *
> > - * If this counter has been configured to monitor some event, release it
> > here.
> > + * If this counter has been configured to monitor some event, stop it here.
> > */
> > static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc
> > *pmc)
> > {
> > u64 counter, reg;
> >
> > if (pmc->perf_event) {
> > + perf_event_disable(pmc->perf_event);
> > counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
> > reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
> > ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
> > __vcpu_sys_reg(vcpu, reg) = counter;
> > - kvm_pmu_release_perf_event(pmc);
> > }
> > }
> >
> > /**
> > + * kvm_pmu_stop_release_perf_event_pair - stop and release a pair of
> > counters
> > + * @vcpu: The vcpu pointer
> > + * @pmc_low: The PMU counter pointer for lower word
> > + * @pmc_high: The PMU counter pointer for higher word
> > + *
> > + * As chained counters share the underlying perf event, we stop them
> > + * both first before discarding the underlying perf event
> > + */
> > +static void kvm_pmu_stop_release_perf_event_pair(struct kvm_vcpu *vcpu,
> > + u64 idx_low)
> > +{
> > + struct kvm_pmu *pmu = &vcpu->arch.pmu;
> > + struct kvm_pmc *pmc_low = &pmu->pmc[idx_low];
> > + struct kvm_pmc *pmc_high = &pmu->pmc[idx_low + 1];
> > +
> > + /* Stopping a counter involves adding the perf event value to the
> > + * vcpu sys register value prior to releasing the perf event. As
> > + * kvm_pmu_get_counter_value may depend on the low counter value we
> > + * must always stop the high counter first
> > + */
> > + kvm_pmu_stop_counter(vcpu, pmc_high);
> > + kvm_pmu_stop_counter(vcpu, pmc_low);
> > +
> > + kvm_pmu_release_perf_event(vcpu, pmc_high);
> > + kvm_pmu_release_perf_event(vcpu, pmc_low);
> > +}
> > +
> > +/**
> > + * kvm_pmu_stop_release_perf_event - stop and release a counter
> > + * @vcpu: The vcpu pointer
> > + * @select_idx: The counter index
> > + */
> > +static void kvm_pmu_stop_release_perf_event(struct kvm_vcpu *vcpu,
> > + u64 select_idx)
> > +{
> > + struct kvm_pmu *pmu = &vcpu->arch.pmu;
> > + struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> > +
> > + kvm_pmu_stop_counter(vcpu, pmc);
> > + kvm_pmu_release_perf_event(vcpu, pmc);
> > +}
> > +
> > +/**
> > * kvm_pmu_vcpu_reset - reset pmu state for cpu
> > * @vcpu: The vcpu pointer
> > *
> > @@ -117,7 +240,7 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
> > struct kvm_pmu *pmu = &vcpu->arch.pmu;
> >
> > for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
> > - kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
> > + kvm_pmu_stop_release_perf_event(vcpu, i);
> > pmu->pmc[i].idx = i;
> > pmu->pmc[i].bitmask = 0xffffffffUL;
> > }
> > @@ -134,7 +257,7 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
> > struct kvm_pmu *pmu = &vcpu->arch.pmu;
> >
> > for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
> > - kvm_pmu_release_perf_event(&pmu->pmc[i]);
> > + kvm_pmu_release_perf_event(vcpu, &pmu->pmc[i]);
> > }
> >
> > u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
> > @@ -167,53 +290,115 @@ static void kvm_pmu_enable_counter(struct kvm_vcpu
> > *vcpu, u64 select_idx)
> > }
> >
> > /**
> > + * kvm_pmu_sync_counter_enable - reenable a counter if it should be enabled
> > + * @vcpu: The vcpu pointer
> > + * @select_idx: The counter index
> > + */
> > +static void kvm_pmu_sync_counter_enable(struct kvm_vcpu *vcpu,
> > + u64 select_idx)
> > +{
> > + kvm_pmu_enable_counter_mask(vcpu, BIT(select_idx));
> > +}
> > +
> > +/**
> > + * kvm_pmu_sync_counter_enable_pair - reenable a pair if they should be
> > enabled
> > + * @vcpu: The vcpu pointer
> > + * @pair_low: The low counter index
> > + */
> > +static void kvm_pmu_sync_counter_enable_pair(struct kvm_vcpu *vcpu, u64
> > pair_low)
> > +{
> > + kvm_pmu_enable_counter_mask(vcpu, BIT(pair_low) | BIT(pair_low + 1));
> > +}
> > +
> > +/**
> > + * kvm_pmu_enable_counter_pair - enable counters pair at a time
> > + * @vcpu: The vcpu pointer
> > + * @val: counters to enable
> > + * @pair_low: The low counter index
> > + */
> > +static void kvm_pmu_enable_counter_pair(struct kvm_vcpu *vcpu, u64 val,
> > + u64 pair_low)
> > +{
> > + struct kvm_pmu *pmu = &vcpu->arch.pmu;
> > + struct kvm_pmc *pmc_low = &pmu->pmc[pair_low];
> > + struct kvm_pmc *pmc_high = &pmu->pmc[pair_low + 1];
> > +
> > + if (kvm_pmu_event_is_chained(vcpu, pair_low)) {
> > + if (pmc_low->perf_event != pmc_high->perf_event)
> > + kvm_pmu_stop_release_perf_event_pair(vcpu, pair_low);
> > + }
> > +
> > + if (val & BIT(pair_low))
> > + kvm_pmu_enable_counter(vcpu, pair_low);
> > +
> > + if (val & BIT(pair_low+1))
>
> Style nit: I think there should be spaces around the '+', might be worth
> running checkpatch to check for other style stuff.
Thanks - for some reason checkpatch doesn't pick this one up.
Andrew Murray
>
> > + kvm_pmu_enable_counter(vcpu, pair_low + 1);
> > +}
> > +
> > +/**
> > * kvm_pmu_enable_counter_mask - enable selected PMU counters
> > * @vcpu: The vcpu pointer
> > - * @val: the value guest writes to PMCNTENSET register
> > + * @val: the value guest writes to PMCNTENSET register or a subset
> > *
> > * Call perf_event_enable to start counting the perf event
> > */
> > void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
> > {
> > int i;
> > + u64 mask = kvm_pmu_valid_counter_mask(vcpu);
> > + u64 set = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;
> >
> > if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val)
> > return;
> >
> > - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
> > - if (!(val & BIT(i)))
> > - continue;
> > -
> > - kvm_pmu_enable_counter(vcpu, i);
> > - }
> > + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i += 2)
> > + kvm_pmu_enable_counter_pair(vcpu, val & set, i);
> > }
> >
> > /**
> > - * kvm_pmu_sync_counter_enable - reenable a counter if it should be enabled
> > + * kvm_pmu_disable_counter - disable selected PMU counter
> > * @vcpu: The vcpu pointer
> > * @select_idx: The counter index
> > */
> > -static void kvm_pmu_sync_counter_enable(struct kvm_vcpu *vcpu,
> > - u64 select_idx)
> > +static void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 select_idx)
> > {
> > - u64 set = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
> > + struct kvm_pmu *pmu = &vcpu->arch.pmu;
> > + struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> >
> > - if (set & BIT(select_idx))
> > - kvm_pmu_enable_counter_mask(vcpu, BIT(select_idx));
> > + if (pmc->perf_event) {
> > + perf_event_disable(pmc->perf_event);
> > + if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
> > + kvm_debug("fail to enable perf event\n");
> > + }
> > }
> >
> > /**
> > - * kvm_pmu_disable_counter - disable selected PMU counter
> > - * @vcpu: The vcpu pointer
> > - * @pmc: The counter to disable
> > + * kvm_pmu_disable_counter_pair - disable counters pair at a time
> > + * @val: counters to disable
> > + * @pair_low: The low counter index
> > */
> > -static void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 select_idx)
> > +static void kvm_pmu_disable_counter_pair(struct kvm_vcpu *vcpu, u64 val,
> > + u64 pair_low)
> > {
> > struct kvm_pmu *pmu = &vcpu->arch.pmu;
> > - struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> > + struct kvm_pmc *pmc_low = &pmu->pmc[pair_low];
> > + struct kvm_pmc *pmc_high = &pmu->pmc[pair_low + 1];
> > +
> > + if (!kvm_pmu_event_is_chained(vcpu, pair_low)) {
> > + if (pmc_low->perf_event == pmc_high->perf_event) {
> > + if (pmc_low->perf_event) {
> > + kvm_pmu_stop_release_perf_event_pair(vcpu,
> > + pair_low);
> > + kvm_pmu_sync_counter_enable_pair(vcpu,
> > pair_low);
> > + }
> > + }
> > + }
> >
> > - if (pmc->perf_event)
> > - perf_event_disable(pmc->perf_event);
> > + if (val & BIT(pair_low))
> > + kvm_pmu_disable_counter(vcpu, pair_low);
> > +
> > + if (val & BIT(pair_low + 1))
> > + kvm_pmu_disable_counter(vcpu, pair_low + 1);
> > }
> >
> > /**
> > @@ -230,12 +415,8 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu
> > *vcpu, u64 val)
> > if (!val)
> > return;
> >
> > - for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
> > - if (!(val & BIT(i)))
> > - continue;
> > -
> > - kvm_pmu_disable_counter(vcpu, i);
> > - }
> > + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i += 2)
> > + kvm_pmu_disable_counter_pair(vcpu, val, i);
> > }
> >
> > static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
> > @@ -346,6 +527,17 @@ static void kvm_pmu_perf_overflow(struct perf_event
> > *perf_event,
> >
> > __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
> >
> > + if (kvm_pmu_event_is_chained(vcpu, idx)) {
> > + struct kvm_pmu *pmu = &vcpu->arch.pmu;
> > + struct kvm_pmc *pmc_high = &pmu->pmc[idx + 1];
> > +
> > + if (!(--pmc_high->overflow_count)) {
> > + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx + 1);
> > + pmc_high->overflow_count = U32_MAX + 1UL;
> > + }
> > +
> > + }
> > +
> > if (kvm_pmu_overflow_status(vcpu)) {
> > kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
> > kvm_vcpu_kick(vcpu);
> > @@ -440,6 +632,10 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu
> > *vcpu, u64 select_idx)
> > select_idx != ARMV8_PMU_CYCLE_IDX)
> > return;
> >
> > + /* Handled by even event */
> > + if (eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
> > + return;
> > +
> > memset(&attr, 0, sizeof(struct perf_event_attr));
> > attr.type = PERF_TYPE_RAW;
> > attr.size = sizeof(attr);
> > @@ -452,6 +648,9 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu
> > *vcpu, u64 select_idx)
> > attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
> > ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
> >
> > + if (kvm_pmu_event_is_chained(vcpu, select_idx))
> > + attr.config1 |= 0x1;
>
> I'm not very familiar with the usage of perf attributes configs, but is
> there any chance we could name this flag? Even if only for the local
> file? Something like PERF_ATTR_CFG1_KVM_PMU_CHAINED (unless there is an
> existing naming convention for event attributes).
>
> Thanks,
>
> --
> Julien Thierry
_______________________________________________
kvmarm mailing list
[email protected]
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm