Re: [PATCH v3 02/14] KVM: arm64: PMU: Align chained counter implementation with architecture pseudocode

2022-11-12 Thread Reiji Watanabe
Hi Marc,

> > > +#define PERF_ATTR_CFG1_COUNTER_64BIT   BIT(0)
> >
> > Although this isn't the new code (but just a name change),
> > wouldn't it be nicer to have armv8pmu_event_is_64bit()
> > (in arch/arm64/kernel/perf_event.c) use the macro as well ?
>
> We tried that in the past, and the amount of churn wasn't really worth
> it. I'm happy to revisit this in the future, but probably as a
> separate patch.

I see... Thank you for the clarification.  As this isn't new,
I agree with not addressing it in this series.
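
(For reference, the change being discussed would presumably end up looking
something like the sketch below. This is purely an illustration of the
suggestion, assuming the macro moves to a header shared with
arch/arm64/kernel/perf_event.c; it is not part of this series.)

#define PERF_ATTR_CFG1_COUNTER_64BIT	BIT(0)

static inline bool armv8pmu_event_is_64bit(struct perf_event *event)
{
	/* same test as the existing open-coded bit-0 check, just named */
	return event->attr.config1 & PERF_ATTR_CFG1_COUNTER_64BIT;
}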

> > > @@ -340,11 +245,8 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
> > >
> > > pmc = &pmu->pmc[i];
> > >
> > > -   /* A change in the enable state may affect the chain state */
> > > -   kvm_pmu_update_pmc_chained(vcpu, i);
> > > kvm_pmu_create_perf_event(vcpu, i);
> > >
> > > -   /* At this point, pmc must be the canonical */
> > > if (pmc->perf_event) {
> > > perf_event_enable(pmc->perf_event);
> > > if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
> > > @@ -375,11 +277,8 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
> > >
> > > pmc = &pmu->pmc[i];
> > >
> > > -   /* A change in the enable state may affect the chain state */
> > > -   kvm_pmu_update_pmc_chained(vcpu, i);
> > > kvm_pmu_create_perf_event(vcpu, i);
> >
> > Do we still need to call kvm_pmu_update_pmc_chained() here even
> > with this patch ? (I would think the reason why the function was
> > called here was because the chain state change could affect the
> > backed perf event attribute before).
> > I have the same comment for kvm_pmu_enable_counter_mask().
>
> Do you mean kvm_pmu_create_perf_event() instead? I think we can drop
> the one on disable. But the one on enable is required, as we need to
> be able to start counting an event even if the guest hasn't programmed
> the event number (unlikely, but allowed by the architecture). It can
> be made conditional though.

I'm so sorry for the confusion...
Yes, kvm_pmu_create_perf_event() is what I meant.
Thank you for the explanation of the enable case; now I understand.

>
> I have the following fix queued:

Looks good!

Thank you,
Reiji

>
> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> index 26293f842b0f..b7a5f75d008d 100644
> --- a/arch/arm64/kvm/pmu-emul.c
> +++ b/arch/arm64/kvm/pmu-emul.c
> @@ -277,9 +277,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
>
> pmc = &pmu->pmc[i];
>
> -   kvm_pmu_create_perf_event(vcpu, i);
> -
> -   if (pmc->perf_event) {
> +   if (!pmc->perf_event) {
> +   kvm_pmu_create_perf_event(vcpu, i);
> +   } else {
> perf_event_enable(pmc->perf_event);
> if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
> kvm_debug("fail to enable perf event\n");
> @@ -309,8 +309,6 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
>
> pmc = &pmu->pmc[i];
>
> -   kvm_pmu_create_perf_event(vcpu, i);
> -
> if (pmc->perf_event)
> perf_event_disable(pmc->perf_event);
> }


Re: [PATCH v3 02/14] KVM: arm64: PMU: Align chained counter implementation with architecture pseudocode

2022-11-12 Thread Marc Zyngier
Hi Reiji,

On Sat, 12 Nov 2022 07:55:38 +,
Reiji Watanabe  wrote:
> 
> Hi Marc,
> 
> On Mon, Nov 7, 2022 at 12:54 AM Marc Zyngier  wrote:
> >
> > Ricardo recently pointed out that the PMU chained counter emulation
> > in KVM wasn't quite behaving like the one on actual hardware, in
> > the sense that a chained counter would expose an overflow on
> > both halves of a chained counter, while KVM would only expose the
> > overflow on the top half.
> >
> > The difference is subtle, but significant. What does the architecture
> > say (DDI0487 H.a):
> >
> > - Up to PMUv3p4, all counters but the cycle counter are 32bit
> >
> > - A 32bit counter that overflows generates a CHAIN event on the
> >   adjacent counter after exposing its own overflow status
> >
> > - The CHAIN event is accounted if the counter is correctly
> >   configured (CHAIN event selected and counter enabled)
> >
> > This all means that our current implementation (which uses 64bit
> > perf events) prevents us from emulating this overflow on the lower half.
> >
> > How to fix this? By implementing the above, to the letter.
> >
> > This largely results in code deletion, removing the notions of
> > "counter pair", "chained counters", and "canonical counter".
> > The code is further restructured to make the CHAIN handling similar
> > to SWINC, as the two are now extremely similar in behaviour.
> >
> > Reported-by: Ricardo Koller 
> > Signed-off-by: Marc Zyngier 
> > ---
> >  arch/arm64/kvm/pmu-emul.c | 312 ++
> >  include/kvm/arm_pmu.h |   2 -
> >  2 files changed, 83 insertions(+), 231 deletions(-)
> >
> > diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> > index 0003c7d37533..a38b3127f649 100644
> > --- a/arch/arm64/kvm/pmu-emul.c
> > +++ b/arch/arm64/kvm/pmu-emul.c
> > @@ -15,16 +15,14 @@
> >  #include 
> >  #include 
> >
> > +#define PERF_ATTR_CFG1_COUNTER_64BIT   BIT(0)
> 
> Although this isn't the new code (but just a name change),
> wouldn't it be nicer to have armv8pmu_event_is_64bit()
> (in arch/arm64/kernel/perf_event.c) use the macro as well ?

We tried that in the past, and the amount of churn wasn't really worth
it. I'm happy to revisit this in the future, but probably as a
separate patch.

[...]

> > @@ -163,29 +97,7 @@ static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
> > counter += perf_event_read_value(pmc->perf_event, &enabled,
> >  &running);
> >
> > -   return counter;
> > -}
> > -
> > -/**
> > - * kvm_pmu_get_counter_value - get PMU counter value
> > - * @vcpu: The vcpu pointer
> > - * @select_idx: The counter index
> > - */
> > -u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
> > -{
> > -   u64 counter;
> > -   struct kvm_pmu *pmu = &vcpu->arch.pmu;
> > -   struct kvm_pmc *pmc = &pmu->pmc[select_idx];
> > -
> > -   if (!kvm_vcpu_has_pmu(vcpu))
> > -   return 0;
> > -
> > -   counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
> > -
> > -   if (kvm_pmu_pmc_is_chained(pmc) &&
> > -   kvm_pmu_idx_is_high_counter(select_idx))
> > -   counter = upper_32_bits(counter);
> > -   else if (select_idx != ARMV8_PMU_CYCLE_IDX)
> > +   if (select_idx != ARMV8_PMU_CYCLE_IDX)
> 
> Nit: Using 'pmc->idx' instead of 'select_idx' appears to be more consistent.

Well, this is the exact opposite of Oliver's comment last time. I
initially used pmc->idx, but it made the diff somehow larger and also
more difficult to understand what changed.

In the end, I'd rather rework the whole file to consistently use
vcpu+idx or pmc, as the mixed use of both is annoying. And that's
probably a cleanup patch for later.

[...]

> > @@ -340,11 +245,8 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
> >
> > pmc = &pmu->pmc[i];
> >
> > -   /* A change in the enable state may affect the chain state */
> > -   kvm_pmu_update_pmc_chained(vcpu, i);
> > kvm_pmu_create_perf_event(vcpu, i);
> >
> > -   /* At this point, pmc must be the canonical */
> > if (pmc->perf_event) {
> > perf_event_enable(pmc->perf_event);
> > if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
> > @@ -375,11 +277,8 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
> >
> > pmc = &pmu->pmc[i];
> >
> > -   /* A change in the enable state may affect the chain state */
> > -   kvm_pmu_update_pmc_chained(vcpu, i);
> > kvm_pmu_create_perf_event(vcpu, i);
> 
> Do we still need to call kvm_pmu_update_pmc_chained() here even
> with this patch ? (I would think the reason why the function was
> called here was because the chain state change could affect the
> backed perf event attribute before).
> I have the same comment for kvm_pmu_enable_counter_mask().

Do you mean kvm_pmu_create_perf_event() instead? I think we can drop
the one on disable. But the one on enable is required, as we need to
be able to start counting an event even if the guest hasn't programmed
the event number (unlikely, but allowed by the architecture). It can
be made conditional though.

Re: [PATCH v3 02/14] KVM: arm64: PMU: Align chained counter implementation with architecture pseudocode

2022-11-11 Thread Reiji Watanabe
Hi Marc,

On Mon, Nov 7, 2022 at 12:54 AM Marc Zyngier  wrote:
>
> Ricardo recently pointed out that the PMU chained counter emulation
> in KVM wasn't quite behaving like the one on actual hardware, in
> the sense that a chained counter would expose an overflow on
> both halves of a chained counter, while KVM would only expose the
> overflow on the top half.
>
> The difference is subtle, but significant. What does the architecture
> say (DDI0487 H.a):
>
> - Up to PMUv3p4, all counters but the cycle counter are 32bit
>
> - A 32bit counter that overflows generates a CHAIN event on the
>   adjacent counter after exposing its own overflow status
>
> - The CHAIN event is accounted if the counter is correctly
>   configured (CHAIN event selected and counter enabled)
>
> This all means that our current implementation (which uses 64bit
> perf events) prevents us from emulating this overflow on the lower half.
>
> How to fix this? By implementing the above, to the letter.
>
> This largely results in code deletion, removing the notions of
> "counter pair", "chained counters", and "canonical counter".
> The code is further restructured to make the CHAIN handling similar
> to SWINC, as the two are now extremely similar in behaviour.
>
> Reported-by: Ricardo Koller 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/kvm/pmu-emul.c | 312 ++
>  include/kvm/arm_pmu.h |   2 -
>  2 files changed, 83 insertions(+), 231 deletions(-)
>
> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> index 0003c7d37533..a38b3127f649 100644
> --- a/arch/arm64/kvm/pmu-emul.c
> +++ b/arch/arm64/kvm/pmu-emul.c
> @@ -15,16 +15,14 @@
>  #include 
>  #include 
>
> +#define PERF_ATTR_CFG1_COUNTER_64BIT   BIT(0)

Although this isn't the new code (but just a name change),
wouldn't it be nicer to have armv8pmu_event_is_64bit()
(in arch/arm64/kernel/perf_event.c) use the macro as well ?

> +
>  DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
>
>  static LIST_HEAD(arm_pmus);
>  static DEFINE_MUTEX(arm_pmus_lock);
>
>  static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
> -static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
> -static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
> -
> -#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
>
>  static u32 kvm_pmu_event_mask(struct kvm *kvm)
>  {
> @@ -57,6 +55,11 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
> __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
>  }
>
> +static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx)
> +{
> +   return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX);
> +}
> +
>  static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
>  {
> struct kvm_pmu *pmu;
> @@ -69,91 +72,22 @@ static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
>  }
>
>  /**
> - * kvm_pmu_pmc_is_chained - determine if the pmc is chained
> - * @pmc: The PMU counter pointer
> - */
> -static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
> -{
> -   struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
> -
> -   return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
> -}
> -
> -/**
> - * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
> - * @select_idx: The counter index
> - */
> -static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
> -{
> -   return select_idx & 0x1;
> -}
> -
> -/**
> - * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
> - * @pmc: The PMU counter pointer
> - *
> - * When a pair of PMCs are chained together we use the low counter (canonical)
> - * to hold the underlying perf event.
> - */
> -static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
> -{
> -   if (kvm_pmu_pmc_is_chained(pmc) &&
> -   kvm_pmu_idx_is_high_counter(pmc->idx))
> -   return pmc - 1;
> -
> -   return pmc;
> -}
> -static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
> -{
> -   if (kvm_pmu_idx_is_high_counter(pmc->idx))
> -   return pmc - 1;
> -   else
> -   return pmc + 1;
> -}
> -
> -/**
> - * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
> + * kvm_pmu_get_counter_value - get PMU counter value
>   * @vcpu: The vcpu pointer
>   * @select_idx: The counter index
>   */
> -static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
> -{
> -   u64 eventsel, reg;
> -
> -   select_idx |= 0x1;
> -
> -   if (select_idx == ARMV8_PMU_CYCLE_IDX)
> -   return false;
> -
> -   reg = PMEVTYPER0_EL0 + select_idx;
> -   eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
> -
> -   return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
> -}
> -
> -/**
> - * kvm_pmu_get_pair_counter_value - get PMU counter value
> - * @vcpu: The vcpu pointer
> - * @pmc: The PMU counter pointer
> - */
> -static u64 

[PATCH v3 02/14] KVM: arm64: PMU: Align chained counter implementation with architecture pseudocode

2022-11-07 Thread Marc Zyngier
Ricardo recently pointed out that the PMU chained counter emulation
in KVM wasn't quite behaving like the one on actual hardware, in
the sense that a chained counter would expose an overflow on
both halves of a chained counter, while KVM would only expose the
overflow on the top half.

The difference is subtle, but significant. What does the architecture
say (DDI0487 H.a):

- Up to PMUv3p4, all counters but the cycle counter are 32bit

- A 32bit counter that overflows generates a CHAIN event on the
  adjacent counter after exposing its own overflow status

- The CHAIN event is accounted if the counter is correctly
  configured (CHAIN event selected and counter enabled)

This all means that our current implementation (which uses 64bit
perf events) prevents us from emulating this overflow on the lower half.

How to fix this? By implementing the above, to the letter.

This largely results in code deletion, removing the notions of
"counter pair", "chained counters", and "canonical counter".
The code is further restructured to make the CHAIN handling similar
to SWINC, as the two are now extremely similar in behaviour.
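
To illustrate, the CHAIN handling described above amounts to roughly the
following when a low (even) 32bit counter wraps. This is only a sketch
written against the helpers used in this patch; the function name is made
up and it is not the patch's actual overflow path:

/* Sketch: propagate a 32bit overflow of the even counter 'idx' */
static void chain_overflow_sketch(struct kvm_vcpu *vcpu, u64 idx)
{
	u64 cnt_reg = PMEVCNTR0_EL0 + idx + 1;
	u64 evt_reg = PMEVTYPER0_EL0 + idx + 1;
	u64 reg;

	/* The overflowing counter exposes its own overflow status first */
	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);

	/*
	 * The CHAIN event is only accounted if the adjacent (odd) counter
	 * is enabled and actually programmed with the CHAIN event.
	 */
	if (!kvm_pmu_counter_can_chain(vcpu, idx) ||
	    !(__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(idx + 1)) ||
	    (__vcpu_sys_reg(vcpu, evt_reg) & kvm_pmu_event_mask(vcpu->kvm)) !=
	    ARMV8_PMUV3_PERFCTR_CHAIN)
		return;

	/* Increment the odd counter, which is itself a 32bit counter */
	reg = lower_32_bits(__vcpu_sys_reg(vcpu, cnt_reg) + 1);
	__vcpu_sys_reg(vcpu, cnt_reg) = reg;
	if (!reg)	/* the odd counter wrapped as well */
		__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx + 1);
}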

Reported-by: Ricardo Koller 
Signed-off-by: Marc Zyngier 
---
 arch/arm64/kvm/pmu-emul.c | 312 ++
 include/kvm/arm_pmu.h |   2 -
 2 files changed, 83 insertions(+), 231 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 0003c7d37533..a38b3127f649 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -15,16 +15,14 @@
 #include 
 #include 
 
+#define PERF_ATTR_CFG1_COUNTER_64BIT   BIT(0)
+
 DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
 
 static LIST_HEAD(arm_pmus);
 static DEFINE_MUTEX(arm_pmus_lock);
 
 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
-static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
-static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
-
-#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
 
 static u32 kvm_pmu_event_mask(struct kvm *kvm)
 {
@@ -57,6 +55,11 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
 }
 
+static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx)
+{
+   return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX);
+}
+
 static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
 {
struct kvm_pmu *pmu;
@@ -69,91 +72,22 @@ static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
 }
 
 /**
- * kvm_pmu_pmc_is_chained - determine if the pmc is chained
- * @pmc: The PMU counter pointer
- */
-static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
-{
-   struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
-
-   return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
-}
-
-/**
- * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
- * @select_idx: The counter index
- */
-static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
-{
-   return select_idx & 0x1;
-}
-
-/**
- * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
- * @pmc: The PMU counter pointer
- *
- * When a pair of PMCs are chained together we use the low counter (canonical)
- * to hold the underlying perf event.
- */
-static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
-{
-   if (kvm_pmu_pmc_is_chained(pmc) &&
-   kvm_pmu_idx_is_high_counter(pmc->idx))
-   return pmc - 1;
-
-   return pmc;
-}
-static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
-{
-   if (kvm_pmu_idx_is_high_counter(pmc->idx))
-   return pmc - 1;
-   else
-   return pmc + 1;
-}
-
-/**
- * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
+ * kvm_pmu_get_counter_value - get PMU counter value
  * @vcpu: The vcpu pointer
  * @select_idx: The counter index
  */
-static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
-{
-   u64 eventsel, reg;
-
-   select_idx |= 0x1;
-
-   if (select_idx == ARMV8_PMU_CYCLE_IDX)
-   return false;
-
-   reg = PMEVTYPER0_EL0 + select_idx;
-   eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
-
-   return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
-}
-
-/**
- * kvm_pmu_get_pair_counter_value - get PMU counter value
- * @vcpu: The vcpu pointer
- * @pmc: The PMU counter pointer
- */
-static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
- struct kvm_pmc *pmc)
+u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
 {
-   u64 counter, counter_high, reg, enabled, running;
-
-   if (kvm_pmu_pmc_is_chained(pmc)) {
-   pmc = kvm_pmu_get_canonical_pmc(pmc);
-   reg = PMEVCNTR0_EL0 + pmc->idx;
+   u64 counter, reg, enabled, running;
> +   struct kvm_pmu *pmu = &vcpu->arch.pmu;
+   struct kvm_pmc *pmc =