Hi, Marc,

On Tue, 2018-03-20 at 15:25 +0100, Auger Eric wrote:
> Hi Marc,
> 
> On 16/03/18 15:30, Marc Zyngier wrote:
> > 
> > It was recently reported that VFIO mediated devices, and anything
> > that VFIO exposes as level interrupts, do no strictly follow the
> > expected logic of such interrupts as it only lowers the input
> > line when the guest has EOId the interrupt at the GIC level, rather
> > than when it Acked the interrupt at the device level.
> > 
> > THe GIC's Active+Pending state is fundamentally incompatible with
> > this behaviour, as it prevents KVM from observing the EOI, and in
> > turn results in VFIO never dropping the line. This results in an
> > interrupt storm in the guest, which it really never expected.
> > 
> > As we cannot really change VFIO to follow the strict rules of level
> > signalling, let's forbid the A+P state altogether, as it is in the
> > end only an optimization. It ensures that we will transition via
> > an invalid state, which we can use to notify VFIO of the EOI.
> > 
> > Signed-off-by: Marc Zyngier <[email protected]>
> Reviewed-by: Eric Auger <[email protected]>
> Tested-by: Eric Auger <[email protected]>
> with mdev tty device
> 
> Thanks
> 
> Eric

Tested-by: Shunyong Yang <[email protected]>

Thanks.
Shunyong.

> > 
> > ---
> > - From v1:
> >   * Only clear the latch when going via an invalid state
> > 
> >  virt/kvm/arm/vgic/vgic-v2.c | 54 +++++++++++++++++++++++++------
> > --------------
> >  virt/kvm/arm/vgic/vgic-v3.c | 54 +++++++++++++++++++++++++------
> > --------------
> >  2 files changed, 60 insertions(+), 48 deletions(-)
> > 
> > diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-
> > v2.c
> > index 29556f71b691..8427586760fc 100644
> > --- a/virt/kvm/arm/vgic/vgic-v2.c
> > +++ b/virt/kvm/arm/vgic/vgic-v2.c
> > @@ -105,12 +105,9 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu
> > *vcpu)
> >  
> >             /*
> >              * Clear soft pending state when level irqs have
> > been acked.
> > -            * Always regenerate the pending state.
> >              */
> > -           if (irq->config == VGIC_CONFIG_LEVEL) {
> > -                   if (!(val & GICH_LR_PENDING_BIT))
> > -                           irq->pending_latch = false;
> > -           }
> > +           if (irq->config == VGIC_CONFIG_LEVEL && !(val &
> > GICH_LR_STATE))
> > +                   irq->pending_latch = false;
> >  
> >             /*
> >              * Level-triggered mapped IRQs are special because
> > we only
> > @@ -153,8 +150,35 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu
> > *vcpu)
> >  void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq
> > *irq, int lr)
> >  {
> >     u32 val = irq->intid;
> > +   bool allow_pending = true;
> > +
> > +   if (irq->active)
> > +           val |= GICH_LR_ACTIVE_BIT;
> > +
> > +   if (irq->hw) {
> > +           val |= GICH_LR_HW;
> > +           val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
> > +           /*
> > +            * Never set pending+active on a HW interrupt, as
> > the
> > +            * pending state is kept at the physical
> > distributor
> > +            * level.
> > +            */
> > +           if (irq->active)
> > +                   allow_pending = false;
> > +   } else {
> > +           if (irq->config == VGIC_CONFIG_LEVEL) {
> > +                   val |= GICH_LR_EOI;
> >  
> > -   if (irq_is_pending(irq)) {
> > +                   /*
> > +                    * Software resampling doesn't work very
> > well
> > +                    * if we allow P+A, so let's not do that.
> > +                    */
> > +                   if (irq->active)
> > +                           allow_pending = false;
> > +           }
> > +   }
> > +
> > +   if (allow_pending && irq_is_pending(irq)) {
> >             val |= GICH_LR_PENDING_BIT;
> >  
> >             if (irq->config == VGIC_CONFIG_EDGE)
> > @@ -171,24 +195,6 @@ void vgic_v2_populate_lr(struct kvm_vcpu
> > *vcpu, struct vgic_irq *irq, int lr)
> >             }
> >     }
> >  
> > -   if (irq->active)
> > -           val |= GICH_LR_ACTIVE_BIT;
> > -
> > -   if (irq->hw) {
> > -           val |= GICH_LR_HW;
> > -           val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT;
> > -           /*
> > -            * Never set pending+active on a HW interrupt, as
> > the
> > -            * pending state is kept at the physical
> > distributor
> > -            * level.
> > -            */
> > -           if (irq->active && irq_is_pending(irq))
> > -                   val &= ~GICH_LR_PENDING_BIT;
> > -   } else {
> > -           if (irq->config == VGIC_CONFIG_LEVEL)
> > -                   val |= GICH_LR_EOI;
> > -   }
> > -
> >     /*
> >      * Level-triggered mapped IRQs are special because we only
> > observe
> >      * rising edges as input to the VGIC.  We therefore lower
> > the line
> > diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-
> > v3.c
> > index 0ff2006f3781..dd984f726805 100644
> > --- a/virt/kvm/arm/vgic/vgic-v3.c
> > +++ b/virt/kvm/arm/vgic/vgic-v3.c
> > @@ -96,12 +96,9 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu
> > *vcpu)
> >  
> >             /*
> >              * Clear soft pending state when level irqs have
> > been acked.
> > -            * Always regenerate the pending state.
> >              */
> > -           if (irq->config == VGIC_CONFIG_LEVEL) {
> > -                   if (!(val & ICH_LR_PENDING_BIT))
> > -                           irq->pending_latch = false;
> > -           }
> > +           if (irq->config == VGIC_CONFIG_LEVEL && !(val &
> > ICH_LR_STATE))
> > +                   irq->pending_latch = false;
> >  
> >             /*
> >              * Level-triggered mapped IRQs are special because
> > we only
> > @@ -135,8 +132,35 @@ void vgic_v3_populate_lr(struct kvm_vcpu
> > *vcpu, struct vgic_irq *irq, int lr)
> >  {
> >     u32 model = vcpu->kvm->arch.vgic.vgic_model;
> >     u64 val = irq->intid;
> > +   bool allow_pending = true;
> > +
> > +   if (irq->active)
> > +           val |= ICH_LR_ACTIVE_BIT;
> > +
> > +   if (irq->hw) {
> > +           val |= ICH_LR_HW;
> > +           val |= ((u64)irq->hwintid) <<
> > ICH_LR_PHYS_ID_SHIFT;
> > +           /*
> > +            * Never set pending+active on a HW interrupt, as
> > the
> > +            * pending state is kept at the physical
> > distributor
> > +            * level.
> > +            */
> > +           if (irq->active)
> > +                   allow_pending = false;
> > +   } else {
> > +           if (irq->config == VGIC_CONFIG_LEVEL) {
> > +                   val |= ICH_LR_EOI;
> >  
> > -   if (irq_is_pending(irq)) {
> > +                   /*
> > +                    * Software resampling doesn't work very
> > well
> > +                    * if we allow P+A, so let's not do that.
> > +                    */
> > +                   if (irq->active)
> > +                           allow_pending = false;
> > +           }
> > +   }
> > +
> > +   if (allow_pending && irq_is_pending(irq)) {
> >             val |= ICH_LR_PENDING_BIT;
> >  
> >             if (irq->config == VGIC_CONFIG_EDGE)
> > @@ -154,24 +178,6 @@ void vgic_v3_populate_lr(struct kvm_vcpu
> > *vcpu, struct vgic_irq *irq, int lr)
> >             }
> >     }
> >  
> > -   if (irq->active)
> > -           val |= ICH_LR_ACTIVE_BIT;
> > -
> > -   if (irq->hw) {
> > -           val |= ICH_LR_HW;
> > -           val |= ((u64)irq->hwintid) <<
> > ICH_LR_PHYS_ID_SHIFT;
> > -           /*
> > -            * Never set pending+active on a HW interrupt, as
> > the
> > -            * pending state is kept at the physical
> > distributor
> > -            * level.
> > -            */
> > -           if (irq->active && irq_is_pending(irq))
> > -                   val &= ~ICH_LR_PENDING_BIT;
> > -   } else {
> > -           if (irq->config == VGIC_CONFIG_LEVEL)
> > -                   val |= ICH_LR_EOI;
> > -   }
> > -
> >     /*
> >      * Level-triggered mapped IRQs are special because we only
> > observe
> >      * rising edges as input to the VGIC.  We therefore lower
> > the line
> > 
_______________________________________________
kvmarm mailing list
[email protected]
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

Reply via email to