On 26/03/16 02:14, Andre Przywara wrote:
> Add emulation for some basic MMIO registers used in the ITS emulation.
> This includes:
> - GITS_{CTLR,TYPER,IIDR}
> - ID registers
> - GITS_{CBASER,CREADR,CWRITER}
>   those implement the ITS command buffer handling
> 
> Most of the handlers are pretty straight forward, but CWRITER goes
> some extra miles to allow fine grained locking. The idea here
> is to let only the first instance iterate through the command ring
> buffer, CWRITER accesses on other VCPUs meanwhile will be picked up
> by that first instance and handled as well. The ITS lock is thus only
> hold for very small periods of time and is dropped before the actual

s/hold/held/

> command handler is called.
> 
> Signed-off-by: Andre Przywara <[email protected]>
> ---
>  include/kvm/vgic/vgic.h            |   3 +
>  include/linux/irqchip/arm-gic-v3.h |   8 ++
>  virt/kvm/arm/vgic/its-emul.c       | 272 
> ++++++++++++++++++++++++++++++++++++-
>  virt/kvm/arm/vgic/vgic.h           |   6 +
>  virt/kvm/arm/vgic/vgic_init.c      |   2 +
>  5 files changed, 284 insertions(+), 7 deletions(-)
> 
> diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h
> index c79bed5..bafea11 100644
> --- a/include/kvm/vgic/vgic.h
> +++ b/include/kvm/vgic/vgic.h
> @@ -115,6 +115,9 @@ struct vgic_io_device {
>  struct vgic_its {
>       bool                    enabled;
>       spinlock_t              lock;
> +     u64                     cbaser;
> +     int                     creadr;
> +     int                     cwriter;

Irk. Please use explicitly sized types.

>  };
>  
>  struct vgic_dist {
> diff --git a/include/linux/irqchip/arm-gic-v3.h 
> b/include/linux/irqchip/arm-gic-v3.h
> index a813c3e..7011b98 100644
> --- a/include/linux/irqchip/arm-gic-v3.h
> +++ b/include/linux/irqchip/arm-gic-v3.h
> @@ -179,15 +179,23 @@
>  #define GITS_BASER                   0x0100
>  #define GITS_IDREGS_BASE             0xffd0
>  #define GITS_PIDR2                   GICR_PIDR2
> +#define GITS_PIDR4                   0xffd0
> +#define GITS_CIDR0                   0xfff0
> +#define GITS_CIDR1                   0xfff4
> +#define GITS_CIDR2                   0xfff8
> +#define GITS_CIDR3                   0xfffc
>  
>  #define GITS_TRANSLATER                      0x10040
>  
>  #define GITS_CTLR_ENABLE             (1U << 0)
>  #define GITS_CTLR_QUIESCENT          (1U << 31)
>  
> +#define GITS_TYPER_PLPIS             (1UL << 0)
> +#define GITS_TYPER_IDBITS_SHIFT              8
>  #define GITS_TYPER_DEVBITS_SHIFT     13
>  #define GITS_TYPER_DEVBITS(r)                ((((r) >> 
> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1)
>  #define GITS_TYPER_PTA                       (1UL << 19)
> +#define GITS_TYPER_HWCOLLCNT_SHIFT   24
>  
>  #define GITS_CBASER_VALID            (1UL << 63)
>  #define GITS_CBASER_nCnB             (0UL << 59)
> diff --git a/virt/kvm/arm/vgic/its-emul.c b/virt/kvm/arm/vgic/its-emul.c
> index 49dd5e4..de8d360 100644
> --- a/virt/kvm/arm/vgic/its-emul.c
> +++ b/virt/kvm/arm/vgic/its-emul.c
> @@ -31,23 +31,263 @@
>  #include "vgic.h"
>  #include "vgic_mmio.h"
>  
> +#define BASER_BASE_ADDRESS(x) ((x) & 0xfffffffff000ULL)
> +
> +static int vgic_mmio_read_its_ctlr(struct kvm_vcpu *vcpu,
> +                                struct kvm_io_device *this,
> +                                gpa_t addr, int len, void *val)
> +{
> +     struct vgic_its *its = &vcpu->kvm->arch.vgic.its;
> +     u32 reg;
> +
> +     reg = GITS_CTLR_QUIESCENT;

So your ITS is always in a quiescent state? Even when you're processing
the command queue? You'll have to convince me...

> +     if (its->enabled)
> +             reg |= GITS_CTLR_ENABLE;
> +
> +     write_mask32(reg, addr & 3, len, val);
> +
> +     return 0;
> +}
> +
> +static int vgic_mmio_write_its_ctlr(struct kvm_vcpu *vcpu,
> +                                 struct kvm_io_device *this,
> +                                 gpa_t addr, int len, const void *val)
> +{
> +     struct vgic_its *its = &vcpu->kvm->arch.vgic.its;
> +     struct vgic_io_device *iodev = container_of(this,
> +                                                 struct vgic_io_device, dev);
> +
> +        if (addr - iodev->base_addr == 0)

Whitespace issue: the indentation of this line doesn't match the rest of the function.

> +             its->enabled = !!(*(u8*)val & GITS_CTLR_ENABLE);
> +
> +     return 0;
> +}
> +
> +static int vgic_mmio_read_its_typer(struct kvm_vcpu *vcpu,
> +                                 struct kvm_io_device *this,
> +                                 gpa_t addr, int len, void *val)
> +{
> +     u64 reg = GITS_TYPER_PLPIS;
> +
> +     /*
> +      * We use linear CPU numbers for redistributor addressing,
> +      * so GITS_TYPER.PTA is 0.
> +      * To avoid memory waste on the guest side, we keep the
> +      * number of IDBits and DevBits low for the time being.
> +      * This could later be made configurable by userland.
> +      * Since we have all collections in linked list, we claim
> +      * that we can hold all of the collection tables in our
> +      * own memory and that the ITT entry size is 1 byte (the
> +      * smallest possible one).

All of this is going to bite us when we want to implement migration,
especially the HW collection bit.

> +      */
> +     reg |= 0xff << GITS_TYPER_HWCOLLCNT_SHIFT;
> +     reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT;
> +     reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT;
> +
> +     write_mask64(reg, addr & 7, len, val);
> +
> +     return 0;
> +}
> +
> +static int vgic_mmio_read_its_iidr(struct kvm_vcpu *vcpu,
> +                                struct kvm_io_device *this,
> +                                gpa_t addr, int len, void *val)
> +{
> +     u32 reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0);
> +
> +     write_mask32(reg, addr & 3, len, val);
> +
> +     return 0;
> +}
> +
> +static int vgic_mmio_read_its_idregs(struct kvm_vcpu *vcpu,
> +                                  struct kvm_io_device *this,
> +                                  gpa_t addr, int len, void *val)
> +{
> +     struct vgic_io_device *iodev = container_of(this,
> +                                                 struct vgic_io_device, dev);
> +     u32 reg = 0;
> +     int idreg = (addr & ~3) - iodev->base_addr + GITS_IDREGS_BASE;
> +
> +     switch (idreg) {
> +     case GITS_PIDR2:
> +             reg = GIC_PIDR2_ARCH_GICv3;

Are we leaving the lowest 4 bits to zero?

> +             break;
> +     case GITS_PIDR4:
> +             /* This is a 64K software visible page */
> +             reg = 0x40;

Same question.

Also, how about all the other PIDR registers?

> +             break;
> +     /* Those are the ID registers for (any) GIC. */
> +     case GITS_CIDR0:
> +             reg = 0x0d;
> +             break;
> +     case GITS_CIDR1:
> +             reg = 0xf0;
> +             break;
> +     case GITS_CIDR2:
> +             reg = 0x05;
> +             break;
> +     case GITS_CIDR3:
> +             reg = 0xb1;
> +             break;
> +     }

Given that these values are directly taken from the architecture, and
seem common to the whole GICv3 architecture when implemented by ARM, we
could have a common handler for the whole GICv3 implementation. Not a
big deal though.

> +
> +     write_mask32(reg, addr & 3, len, val);
> +
> +     return 0;
> +}
> +
> +/*
> + * This function is called with both the ITS and the distributor lock 
> dropped,
> + * so the actual command handlers must take the respective locks when needed.
> + */
> +static int vits_handle_command(struct kvm_vcpu *vcpu, u64 *its_cmd)
> +{
> +     return -ENODEV;
> +}
> +
> +static int vgic_mmio_read_its_cbaser(struct kvm_vcpu *vcpu,
> +                                 struct kvm_io_device *this,
> +                                 gpa_t addr, int len, void *val)
> +{
> +     struct vgic_its *its = &vcpu->kvm->arch.vgic.its;
> +
> +     write_mask64(its->cbaser, addr & 7, len, val);
> +
> +     return 0;
> +}
> +
> +static int vgic_mmio_write_its_cbaser(struct kvm_vcpu *vcpu,
> +                                   struct kvm_io_device *this,
> +                                   gpa_t addr, int len, const void *val)
> +{
> +     struct vgic_its *its = &vcpu->kvm->arch.vgic.its;
> +
> +     if (its->enabled)
> +             return 0;
> +
> +     its->cbaser = mask64(its->cbaser, addr & 7, len, val);
> +     its->creadr = 0;

Don't you need to acquire the command queue lock here?

> +
> +     return 0;
> +}
> +
> +static int its_cmd_buffer_size(struct kvm *kvm)
> +{
> +     struct vgic_its *its = &kvm->arch.vgic.its;
> +
> +     return ((its->cbaser & 0xff) + 1) << 12;
> +}
> +
> +static gpa_t its_cmd_buffer_base(struct kvm *kvm)
> +{
> +     struct vgic_its *its = &kvm->arch.vgic.its;
> +
> +     return BASER_BASE_ADDRESS(its->cbaser);
> +}
> +
> +/*
> + * By writing to CWRITER the guest announces new commands to be processed.
> + * Since we cannot read from guest memory inside the ITS spinlock, we
> + * iterate over the command buffer (with the lock dropped) until the read
> + * pointer matches the write pointer. Other VCPUs writing this register in 
> the
> + * meantime will just update the write pointer, leaving the command
> + * processing to the first instance of the function.
> + */
> +static int vgic_mmio_write_its_cwriter(struct kvm_vcpu *vcpu,
> +                                    struct kvm_io_device *this,
> +                                    gpa_t addr, int len, const void *val)
> +{
> +     struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> +     struct vgic_its *its = &dist->its;
> +     gpa_t cbaser = its_cmd_buffer_base(vcpu->kvm);
> +     u64 cmd_buf[4];
> +     u32 reg;
> +     bool finished;
> +
> +     reg = mask64(its->cwriter & 0xfffe0, addr & 7, len, val);
> +     reg &= 0xfffe0;
> +     if (reg > its_cmd_buffer_size(vcpu->kvm))
> +             return 0;
> +
> +     spin_lock(&its->lock);
> +
> +     /*
> +      * If there is still another VCPU handling commands, let this
> +      * one pick up the new CWRITER and process "our" new commands as well.
> +      */

How do you detect that condition? All I see is a massive race here, with
two threads processing the queue in parallel, possibly corrupting each
other's data.

Please explain why you think this is safe.

> +     finished = (its->cwriter != its->creadr);
> +     its->cwriter = reg;
> +
> +     spin_unlock(&its->lock);
> +
> +     while (!finished) {
> +             int ret = kvm_read_guest(vcpu->kvm, cbaser + its->creadr,
> +                                      cmd_buf, 32);
> +             if (ret) {
> +                     /*
> +                      * Gah, we are screwed. Reset CWRITER to that command
> +                      * that we have finished processing and return.
> +                      */
> +                     spin_lock(&its->lock);
> +                     its->cwriter = its->creadr;
> +                     spin_unlock(&its->lock);
> +                     break;
> +             }
> +             vits_handle_command(vcpu, cmd_buf);
> +
> +             spin_lock(&its->lock);
> +             its->creadr += 32;
> +             if (its->creadr == its_cmd_buffer_size(vcpu->kvm))
> +                     its->creadr = 0;
> +             finished = (its->creadr == its->cwriter);
> +             spin_unlock(&its->lock);
> +     }
> +
> +     return 0;
> +}
> +
> +static int vgic_mmio_read_its_cwriter(struct kvm_vcpu *vcpu,
> +                                   struct kvm_io_device *this,
> +                                   gpa_t addr, int len, void *val)
> +{
> +     struct vgic_its *its = &vcpu->kvm->arch.vgic.its;
> +     u64 reg = its->cwriter & 0xfffe0;
> +
> +     write_mask64(reg, addr & 7, len, val);
> +
> +     return 0;
> +}
> +
> +static int vgic_mmio_read_its_creadr(struct kvm_vcpu *vcpu,
> +                                  struct kvm_io_device *this,
> +                                  gpa_t addr, int len, void *val)
> +{
> +     struct vgic_its *its = &vcpu->kvm->arch.vgic.its;
> +     u64 reg = its->creadr & 0xfffe0;
> +
> +     write_mask64(reg, addr & 7, len, val);
> +
> +     return 0;
> +}
> +
>  struct vgic_register_region its_registers[] = {
>       REGISTER_DESC_WITH_LENGTH(GITS_CTLR,
> -             vgic_mmio_read_raz, vgic_mmio_write_wi, 4),
> +             vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4),
>       REGISTER_DESC_WITH_LENGTH(GITS_IIDR,
> -             vgic_mmio_read_raz, vgic_mmio_write_wi, 4),
> +             vgic_mmio_read_its_iidr, vgic_mmio_write_wi, 4),
>       REGISTER_DESC_WITH_LENGTH(GITS_TYPER,
> -             vgic_mmio_read_raz, vgic_mmio_write_wi, 4),
> +             vgic_mmio_read_its_typer, vgic_mmio_write_wi, 4),
>       REGISTER_DESC_WITH_LENGTH(GITS_CBASER,
> -             vgic_mmio_read_raz, vgic_mmio_write_wi, 8),
> +             vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8),
>       REGISTER_DESC_WITH_LENGTH(GITS_CWRITER,
> -             vgic_mmio_read_raz, vgic_mmio_write_wi, 8),
> +             vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8),
>       REGISTER_DESC_WITH_LENGTH(GITS_CREADR,
> -             vgic_mmio_read_raz, vgic_mmio_write_wi, 8),
> +             vgic_mmio_read_its_creadr, vgic_mmio_write_wi, 8),
>       REGISTER_DESC_WITH_LENGTH(GITS_BASER,
>               vgic_mmio_read_raz, vgic_mmio_write_wi, 0x40),
>       REGISTER_DESC_WITH_LENGTH(GITS_IDREGS_BASE,
> -             vgic_mmio_read_raz, vgic_mmio_write_wi, 0x30),
> +             vgic_mmio_read_its_idregs, vgic_mmio_write_wi, 0x30),
>  };
>  
>  /* This is called on setting the LPI enable bit in the redistributor. */
> @@ -59,9 +299,14 @@ int vits_init(struct kvm *kvm)
>  {
>       struct vgic_dist *dist = &kvm->arch.vgic;
>       struct vgic_its *its = &dist->its;
> +     int nr_vcpus = atomic_read(&kvm->online_vcpus);
>       struct vgic_io_device *regions;
>       int ret, i;
>  
> +     dist->pendbaser = kcalloc(nr_vcpus, sizeof(u64), GFP_KERNEL);
> +     if (!dist->pendbaser)
> +             return -ENOMEM;
> +
>       spin_lock_init(&its->lock);
>  
>       regions = kmalloc_array(ARRAY_SIZE(its_registers),
> @@ -82,3 +327,16 @@ int vits_init(struct kvm *kvm)
>  
>       return -ENXIO;
>  }
> +
> +void vits_destroy(struct kvm *kvm)
> +{
> +     struct vgic_dist *dist = &kvm->arch.vgic;
> +     struct vgic_its *its = &dist->its;
> +
> +     if (!vgic_has_its(kvm))
> +             return;
> +
> +     kfree(dist->pendbaser);
> +
> +     its->enabled = false;
> +}
> diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
> index 4e7dcb8..08f97d1 100644
> --- a/virt/kvm/arm/vgic/vgic.h
> +++ b/virt/kvm/arm/vgic/vgic.h
> @@ -63,6 +63,7 @@ int vgic_register_redist_regions(struct kvm *kvm, gpa_t 
> dist_base_address);
>  
>  int vits_init(struct kvm *kvm);
>  void vgic_enable_lpis(struct kvm_vcpu *vcpu);
> +void vits_destroy(struct kvm *kvm);
>  #else
>  static inline void vgic_v3_irq_change_affinity(struct kvm *kvm, u32 intid,
>                                              u64 mpidr)
> @@ -137,6 +138,11 @@ static inline void vgic_enable_lpis(struct kvm_vcpu 
> *vcpu)
>  {
>       return;
>  }
> +
> +static inline void vits_destroy(struct kvm *kvm)
> +{
> +     return;
> +}
>  #endif
>  
>  void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr);
> diff --git a/virt/kvm/arm/vgic/vgic_init.c b/virt/kvm/arm/vgic/vgic_init.c
> index dcfb93d..e4459e3 100644
> --- a/virt/kvm/arm/vgic/vgic_init.c
> +++ b/virt/kvm/arm/vgic/vgic_init.c
> @@ -298,6 +298,8 @@ void kvm_vgic_destroy(struct kvm *kvm)
>  
>       kvm_vgic_dist_destroy(kvm);
>  
> +     vits_destroy(kvm);
> +
>       kvm_for_each_vcpu(i, vcpu, kvm)
>               kvm_vgic_vcpu_destroy(vcpu);
>  }
> 

Thanks,

        M.
-- 
Jazz is not dead. It just smells funny...
_______________________________________________
kvmarm mailing list
[email protected]
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

Reply via email to