Re: [PATCH 3/4] KVM: Emulate MSI-X table in kernel

Michael S. Tsirkin Thu, 24 Feb 2011 02:45:26 -0800

On Thu, Feb 24, 2011 at 05:51:04PM +0800, Sheng Yang wrote:
> Then we can support mask bit operation of assigned devices now.
> 
> Signed-off-by: Sheng Yang <sh...@linux.intel.com>


Doesn't look like all comments got addressed.
E.g. gpa_t entry_base is still there and in reality
you said it's a host virtual address so
should be void __user *;
And ENOTSYNC meaning 'MSIX' is pretty hacky.

> ---
>  arch/x86/include/asm/kvm_host.h |    1 +
>  arch/x86/kvm/Makefile           |    2 +-
>  arch/x86/kvm/mmu.c              |    2 +
>  arch/x86/kvm/x86.c              |   40 ++++-
>  include/linux/kvm.h             |   28 ++++
>  include/linux/kvm_host.h        |   34 +++++
>  virt/kvm/assigned-dev.c         |   44 ++++++
>  virt/kvm/kvm_main.c             |   38 +++++-
>  virt/kvm/msix_mmio.c            |  296 
> +++++++++++++++++++++++++++++++++++++++
>  virt/kvm/msix_mmio.h            |   25 ++++
>  10 files changed, 497 insertions(+), 13 deletions(-)
>  create mode 100644 virt/kvm/msix_mmio.c
>  create mode 100644 virt/kvm/msix_mmio.h
> 
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index aa75f21..4a390a4 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -635,6 +635,7 @@ enum emulation_result {
>       EMULATE_DONE,       /* no further processing */
>       EMULATE_DO_MMIO,      /* kvm_run filled with mmio request */
>       EMULATE_FAIL,         /* can't emulate this instruction */
> +     EMULATE_USERSPACE_EXIT, /* we need exit to userspace */
>  };
>  
>  #define EMULTYPE_NO_DECODE       (1 << 0)
> diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
> index f15501f..3a0d851 100644
> --- a/arch/x86/kvm/Makefile
> +++ b/arch/x86/kvm/Makefile
> @@ -7,7 +7,7 @@ CFLAGS_vmx.o := -I.
>  
>  kvm-y                        += $(addprefix ../../../virt/kvm/, kvm_main.o 
> ioapic.o \
>                               coalesced_mmio.o irq_comm.o eventfd.o \
> -                             assigned-dev.o)
> +                             assigned-dev.o msix_mmio.o)
>  kvm-$(CONFIG_IOMMU_API)      += $(addprefix ../../../virt/kvm/, iommu.o)
>  kvm-$(CONFIG_KVM_ASYNC_PF)   += $(addprefix ../../../virt/kvm/, async_pf.o)
>  
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 9cafbb4..912dca4 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -3358,6 +3358,8 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t 
> cr2, u32 error_code,
>       case EMULATE_DO_MMIO:
>               ++vcpu->stat.mmio_exits;
>               /* fall through */
> +     case EMULATE_USERSPACE_EXIT:
> +             /* fall through */
>       case EMULATE_FAIL:
>               return 0;
>       default:
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 21b84e2..87308eb 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -1966,6 +1966,7 @@ int kvm_dev_ioctl_check_extension(long ext)
>       case KVM_CAP_X86_ROBUST_SINGLESTEP:
>       case KVM_CAP_XSAVE:
>       case KVM_CAP_ASYNC_PF:
> +     case KVM_CAP_MSIX_MMIO:
>               r = 1;
>               break;
>       case KVM_CAP_COALESCED_MMIO:
> @@ -3809,6 +3810,7 @@ static int emulator_write_emulated_onepage(unsigned 
> long addr,
>  {
>       gpa_t                 gpa;
>       struct kvm_io_ext_data ext_data;
> +     int r;
>  
>       gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, exception);
>  
> @@ -3824,18 +3826,32 @@ static int emulator_write_emulated_onepage(unsigned 
> long addr,
>  
>  mmio:
>       trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
> +     r = vcpu_mmio_write(vcpu, gpa, bytes, val, &ext_data);
>       /*
>        * Is this MMIO handled locally?
>        */
> -     if (!vcpu_mmio_write(vcpu, gpa, bytes, val, &ext_data))
> +     if (!r)
>               return X86EMUL_CONTINUE;
>  
> -     vcpu->mmio_needed = 1;
> -     vcpu->run->exit_reason = KVM_EXIT_MMIO;
> -     vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
> -     vcpu->run->mmio.len = vcpu->mmio_size = bytes;
> -     vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
> -     memcpy(vcpu->run->mmio.data, val, bytes);
> +     if (r == -ENOTSYNC) {
> +             vcpu->userspace_exit_needed = 1;
> +             vcpu->run->exit_reason = KVM_EXIT_MSIX_ROUTING_UPDATE;
> +             vcpu->run->msix_routing.dev_id =
> +                     ext_data.msix_routing.dev_id;
> +             vcpu->run->msix_routing.type =
> +                     ext_data.msix_routing.type;
> +             vcpu->run->msix_routing.entry_idx =
> +                     ext_data.msix_routing.entry_idx;
> +             vcpu->run->msix_routing.flags =
> +                     ext_data.msix_routing.flags;
> +     } else  {
> +             vcpu->mmio_needed = 1;
> +             vcpu->run->exit_reason = KVM_EXIT_MMIO;
> +             vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
> +             vcpu->run->mmio.len = vcpu->mmio_size = bytes;
> +             vcpu->run->mmio.is_write = vcpu->mmio_is_write = 1;
> +             memcpy(vcpu->run->mmio.data, val, bytes);
> +     }
>  
>       return X86EMUL_CONTINUE;
>  }
> @@ -4469,6 +4485,8 @@ done:
>               r = EMULATE_DO_MMIO;
>       } else if (r == EMULATION_RESTART)
>               goto restart;
> +     else if (vcpu->userspace_exit_needed)
> +             r = EMULATE_USERSPACE_EXIT;
>       else
>               r = EMULATE_DONE;
>  
> @@ -5397,12 +5415,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
> struct kvm_run *kvm_run)
>               }
>       }
>  
> -     if (vcpu->arch.pio.count || vcpu->mmio_needed) {
> +     if (vcpu->arch.pio.count || vcpu->mmio_needed ||
> +                     vcpu->userspace_exit_needed) {
>               if (vcpu->mmio_needed) {
>                       memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
>                       vcpu->mmio_read_completed = 1;
>                       vcpu->mmio_needed = 0;
>               }
> +             if (vcpu->userspace_exit_needed) {
> +                     vcpu->userspace_exit_needed = 0;
> +                     r = 0;
> +                     goto out;
> +             }
>               vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
>               r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
>               srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
> diff --git a/include/linux/kvm.h b/include/linux/kvm.h
> index ea2dc1a..4393e4e 100644
> --- a/include/linux/kvm.h
> +++ b/include/linux/kvm.h
> @@ -161,6 +161,7 @@ struct kvm_pit_config {
>  #define KVM_EXIT_NMI              16
>  #define KVM_EXIT_INTERNAL_ERROR   17
>  #define KVM_EXIT_OSI              18
> +#define KVM_EXIT_MSIX_ROUTING_UPDATE 19
>  
>  /* For KVM_EXIT_INTERNAL_ERROR */
>  #define KVM_INTERNAL_ERROR_EMULATION 1
> @@ -264,6 +265,13 @@ struct kvm_run {
>               struct {
>                       __u64 gprs[32];
>               } osi;
> +             /* KVM_EXIT_MSIX_ROUTING_UPDATE*/
> +             struct {
> +                     __u32 dev_id;
> +                     __u16 type;
> +                     __u16 entry_idx;
> +                     __u64 flags;
> +             } msix_routing;
>               /* Fix the size of the union. */
>               char padding[256];
>       };
> @@ -541,6 +549,7 @@ struct kvm_ppc_pvinfo {
>  #define KVM_CAP_PPC_GET_PVINFO 57
>  #define KVM_CAP_PPC_IRQ_LEVEL 58
>  #define KVM_CAP_ASYNC_PF 59
> +#define KVM_CAP_MSIX_MMIO 60
>  
>  #ifdef KVM_CAP_IRQ_ROUTING
>  
> @@ -672,6 +681,9 @@ struct kvm_clock_data {
>  #define KVM_XEN_HVM_CONFIG        _IOW(KVMIO,  0x7a, struct 
> kvm_xen_hvm_config)
>  #define KVM_SET_CLOCK             _IOW(KVMIO,  0x7b, struct kvm_clock_data)
>  #define KVM_GET_CLOCK             _IOR(KVMIO,  0x7c, struct kvm_clock_data)
> +/* Available with KVM_CAP_MSIX_MMIO */
> +#define KVM_REGISTER_MSIX_MMIO    _IOW(KVMIO,  0x7d, struct 
> kvm_msix_mmio_user)
> +#define KVM_UNREGISTER_MSIX_MMIO  _IOW(KVMIO,  0x7e, struct 
> kvm_msix_mmio_user)
>  /* Available with KVM_CAP_PIT_STATE2 */
>  #define KVM_GET_PIT2              _IOR(KVMIO,  0x9f, struct kvm_pit_state2)
>  #define KVM_SET_PIT2              _IOW(KVMIO,  0xa0, struct kvm_pit_state2)
> @@ -795,4 +807,20 @@ struct kvm_assigned_msix_entry {
>       __u16 padding[3];
>  };
>  
> +#define KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV          (1 << 0)
> +
> +#define KVM_MSIX_MMIO_TYPE_BASE_TABLE            (1 << 8)
> +
> +#define KVM_MSIX_MMIO_TYPE_DEV_MASK      0x00ff
> +#define KVM_MSIX_MMIO_TYPE_BASE_MASK     0xff00
> +struct kvm_msix_mmio_user {
> +     __u32 dev_id;
> +     __u16 type;
> +     __u16 max_entries_nr;
> +     __u64 base_addr;
> +     __u64 base_va;
> +     __u64 flags;
> +     __u64 reserved[4];
> +};
> +
>  #endif /* __LINUX_KVM_H */
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 6bb211d..6aaf85e 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -68,9 +68,16 @@ enum kvm_bus {
>       KVM_NR_BUSES
>  };
>  
> +#define KVM_IO_EXT_DATA_TYPE_MSIX_ROUTING   1
>  struct kvm_io_ext_data {
>       int type;
>       union {
> +             struct {
> +                     u32 dev_id;
> +                     u16 type;
> +                     u16 entry_idx;
> +                     u64 flags;
> +             } msix_routing;
>               char padding[256];
>       };
>  };
> @@ -168,6 +175,8 @@ struct kvm_vcpu {
>       } async_pf;
>  #endif
>  
> +     int userspace_exit_needed;
> +
>       struct kvm_vcpu_arch arch;
>  };
>  
> @@ -241,6 +250,27 @@ struct kvm_memslots {
>                                       KVM_PRIVATE_MEM_SLOTS];
>  };
>  
> +#define KVM_MSIX_MMIO_MAX    32
> +
> +struct kvm_msix_mmio {
> +     u32 dev_id;
> +     u16 type;
> +     u16 max_entries_nr;
> +     u64 flags;
> +     gpa_t table_base_addr;
> +     hva_t table_base_va;
> +     gpa_t pba_base_addr;
> +     hva_t pba_base_va;
> +};
> +
> +struct kvm_msix_mmio_dev {
> +     struct kvm *kvm;
> +     struct kvm_io_device table_dev;
> +     int mmio_nr;
> +     struct kvm_msix_mmio mmio[KVM_MSIX_MMIO_MAX];
> +     struct mutex lock;
> +};
> +
>  struct kvm {
>       spinlock_t mmu_lock;
>       raw_spinlock_t requests_lock;
> @@ -289,6 +319,7 @@ struct kvm {
>       long mmu_notifier_count;
>  #endif
>       long tlbs_dirty;
> +     struct kvm_msix_mmio_dev msix_mmio_dev;
>  };
>  
>  /* The guest did something we don't support. */
> @@ -561,6 +592,9 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
>  int kvm_request_irq_source_id(struct kvm *kvm);
>  void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
>  
> +int kvm_assigned_device_update_msix_mask_bit(struct kvm *kvm,
> +                     int assigned_dev_id, int entry, bool mask);
> +
>  /* For vcpu->arch.iommu_flags */
>  #define KVM_IOMMU_CACHE_COHERENCY    0x1
>  
> diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
> index ae72ae6..d1598a6 100644
> --- a/virt/kvm/assigned-dev.c
> +++ b/virt/kvm/assigned-dev.c
> @@ -18,6 +18,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/slab.h>
>  #include "irq.h"
> +#include "msix_mmio.h"
>  
>  static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct 
> list_head *head,
>                                                     int assigned_dev_id)
> @@ -191,12 +192,25 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
>       kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
>  }
>  
> +static void assigned_device_free_msix_mmio(struct kvm *kvm,
> +                             struct kvm_assigned_dev_kernel *adev)
> +{
> +     struct kvm_msix_mmio mmio;
> +
> +     mmio.dev_id = adev->assigned_dev_id;
> +     mmio.type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV |
> +                 KVM_MSIX_MMIO_TYPE_BASE_TABLE;
> +     kvm_free_msix_mmio(kvm, &mmio);
> +}
> +
>  static void kvm_free_assigned_device(struct kvm *kvm,
>                                    struct kvm_assigned_dev_kernel
>                                    *assigned_dev)
>  {
>       kvm_free_assigned_irq(kvm, assigned_dev);
>  
> +     assigned_device_free_msix_mmio(kvm, assigned_dev);
> +
>       __pci_reset_function(assigned_dev->dev);
>       pci_restore_state(assigned_dev->dev);
>  
> @@ -785,3 +799,33 @@ out:
>       return r;
>  }
>  
> +/* The caller should hold kvm->lock */
> +int kvm_assigned_device_update_msix_mask_bit(struct kvm *kvm,
> +                             int assigned_dev_id, int entry, bool mask)
> +{
> +     int r = -EFAULT;
> +     struct kvm_assigned_dev_kernel *adev;
> +     int i;
> +
> +     if (!irqchip_in_kernel(kvm))
> +             return r;
> +
> +     adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
> +                                   assigned_dev_id);
> +     if (!adev)
> +             goto out;
> +
> +     /* For non-MSIX enabled devices, entries_nr == 0 */
> +     for (i = 0; i < adev->entries_nr; i++)
> +             if (adev->host_msix_entries[i].entry == entry) {
> +                     if (mask)
> +                             disable_irq_nosync(
> +                                     adev->host_msix_entries[i].vector);
> +                     else
> +                             enable_irq(adev->host_msix_entries[i].vector);
> +                     r = 0;
> +                     break;
> +             }
> +out:
> +     return r;
> +}
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index a61f90e..f211e49 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -56,6 +56,7 @@
>  
>  #include "coalesced_mmio.h"
>  #include "async_pf.h"
> +#include "msix_mmio.h"
>  
>  #define CREATE_TRACE_POINTS
>  #include <trace/events/kvm.h>
> @@ -509,6 +510,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
>       struct mm_struct *mm = kvm->mm;
>  
>       kvm_arch_sync_events(kvm);
> +     kvm_unregister_msix_mmio_dev(kvm);
>       spin_lock(&kvm_lock);
>       list_del(&kvm->vm_list);
>       spin_unlock(&kvm_lock);
> @@ -1877,6 +1879,24 @@ static long kvm_vm_ioctl(struct file *filp,
>               mutex_unlock(&kvm->lock);
>               break;
>  #endif
> +     case KVM_REGISTER_MSIX_MMIO: {
> +             struct kvm_msix_mmio_user mmio_user;
> +
> +             r = -EFAULT;
> +             if (copy_from_user(&mmio_user, argp, sizeof mmio_user))
> +                     goto out;
> +             r = kvm_vm_ioctl_register_msix_mmio(kvm, &mmio_user);
> +             break;
> +     }
> +     case KVM_UNREGISTER_MSIX_MMIO: {
> +             struct kvm_msix_mmio_user mmio_user;
> +
> +             r = -EFAULT;
> +             if (copy_from_user(&mmio_user, argp, sizeof mmio_user))
> +                     goto out;
> +             r = kvm_vm_ioctl_unregister_msix_mmio(kvm, &mmio_user);
> +             break;
> +     }
>       default:
>               r = kvm_arch_vm_ioctl(filp, ioctl, arg);
>               if (r == -ENOTTY)
> @@ -1988,6 +2008,12 @@ static int kvm_dev_ioctl_create_vm(void)
>               return r;
>       }
>  #endif
> +     r = kvm_register_msix_mmio_dev(kvm);
> +     if (r < 0) {
> +             kvm_put_kvm(kvm);
> +             return r;
> +     }
> +
>       r = anon_inode_getfd("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
>       if (r < 0)
>               kvm_put_kvm(kvm);
> @@ -2223,14 +2249,18 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
>  int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
>                    int len, const void *val, struct kvm_io_ext_data *ext_data)
>  {
> -     int i;
> +     int i, r = -EOPNOTSUPP;
>       struct kvm_io_bus *bus;
>  
>       bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
> -     for (i = 0; i < bus->dev_count; i++)
> -             if (!kvm_iodevice_write(bus->devs[i], addr, len, val, ext_data))
> +     for (i = 0; i < bus->dev_count; i++) {
> +             r = kvm_iodevice_write(bus->devs[i], addr, len, val, ext_data);
> +             if (r == -ENOTSYNC)
> +                     break;
> +             else if (!r)
>                       return 0;
> -     return -EOPNOTSUPP;
> +     }
> +     return r;
>  }
>  
>  /* kvm_io_bus_read - called under kvm->slots_lock */
> diff --git a/virt/kvm/msix_mmio.c b/virt/kvm/msix_mmio.c
> new file mode 100644
> index 0000000..083b15b
> --- /dev/null
> +++ b/virt/kvm/msix_mmio.c
> @@ -0,0 +1,296 @@
> +/*
> + * MSI-X MMIO emulation
> + *
> + * Copyright (c) 2010 Intel Corporation
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + * Author:
> + *   Sheng Yang <sheng.y...@intel.com>
> + */
> +
> +#include <linux/kvm_host.h>
> +#include <linux/kvm.h>
> +
> +#include "msix_mmio.h"
> +#include "iodev.h"
> +
> +static int update_msix_mask_bit(struct kvm *kvm, struct kvm_msix_mmio *mmio,
> +                             int entry, u32 flag)
> +{
> +     if (mmio->type & KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV)
> +             return kvm_assigned_device_update_msix_mask_bit(kvm,
> +                             mmio->dev_id, entry, flag);
> +     return -EFAULT;
> +}
> +
> +/* Caller must hold dev->lock */
> +static int get_mmio_table_index(struct kvm_msix_mmio_dev *dev,
> +                             gpa_t addr, int len)
> +{
> +     gpa_t start, end;
> +     int i, r = -EINVAL;
> +
> +     for (i = 0; i < dev->mmio_nr; i++) {
> +             start = dev->mmio[i].table_base_addr;
> +             end = dev->mmio[i].table_base_addr + PCI_MSIX_ENTRY_SIZE *
> +                     dev->mmio[i].max_entries_nr;
> +             if (addr >= start && addr + len <= end) {
> +                     r = i;
> +                     break;
> +             }
> +     }
> +
> +     return r;
> +}
> +
> +static int msix_table_mmio_read(struct kvm_io_device *this, gpa_t addr, int 
> len,
> +                             void *val)
> +{
> +     /*TODO: Add big endian support */
> +     struct kvm_msix_mmio_dev *mmio_dev =
> +             container_of(this, struct kvm_msix_mmio_dev, table_dev);
> +     struct kvm_msix_mmio *mmio;
> +     int idx, ret = 0, entry, offset, r;
> +
> +     mutex_lock(&mmio_dev->lock);
> +     idx = get_mmio_table_index(mmio_dev, addr, len);
> +     if (idx < 0) {
> +             ret = -EOPNOTSUPP;
> +             goto out;
> +     }
> +     if ((addr & 0x3) || (len != 4 && len != 8))
> +             goto out;

addr & len as below?

> +
> +     offset = addr % PCI_MSIX_ENTRY_SIZE;
> +     if (offset == PCI_MSIX_ENTRY_VECTOR_CTRL && len == 8)
> +             goto out;

then this test won't be needed.

> +
> +     mmio = &mmio_dev->mmio[idx];
> +     entry = (addr - mmio->table_base_addr) / PCI_MSIX_ENTRY_SIZE;
> +     r = copy_from_user(val, (void __user *)(mmio->table_base_va +
> +                     entry * PCI_MSIX_ENTRY_SIZE + offset), len);
> +     if (r)
> +             goto out;
> +out:
> +     mutex_unlock(&mmio_dev->lock);
> +     return ret;
> +}
> +
> +static int msix_table_mmio_write(struct kvm_io_device *this, gpa_t addr,
> +                             int len, const void *val,
> +                             struct kvm_io_ext_data *ext_data)
> +{
> +     /*TODO: Add big endian support */
> +     struct kvm_msix_mmio_dev *mmio_dev =
> +             container_of(this, struct kvm_msix_mmio_dev, table_dev);
> +     struct kvm_msix_mmio *mmio;
> +     int idx, entry, offset, ret = 0, r = 0;
> +     gpa_t entry_base;
> +     u32 old_ctrl, new_ctrl;
> +     unsigned long __user *ctrl_pos;

long? It's 8 bytes on 64 bit.
You really want
__le32 old_ctrl, new_ctrl;
__le32 __user *ctrl_pos;

> +
> +     mutex_lock(&mmio_dev->kvm->lock);
> +     mutex_lock(&mmio_dev->lock);
> +     idx = get_mmio_table_index(mmio_dev, addr, len);
> +     if (idx < 0) {
> +             ret = -EOPNOTSUPP;
> +             goto out;
> +     }
> +     if (!(len == 4 || len == 8) || addr & (len - 1))

Nice hack. Even a bit nicer
        if ((len != 4 && len != 8) || addr & (len - 1))


> +             goto out;
> +
> +     offset = addr % PCI_MSIX_ENTRY_SIZE;
> +
> +     mmio = &mmio_dev->mmio[idx];
> +     entry = (addr - mmio->table_base_addr) / PCI_MSIX_ENTRY_SIZE;
> +     entry_base = mmio->table_base_va + entry * PCI_MSIX_ENTRY_SIZE;
> +     ctrl_pos = (unsigned long __user *)(entry_base +
> +                     PCI_MSIX_ENTRY_VECTOR_CTRL);

So this is the issue: if you cast a type to unsigned long *
compiler can assume that the address is aligned.
To prevent problems please add a check that table_base_va
is aligned.

> +
> +     if (get_user(old_ctrl, ctrl_pos))
> +             goto out;
> +
> +     /* Don't allow writing to other fields when entry is unmasked */
> +     if (!(old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) &&
> +         offset != PCI_MSIX_ENTRY_VECTOR_CTRL)
> +             goto out;
> +
> +     if (copy_to_user((void __user *)(entry_base + offset), val, len))
> +             goto out;
> +
> +     ext_data->type = KVM_IO_EXT_DATA_TYPE_MSIX_ROUTING;
> +     ext_data->msix_routing.dev_id = mmio->dev_id;
> +     ext_data->msix_routing.type = mmio->type;
> +     ext_data->msix_routing.entry_idx = entry;
> +     ext_data->msix_routing.flags = 0;
> +
> +     if (offset + len < PCI_MSIX_ENTRY_VECTOR_CTRL) {
> +             ret = -ENOTSYNC;
> +             goto out;
> +     }
> +
> +     if (get_user(new_ctrl, ctrl_pos))
> +             goto out;
> +
> +     if (old_ctrl == new_ctrl) {
> +             if (offset == PCI_MSIX_ENTRY_DATA && len == 8)
> +                     ret = -ENOTSYNC;
> +             goto out;
> +     }
> +     if ((old_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT) ^
> +                     (new_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT))
> +             r = update_msix_mask_bit(mmio_dev->kvm, mmio, entry,
> +                             !!(new_ctrl & PCI_MSIX_ENTRY_CTRL_MASKBIT));
> +     if (r)
> +             ret = -ENOTSYNC;
> +out:
> +     mutex_unlock(&mmio_dev->lock);
> +     mutex_unlock(&mmio_dev->kvm->lock);
> +     return ret;
> +}
> +
> +static const struct kvm_io_device_ops msix_mmio_table_ops = {
> +     .read     = msix_table_mmio_read,
> +     .write    = msix_table_mmio_write,
> +};
> +
> +int kvm_register_msix_mmio_dev(struct kvm *kvm)
> +{
> +     int ret;
> +
> +     kvm_iodevice_init(&kvm->msix_mmio_dev.table_dev, &msix_mmio_table_ops);
> +     mutex_init(&kvm->msix_mmio_dev.lock);
> +     kvm->msix_mmio_dev.kvm = kvm;
> +     mutex_lock(&kvm->slots_lock);
> +     ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS,
> +                                   &kvm->msix_mmio_dev.table_dev);
> +     mutex_unlock(&kvm->slots_lock);
> +     return ret;
> +}
> +
> +int kvm_unregister_msix_mmio_dev(struct kvm *kvm)
> +{
> +     int ret;
> +
> +     mutex_lock(&kvm->slots_lock);
> +     ret = kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS,
> +                                   &kvm->msix_mmio_dev.table_dev);
> +     mutex_unlock(&kvm->slots_lock);
> +     return ret;
> +}
> +
> +int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm,
> +                                 struct kvm_msix_mmio_user *mmio_user)
> +{
> +     struct kvm_msix_mmio_dev *mmio_dev = &kvm->msix_mmio_dev;
> +     struct kvm_msix_mmio *mmio = NULL;
> +     int r = 0, i;
> +
> +     mutex_lock(&mmio_dev->lock);
> +     for (i = 0; i < mmio_dev->mmio_nr; i++) {
> +             if (mmio_dev->mmio[i].dev_id == mmio_user->dev_id &&
> +                 (mmio_dev->mmio[i].type & KVM_MSIX_MMIO_TYPE_DEV_MASK) ==
> +                 (mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK)) {
> +                     mmio = &mmio_dev->mmio[i];
> +                     if (mmio->max_entries_nr != mmio_user->max_entries_nr) {
> +                             r = -EINVAL;
> +                             goto out;
> +                     }
> +                     break;
> +             }
> +     }
> +     if (mmio_user->max_entries_nr > KVM_MAX_MSIX_PER_DEV) {
> +             r = -EINVAL;
> +             goto out;
> +     }
> +     /* All reserved currently */
> +     if (mmio_user->flags) {
> +             r = -EINVAL;
> +             goto out;
> +     }
> +
> +     if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK) !=
> +                     KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV) {
> +             r = -EINVAL;
> +             goto out;
> +     }
> +     if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_BASE_MASK) !=
> +                     KVM_MSIX_MMIO_TYPE_BASE_TABLE) {
> +             r = -EINVAL;
> +             goto out;
> +     }
> +
> +     /* Check alignment and accessibility */
> +     if ((mmio_user->base_va % PCI_MSIX_ENTRY_SIZE) ||
> +         !access_ok(VERIFY_WRITE, (void __user *)mmio_user->base_va,

You also should check that base_va and friends fit in a pointer
for 32 bit architectures. Same for other va values.

> +                     mmio_user->max_entries_nr * PCI_MSIX_ENTRY_SIZE)) {
> +             r = -EINVAL;
> +             goto out;
> +     }
> +     if (!mmio) {
> +             if (mmio_dev->mmio_nr == KVM_MSIX_MMIO_MAX) {
> +                     r = -ENOSPC;
> +                     goto out;
> +             }
> +             mmio = &mmio_dev->mmio[mmio_dev->mmio_nr];
> +             mmio_dev->mmio_nr++;
> +     }
> +
> +     mmio->max_entries_nr = mmio_user->max_entries_nr;
> +     mmio->dev_id = mmio_user->dev_id;
> +     mmio->flags = mmio_user->flags;
> +
> +     if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_DEV_MASK) ==
> +                     KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV)
> +             mmio->type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV;
> +     if ((mmio_user->type & KVM_MSIX_MMIO_TYPE_BASE_MASK) ==
> +                     KVM_MSIX_MMIO_TYPE_BASE_TABLE) {
> +             mmio->type |= KVM_MSIX_MMIO_TYPE_BASE_TABLE;
> +             mmio->table_base_addr = mmio_user->base_addr;
> +             mmio->table_base_va = mmio_user->base_va;
> +     }
> +out:
> +     mutex_unlock(&mmio_dev->lock);
> +     return r;
> +}
> +
> +int kvm_free_msix_mmio(struct kvm *kvm, struct kvm_msix_mmio *mmio)
> +{
> +     struct kvm_msix_mmio_dev *mmio_dev = &kvm->msix_mmio_dev;
> +     int r = -EINVAL, i, j;
> +
> +     if (!mmio)
> +             return 0;
> +
> +     mutex_lock(&mmio_dev->lock);
> +     BUG_ON(mmio_dev->mmio_nr > KVM_MSIX_MMIO_MAX);
> +     for (i = 0; i < mmio_dev->mmio_nr; i++) {
> +             if (mmio_dev->mmio[i].dev_id == mmio->dev_id &&
> +                 mmio_dev->mmio[i].type == mmio->type) {
> +                     r = 0;
> +                     for (j = i; j < mmio_dev->mmio_nr - 1; j++)
> +                             mmio_dev->mmio[j] = mmio_dev->mmio[j + 1];
> +                     mmio_dev->mmio[mmio_dev->mmio_nr].max_entries_nr = 0;
> +                     mmio_dev->mmio[mmio_dev->mmio_nr].dev_id = 0;
> +                     mmio_dev->mmio[mmio_dev->mmio_nr].type = 0;
> +                     mmio_dev->mmio_nr--;
> +                     break;
> +             }
> +     }
> +     mutex_unlock(&mmio_dev->lock);
> +     return r;
> +}
> +
> +int kvm_vm_ioctl_unregister_msix_mmio(struct kvm *kvm,
> +                                   struct kvm_msix_mmio_user *mmio_user)
> +{
> +     struct kvm_msix_mmio mmio;
> +
> +     mmio.dev_id = mmio_user->dev_id;
> +     mmio.type = mmio_user->type;
> +
> +     return kvm_free_msix_mmio(kvm, &mmio);
> +}
> +
> diff --git a/virt/kvm/msix_mmio.h b/virt/kvm/msix_mmio.h
> new file mode 100644
> index 0000000..01b6587
> --- /dev/null
> +++ b/virt/kvm/msix_mmio.h
> @@ -0,0 +1,25 @@
> +#ifndef __KVM_MSIX_MMIO_H__
> +#define __KVM_MSIX_MMIO_H__
> +/*
> + * MSI-X MMIO emulation
> + *
> + * Copyright (c) 2010 Intel Corporation
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2.  See
> + * the COPYING file in the top-level directory.
> + *
> + * Author:
> + *   Sheng Yang <sheng.y...@intel.com>
> + */
> +
> +#include <linux/pci.h>
> +
> +int kvm_register_msix_mmio_dev(struct kvm *kvm);
> +int kvm_unregister_msix_mmio_dev(struct kvm *kvm);
> +int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm,
> +                                 struct kvm_msix_mmio_user *mmio_user);
> +int kvm_vm_ioctl_unregister_msix_mmio(struct kvm *kvm,
> +                                   struct kvm_msix_mmio_user *mmio_user);
> +int kvm_free_msix_mmio(struct kvm *kvm, struct kvm_msix_mmio *mmio_user);
> +
> +#endif
> -- 
> 1.7.0.1
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 3/4] KVM: Emulate MSI-X table in kernel

Reply via email to