RE: [v2 18/25] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

2014-12-09 Thread Wu, Feng


 -Original Message-
 From: Eric Auger [mailto:eric.au...@linaro.org]
 Sent: Monday, December 08, 2014 6:16 PM
 To: Alex Williamson; Wu, Feng
 Cc: t...@linutronix.de; mi...@redhat.com; h...@zytor.com; x...@kernel.org;
 g...@kernel.org; pbonz...@redhat.com; dw...@infradead.org;
 j...@8bytes.org; jiang@linux.intel.com; linux-ker...@vger.kernel.org;
 io...@lists.linux-foundation.org; kvm@vger.kernel.org
 Subject: Re: [v2 18/25] KVM: kvm-vfio: implement the VFIO skeleton for VT-d
 Posted-Interrupts
 
 On 12/08/2014 06:12 AM, Alex Williamson wrote:
  On Mon, 2014-12-08 at 04:58 +, Wu, Feng wrote:
 
  -Original Message-
  From: Eric Auger [mailto:eric.au...@linaro.org]
  Sent: Thursday, December 04, 2014 11:36 PM
  To: Wu, Feng; t...@linutronix.de; mi...@redhat.com; h...@zytor.com;
  x...@kernel.org; g...@kernel.org; pbonz...@redhat.com;
  dw...@infradead.org; j...@8bytes.org; alex.william...@redhat.com;
  jiang@linux.intel.com
  Cc: linux-ker...@vger.kernel.org; io...@lists.linux-foundation.org;
  kvm@vger.kernel.org
  Subject: Re: [v2 18/25] KVM: kvm-vfio: implement the VFIO skeleton for
 VT-d
  Posted-Interrupts
 
  Hi Feng,
 
  On 12/03/2014 08:39 AM, Feng Wu wrote:
  This patch adds the kvm-vfio interface for VT-d Posted-Interrrupts.
  When guests updates MSI/MSI-x information for an assigned-device,
  update
  QEMU will use KVM_DEV_VFIO_DEVICE_POSTING_IRQ attribute to setup
  IRTE for VT-d PI. This patch implement this IRQ attribute.
  s/implement/implements
 
  Signed-off-by: Feng Wu feng...@intel.com
  ---
   include/linux/kvm_host.h |   19 
   virt/kvm/vfio.c  |  103
  ++
   2 files changed, 122 insertions(+), 0 deletions(-)
 
  diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
  index 5cd4420..8d06678 100644
  --- a/include/linux/kvm_host.h
  +++ b/include/linux/kvm_host.h
  @@ -1134,6 +1134,25 @@ static inline int
  kvm_arch_vfio_set_forward(struct kvm_fwd_irq *fwd_irq,
   }
   #endif
 
  +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POSTING
  +/*
  + * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts
  + *
  + * @kvm: kvm
  + * @host_irq: host irq of the interrupt
  + * @guest_irq: gsi of the interrupt
  + * returns 0 on success,  0 on failure
  + */
  +int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int
 host_irq,
  + uint32_t guest_irq);
  +#else
  +static int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int
  host_irq,
  +uint32_t guest_irq)
  +{
  +return 0;
  +}
  +#endif
  +
   #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
   static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu,
 bool
  val)
  diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
  index 6bc7001..5e5515f 100644
  --- a/virt/kvm/vfio.c
  +++ b/virt/kvm/vfio.c
  @@ -446,6 +446,99 @@ out:
   return ret;
   }
 
  +static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int 
  irq_type)
  +{
  +if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
  +u8 pin;
  +
  +pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, pin);
  +if (pin)
  +return 1;
  +} else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX)
  +return pci_msi_vec_count(pdev);
  +else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
  +return pci_msix_vec_count(pdev);
  +
  +return 0;
  +}
  for platform case I was asked to move the retrieval of absolute irq
  number to the architecture specific part. I don't know if it should
  apply to PCI stuff as well? This explains why I need to pass the VFIO
  device (or struct device handle) to the arch specific part. Actually we
  do the same job, we provide a phys/virt IRQ mapping to KVM, right? So to
  me our architecture specific API should look quite similar?
 
  In my patch, QEMU passes IRQ type(MSI/MSIx in my case), VFIO device
 index,
  and sub-index via struct kvm_vfio_dev_irq to KVM, then KVM will find the
  real host irq from the VFIO device index and the IRQ type. Is this 
  something
  similar with your patch?
 
 
  +
  +static int kvm_vfio_set_pi(struct kvm_device *kdev, int32_t __user
 *argp)
  +{
  +struct kvm_vfio_dev_irq pi_info;
  +uint32_t *gsi;
  +unsigned long minsz;
  +struct vfio_device *vdev;
  +struct msi_desc *entry;
  +struct device *dev;
  +struct pci_dev *pdev;
  +int i, max, ret;
  +
  +minsz = offsetofend(struct kvm_vfio_dev_irq, count);
  +
  +if (copy_from_user(pi_info, (void __user *)argp, minsz))
  +return -EFAULT;
  +
  +if (pi_info.argsz  minsz || pi_info.index = VFIO_PCI_NUM_IRQS)
  PCI specific check, same remark as above but I will let Alex further
  comment on this and possibly invalidate this commeny ;-)
  +return -EINVAL;
  +
  +vdev

Re: [v2 18/25] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

2014-12-08 Thread Eric Auger
On 12/08/2014 06:12 AM, Alex Williamson wrote:
 On Mon, 2014-12-08 at 04:58 +, Wu, Feng wrote:

 -Original Message-
 From: Eric Auger [mailto:eric.au...@linaro.org]
 Sent: Thursday, December 04, 2014 11:36 PM
 To: Wu, Feng; t...@linutronix.de; mi...@redhat.com; h...@zytor.com;
 x...@kernel.org; g...@kernel.org; pbonz...@redhat.com;
 dw...@infradead.org; j...@8bytes.org; alex.william...@redhat.com;
 jiang@linux.intel.com
 Cc: linux-ker...@vger.kernel.org; io...@lists.linux-foundation.org;
 kvm@vger.kernel.org
 Subject: Re: [v2 18/25] KVM: kvm-vfio: implement the VFIO skeleton for VT-d
 Posted-Interrupts

 Hi Feng,

 On 12/03/2014 08:39 AM, Feng Wu wrote:
 This patch adds the kvm-vfio interface for VT-d Posted-Interrrupts.
 When guests updates MSI/MSI-x information for an assigned-device,
 update
 QEMU will use KVM_DEV_VFIO_DEVICE_POSTING_IRQ attribute to setup
 IRTE for VT-d PI. This patch implement this IRQ attribute.
 s/implement/implements

 Signed-off-by: Feng Wu feng...@intel.com
 ---
  include/linux/kvm_host.h |   19 
  virt/kvm/vfio.c  |  103
 ++
  2 files changed, 122 insertions(+), 0 deletions(-)

 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index 5cd4420..8d06678 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -1134,6 +1134,25 @@ static inline int
 kvm_arch_vfio_set_forward(struct kvm_fwd_irq *fwd_irq,
  }
  #endif

 +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POSTING
 +/*
 + * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts
 + *
 + * @kvm: kvm
 + * @host_irq: host irq of the interrupt
 + * @guest_irq: gsi of the interrupt
 + * returns 0 on success,  0 on failure
 + */
 +int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 +   uint32_t guest_irq);
 +#else
 +static int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int
 host_irq,
 +  uint32_t guest_irq)
 +{
 +  return 0;
 +}
 +#endif
 +
  #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT

  static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool
 val)
 diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
 index 6bc7001..5e5515f 100644
 --- a/virt/kvm/vfio.c
 +++ b/virt/kvm/vfio.c
 @@ -446,6 +446,99 @@ out:
return ret;
  }

 +static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type)
 +{
 +  if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
 +  u8 pin;
 +
 +  pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, pin);
 +  if (pin)
 +  return 1;
 +  } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX)
 +  return pci_msi_vec_count(pdev);
 +  else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
 +  return pci_msix_vec_count(pdev);
 +
 +  return 0;
 +}
 for platform case I was asked to move the retrieval of absolute irq
 number to the architecture specific part. I don't know if it should
 apply to PCI stuff as well? This explains why I need to pass the VFIO
 device (or struct device handle) to the arch specific part. Actually we
 do the same job, we provide a phys/virt IRQ mapping to KVM, right? So to
 me our architecture specific API should look quite similar?

 In my patch, QEMU passes IRQ type(MSI/MSIx in my case), VFIO device index,
 and sub-index via struct kvm_vfio_dev_irq to KVM, then KVM will find the
 real host irq from the VFIO device index and the IRQ type. Is this something
 similar with your patch?


 +
 +static int kvm_vfio_set_pi(struct kvm_device *kdev, int32_t __user *argp)
 +{
 +  struct kvm_vfio_dev_irq pi_info;
 +  uint32_t *gsi;
 +  unsigned long minsz;
 +  struct vfio_device *vdev;
 +  struct msi_desc *entry;
 +  struct device *dev;
 +  struct pci_dev *pdev;
 +  int i, max, ret;
 +
 +  minsz = offsetofend(struct kvm_vfio_dev_irq, count);
 +
 +  if (copy_from_user(pi_info, (void __user *)argp, minsz))
 +  return -EFAULT;
 +
 +  if (pi_info.argsz  minsz || pi_info.index = VFIO_PCI_NUM_IRQS)
 PCI specific check, same remark as above but I will let Alex further
 comment on this and possibly invalidate this commeny ;-)
 +  return -EINVAL;
 +
 +  vdev = kvm_vfio_get_vfio_device(pi_info.fd);
 +  if (IS_ERR(vdev))
 +  return PTR_ERR(vdev);
 +
 +  dev = kvm_vfio_external_base_device(vdev);
 +  if (!dev || !dev_is_pci(dev)) {
 +  ret = -EFAULT;
 +  goto put_vfio_device;
 +  }
 +
 +  pdev = to_pci_dev(dev);
 +
 +  max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index);
 +  if (max = 0) {
 +  ret = -EFAULT;
 +  goto put_vfio_device;
 +  }
 +
 +  if (pi_info.argsz - minsz  pi_info.count * sizeof(int) ||
 shouldn' we use the actual datatype?

 I am afraid I don't get this, could you please be more specific? Thanks a 
 lot!
 
 We could have a platform that supports 64bit INTs.
yes this is what I meant (struct datatype is __u32).

Thanks

Eric
 
 +  pi_info.start = max || pi_info.start + pi_info.count  max) {
 +  ret

RE: [v2 18/25] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

2014-12-07 Thread Wu, Feng


 -Original Message-
 From: Eric Auger [mailto:eric.au...@linaro.org]
 Sent: Thursday, December 04, 2014 11:36 PM
 To: Wu, Feng; t...@linutronix.de; mi...@redhat.com; h...@zytor.com;
 x...@kernel.org; g...@kernel.org; pbonz...@redhat.com;
 dw...@infradead.org; j...@8bytes.org; alex.william...@redhat.com;
 jiang@linux.intel.com
 Cc: linux-ker...@vger.kernel.org; io...@lists.linux-foundation.org;
 kvm@vger.kernel.org
 Subject: Re: [v2 18/25] KVM: kvm-vfio: implement the VFIO skeleton for VT-d
 Posted-Interrupts
 
 Hi Feng,
 
 On 12/03/2014 08:39 AM, Feng Wu wrote:
  This patch adds the kvm-vfio interface for VT-d Posted-Interrrupts.
  When guests updates MSI/MSI-x information for an assigned-device,
 update
  QEMU will use KVM_DEV_VFIO_DEVICE_POSTING_IRQ attribute to setup
  IRTE for VT-d PI. This patch implement this IRQ attribute.
 s/implement/implements
 
  Signed-off-by: Feng Wu feng...@intel.com
  ---
   include/linux/kvm_host.h |   19 
   virt/kvm/vfio.c  |  103
 ++
   2 files changed, 122 insertions(+), 0 deletions(-)
 
  diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
  index 5cd4420..8d06678 100644
  --- a/include/linux/kvm_host.h
  +++ b/include/linux/kvm_host.h
  @@ -1134,6 +1134,25 @@ static inline int
 kvm_arch_vfio_set_forward(struct kvm_fwd_irq *fwd_irq,
   }
   #endif
 
  +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POSTING
  +/*
  + * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts
  + *
  + * @kvm: kvm
  + * @host_irq: host irq of the interrupt
  + * @guest_irq: gsi of the interrupt
  + * returns 0 on success,  0 on failure
  + */
  +int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
  +uint32_t guest_irq);
  +#else
  +static int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int
 host_irq,
  +   uint32_t guest_irq)
  +{
  +   return 0;
  +}
  +#endif
  +
   #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
   static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool
 val)
  diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
  index 6bc7001..5e5515f 100644
  --- a/virt/kvm/vfio.c
  +++ b/virt/kvm/vfio.c
  @@ -446,6 +446,99 @@ out:
  return ret;
   }
 
  +static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type)
  +{
  +   if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
  +   u8 pin;
  +
  +   pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, pin);
  +   if (pin)
  +   return 1;
  +   } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX)
  +   return pci_msi_vec_count(pdev);
  +   else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
  +   return pci_msix_vec_count(pdev);
  +
  +   return 0;
  +}
 for platform case I was asked to move the retrieval of absolute irq
 number to the architecture specific part. I don't know if it should
 apply to PCI stuff as well? This explains why I need to pass the VFIO
 device (or struct device handle) to the arch specific part. Actually we
 do the same job, we provide a phys/virt IRQ mapping to KVM, right? So to
 me our architecture specific API should look quite similar?

In my patch, QEMU passes IRQ type(MSI/MSIx in my case), VFIO device index,
and sub-index via struct kvm_vfio_dev_irq to KVM, then KVM will find the
real host irq from the VFIO device index and the IRQ type. Is this something
similar with your patch?

 
  +
  +static int kvm_vfio_set_pi(struct kvm_device *kdev, int32_t __user *argp)
  +{
  +   struct kvm_vfio_dev_irq pi_info;
  +   uint32_t *gsi;
  +   unsigned long minsz;
  +   struct vfio_device *vdev;
  +   struct msi_desc *entry;
  +   struct device *dev;
  +   struct pci_dev *pdev;
  +   int i, max, ret;
  +
  +   minsz = offsetofend(struct kvm_vfio_dev_irq, count);
  +
  +   if (copy_from_user(pi_info, (void __user *)argp, minsz))
  +   return -EFAULT;
  +
  +   if (pi_info.argsz  minsz || pi_info.index = VFIO_PCI_NUM_IRQS)
 PCI specific check, same remark as above but I will let Alex further
 comment on this and possibly invalidate this commeny ;-)
  +   return -EINVAL;
  +
  +   vdev = kvm_vfio_get_vfio_device(pi_info.fd);
  +   if (IS_ERR(vdev))
  +   return PTR_ERR(vdev);
  +
  +   dev = kvm_vfio_external_base_device(vdev);
  +   if (!dev || !dev_is_pci(dev)) {
  +   ret = -EFAULT;
  +   goto put_vfio_device;
  +   }
  +
  +   pdev = to_pci_dev(dev);
  +
  +   max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index);
  +   if (max = 0) {
  +   ret = -EFAULT;
  +   goto put_vfio_device;
  +   }
  +
  +   if (pi_info.argsz - minsz  pi_info.count * sizeof(int) ||
 shouldn' we use the actual datatype?

I am afraid I don't get this, could you please be more specific? Thanks a lot!

  +   pi_info.start = max || pi_info.start + pi_info.count  max) {
  +   ret = -EINVAL;
  +   goto put_vfio_device;
  +   }
  +
  +   gsi

Re: [v2 18/25] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

2014-12-07 Thread Alex Williamson
On Mon, 2014-12-08 at 04:58 +, Wu, Feng wrote:
 
  -Original Message-
  From: Eric Auger [mailto:eric.au...@linaro.org]
  Sent: Thursday, December 04, 2014 11:36 PM
  To: Wu, Feng; t...@linutronix.de; mi...@redhat.com; h...@zytor.com;
  x...@kernel.org; g...@kernel.org; pbonz...@redhat.com;
  dw...@infradead.org; j...@8bytes.org; alex.william...@redhat.com;
  jiang@linux.intel.com
  Cc: linux-ker...@vger.kernel.org; io...@lists.linux-foundation.org;
  kvm@vger.kernel.org
  Subject: Re: [v2 18/25] KVM: kvm-vfio: implement the VFIO skeleton for VT-d
  Posted-Interrupts
  
  Hi Feng,
  
  On 12/03/2014 08:39 AM, Feng Wu wrote:
   This patch adds the kvm-vfio interface for VT-d Posted-Interrrupts.
   When guests updates MSI/MSI-x information for an assigned-device,
  update
   QEMU will use KVM_DEV_VFIO_DEVICE_POSTING_IRQ attribute to setup
   IRTE for VT-d PI. This patch implement this IRQ attribute.
  s/implement/implements
  
   Signed-off-by: Feng Wu feng...@intel.com
   ---
include/linux/kvm_host.h |   19 
virt/kvm/vfio.c  |  103
  ++
2 files changed, 122 insertions(+), 0 deletions(-)
  
   diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
   index 5cd4420..8d06678 100644
   --- a/include/linux/kvm_host.h
   +++ b/include/linux/kvm_host.h
   @@ -1134,6 +1134,25 @@ static inline int
  kvm_arch_vfio_set_forward(struct kvm_fwd_irq *fwd_irq,
}
#endif
  
   +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POSTING
   +/*
   + * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts
   + *
   + * @kvm: kvm
   + * @host_irq: host irq of the interrupt
   + * @guest_irq: gsi of the interrupt
   + * returns 0 on success,  0 on failure
   + */
   +int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
   +  uint32_t guest_irq);
   +#else
   +static int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int
  host_irq,
   + uint32_t guest_irq)
   +{
   + return 0;
   +}
   +#endif
   +
#ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
  
static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool
  val)
   diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
   index 6bc7001..5e5515f 100644
   --- a/virt/kvm/vfio.c
   +++ b/virt/kvm/vfio.c
   @@ -446,6 +446,99 @@ out:
 return ret;
}
  
   +static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type)
   +{
   + if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
   + u8 pin;
   +
   + pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, pin);
   + if (pin)
   + return 1;
   + } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX)
   + return pci_msi_vec_count(pdev);
   + else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
   + return pci_msix_vec_count(pdev);
   +
   + return 0;
   +}
  for platform case I was asked to move the retrieval of absolute irq
  number to the architecture specific part. I don't know if it should
  apply to PCI stuff as well? This explains why I need to pass the VFIO
  device (or struct device handle) to the arch specific part. Actually we
  do the same job, we provide a phys/virt IRQ mapping to KVM, right? So to
  me our architecture specific API should look quite similar?
 
 In my patch, QEMU passes IRQ type(MSI/MSIx in my case), VFIO device index,
 and sub-index via struct kvm_vfio_dev_irq to KVM, then KVM will find the
 real host irq from the VFIO device index and the IRQ type. Is this something
 similar with your patch?
 
  
   +
   +static int kvm_vfio_set_pi(struct kvm_device *kdev, int32_t __user *argp)
   +{
   + struct kvm_vfio_dev_irq pi_info;
   + uint32_t *gsi;
   + unsigned long minsz;
   + struct vfio_device *vdev;
   + struct msi_desc *entry;
   + struct device *dev;
   + struct pci_dev *pdev;
   + int i, max, ret;
   +
   + minsz = offsetofend(struct kvm_vfio_dev_irq, count);
   +
   + if (copy_from_user(pi_info, (void __user *)argp, minsz))
   + return -EFAULT;
   +
   + if (pi_info.argsz  minsz || pi_info.index = VFIO_PCI_NUM_IRQS)
  PCI specific check, same remark as above but I will let Alex further
  comment on this and possibly invalidate this commeny ;-)
   + return -EINVAL;
   +
   + vdev = kvm_vfio_get_vfio_device(pi_info.fd);
   + if (IS_ERR(vdev))
   + return PTR_ERR(vdev);
   +
   + dev = kvm_vfio_external_base_device(vdev);
   + if (!dev || !dev_is_pci(dev)) {
   + ret = -EFAULT;
   + goto put_vfio_device;
   + }
   +
   + pdev = to_pci_dev(dev);
   +
   + max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index);
   + if (max = 0) {
   + ret = -EFAULT;
   + goto put_vfio_device;
   + }
   +
   + if (pi_info.argsz - minsz  pi_info.count * sizeof(int) ||
  shouldn' we use the actual datatype?
 
 I am afraid I don't get this, could you please be more specific? Thanks a lot!

We could have a platform that supports 64bit INTs

Re: [v2 18/25] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

2014-12-04 Thread Eric Auger
Hi Feng,

On 12/03/2014 08:39 AM, Feng Wu wrote:
 This patch adds the kvm-vfio interface for VT-d Posted-Interrrupts.
 When guests updates MSI/MSI-x information for an assigned-device,
update
 QEMU will use KVM_DEV_VFIO_DEVICE_POSTING_IRQ attribute to setup
 IRTE for VT-d PI. This patch implement this IRQ attribute.
s/implement/implements
 
 Signed-off-by: Feng Wu feng...@intel.com
 ---
  include/linux/kvm_host.h |   19 
  virt/kvm/vfio.c  |  103 
 ++
  2 files changed, 122 insertions(+), 0 deletions(-)
 
 diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
 index 5cd4420..8d06678 100644
 --- a/include/linux/kvm_host.h
 +++ b/include/linux/kvm_host.h
 @@ -1134,6 +1134,25 @@ static inline int kvm_arch_vfio_set_forward(struct 
 kvm_fwd_irq *fwd_irq,
  }
  #endif
  
 +#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POSTING
 +/*
 + * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts
 + *
 + * @kvm: kvm
 + * @host_irq: host irq of the interrupt
 + * @guest_irq: gsi of the interrupt
 + * returns 0 on success,  0 on failure
 + */
 +int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
 +  uint32_t guest_irq);
 +#else
 +static int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int 
 host_irq,
 + uint32_t guest_irq)
 +{
 + return 0;
 +}
 +#endif
 +
  #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
  
  static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
 diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
 index 6bc7001..5e5515f 100644
 --- a/virt/kvm/vfio.c
 +++ b/virt/kvm/vfio.c
 @@ -446,6 +446,99 @@ out:
   return ret;
  }
  
 +static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type)
 +{
 + if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
 + u8 pin;
 +
 + pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, pin);
 + if (pin)
 + return 1;
 + } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX)
 + return pci_msi_vec_count(pdev);
 + else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
 + return pci_msix_vec_count(pdev);
 +
 + return 0;
 +}
for platform case I was asked to move the retrieval of absolute irq
number to the architecture specific part. I don't know if it should
apply to PCI stuff as well? This explains why I need to pass the VFIO
device (or struct device handle) to the arch specific part. Actually we
do the same job, we provide a phys/virt IRQ mapping to KVM, right? So to
me our architecture specific API should look quite similar?

 +
 +static int kvm_vfio_set_pi(struct kvm_device *kdev, int32_t __user *argp)
 +{
 + struct kvm_vfio_dev_irq pi_info;
 + uint32_t *gsi;
 + unsigned long minsz;
 + struct vfio_device *vdev;
 + struct msi_desc *entry;
 + struct device *dev;
 + struct pci_dev *pdev;
 + int i, max, ret;
 +
 + minsz = offsetofend(struct kvm_vfio_dev_irq, count);
 +
 + if (copy_from_user(pi_info, (void __user *)argp, minsz))
 + return -EFAULT;
 +
 + if (pi_info.argsz  minsz || pi_info.index = VFIO_PCI_NUM_IRQS)
PCI specific check, same remark as above but I will let Alex further
comment on this and possibly invalidate this commeny ;-)
 + return -EINVAL;
 +
 + vdev = kvm_vfio_get_vfio_device(pi_info.fd);
 + if (IS_ERR(vdev))
 + return PTR_ERR(vdev);
 +
 + dev = kvm_vfio_external_base_device(vdev);
 + if (!dev || !dev_is_pci(dev)) {
 + ret = -EFAULT;
 + goto put_vfio_device;
 + }
 +
 + pdev = to_pci_dev(dev);
 +
 + max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index);
 + if (max = 0) {
 + ret = -EFAULT;
 + goto put_vfio_device;
 + }
 +
 + if (pi_info.argsz - minsz  pi_info.count * sizeof(int) ||
shouldn' we use the actual datatype?
 + pi_info.start = max || pi_info.start + pi_info.count  max) {
 + ret = -EINVAL;
 + goto put_vfio_device;
 + }
 +
 + gsi = memdup_user((void __user *)((unsigned long)argp + minsz),
 +pi_info.count * sizeof(int));
same question as above
 + if (IS_ERR(gsi)) {
 + ret = PTR_ERR(gsi);
 + goto put_vfio_device;
 + }
 +
 +#ifdef CONFIG_PCI_MSI
 + for (i = 0; i  pi_info.count; i++) {
 + list_for_each_entry(entry, pdev-msi_list, list) {
 + if (entry-msi_attrib.entry_nr != pi_info.start+i)
 + continue;
 +
 + ret = kvm_arch_vfio_update_pi_irte(kdev-kvm,
 +entry-irq,
 +gsi[i]);
 + if (ret) {
 + ret = -EFAULT;
why -EFAULT? and not propagation of original error code?
you may have posting set for part of the subindexes 

[v2 18/25] KVM: kvm-vfio: implement the VFIO skeleton for VT-d Posted-Interrupts

2014-12-02 Thread Feng Wu
This patch adds the kvm-vfio interface for VT-d Posted-Interrrupts.
When guests updates MSI/MSI-x information for an assigned-device,
QEMU will use KVM_DEV_VFIO_DEVICE_POSTING_IRQ attribute to setup
IRTE for VT-d PI. This patch implement this IRQ attribute.

Signed-off-by: Feng Wu feng...@intel.com
---
 include/linux/kvm_host.h |   19 
 virt/kvm/vfio.c  |  103 ++
 2 files changed, 122 insertions(+), 0 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 5cd4420..8d06678 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1134,6 +1134,25 @@ static inline int kvm_arch_vfio_set_forward(struct 
kvm_fwd_irq *fwd_irq,
 }
 #endif
 
+#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POSTING
+/*
+ * kvm_arch_vfio_update_pi_irte - set IRTE for Posted-Interrupts
+ *
+ * @kvm: kvm
+ * @host_irq: host irq of the interrupt
+ * @guest_irq: gsi of the interrupt
+ * returns 0 on success,  0 on failure
+ */
+int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+uint32_t guest_irq);
+#else
+static int kvm_arch_vfio_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
+   uint32_t guest_irq)
+{
+   return 0;
+}
+#endif
+
 #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT
 
 static inline void kvm_vcpu_set_in_spin_loop(struct kvm_vcpu *vcpu, bool val)
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index 6bc7001..5e5515f 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -446,6 +446,99 @@ out:
return ret;
 }
 
+static int kvm_vfio_pci_get_irq_count(struct pci_dev *pdev, int irq_type)
+{
+   if (irq_type == VFIO_PCI_INTX_IRQ_INDEX) {
+   u8 pin;
+
+   pci_read_config_byte(pdev, PCI_INTERRUPT_PIN, pin);
+   if (pin)
+   return 1;
+   } else if (irq_type == VFIO_PCI_MSI_IRQ_INDEX)
+   return pci_msi_vec_count(pdev);
+   else if (irq_type == VFIO_PCI_MSIX_IRQ_INDEX)
+   return pci_msix_vec_count(pdev);
+
+   return 0;
+}
+
+static int kvm_vfio_set_pi(struct kvm_device *kdev, int32_t __user *argp)
+{
+   struct kvm_vfio_dev_irq pi_info;
+   uint32_t *gsi;
+   unsigned long minsz;
+   struct vfio_device *vdev;
+   struct msi_desc *entry;
+   struct device *dev;
+   struct pci_dev *pdev;
+   int i, max, ret;
+
+   minsz = offsetofend(struct kvm_vfio_dev_irq, count);
+
+   if (copy_from_user(pi_info, (void __user *)argp, minsz))
+   return -EFAULT;
+
+   if (pi_info.argsz  minsz || pi_info.index = VFIO_PCI_NUM_IRQS)
+   return -EINVAL;
+
+   vdev = kvm_vfio_get_vfio_device(pi_info.fd);
+   if (IS_ERR(vdev))
+   return PTR_ERR(vdev);
+
+   dev = kvm_vfio_external_base_device(vdev);
+   if (!dev || !dev_is_pci(dev)) {
+   ret = -EFAULT;
+   goto put_vfio_device;
+   }
+
+   pdev = to_pci_dev(dev);
+
+   max = kvm_vfio_pci_get_irq_count(pdev, pi_info.index);
+   if (max = 0) {
+   ret = -EFAULT;
+   goto put_vfio_device;
+   }
+
+   if (pi_info.argsz - minsz  pi_info.count * sizeof(int) ||
+   pi_info.start = max || pi_info.start + pi_info.count  max) {
+   ret = -EINVAL;
+   goto put_vfio_device;
+   }
+
+   gsi = memdup_user((void __user *)((unsigned long)argp + minsz),
+  pi_info.count * sizeof(int));
+   if (IS_ERR(gsi)) {
+   ret = PTR_ERR(gsi);
+   goto put_vfio_device;
+   }
+
+#ifdef CONFIG_PCI_MSI
+   for (i = 0; i  pi_info.count; i++) {
+   list_for_each_entry(entry, pdev-msi_list, list) {
+   if (entry-msi_attrib.entry_nr != pi_info.start+i)
+   continue;
+
+   ret = kvm_arch_vfio_update_pi_irte(kdev-kvm,
+  entry-irq,
+  gsi[i]);
+   if (ret) {
+   ret = -EFAULT;
+   goto free_gsi;
+   }
+   }
+   }
+#endif
+
+   ret = 0;
+
+free_gsi:
+   kfree(gsi);
+
+put_vfio_device:
+   kvm_vfio_put_vfio_device(vdev);
+   return ret;
+}
+
 static int kvm_vfio_set_device(struct kvm_device *kdev, long attr, u64 arg)
 {
int32_t __user *argp = (int32_t __user *)(unsigned long)arg;
@@ -456,6 +549,11 @@ static int kvm_vfio_set_device(struct kvm_device *kdev, 
long attr, u64 arg)
case KVM_DEV_VFIO_DEVICE_UNFORWARD_IRQ:
ret = kvm_vfio_control_irq_forward(kdev, attr, argp);
break;
+#ifdef __KVM_HAVE_ARCH_KVM_VFIO_POSTING
+   case KVM_DEV_VFIO_DEVICE_POSTING_IRQ:
+   ret = kvm_vfio_set_pi(kdev, argp);
+   break;