On 10/20/2010 10:26 AM, Sheng Yang wrote:
It would work with KVM_CAP_DEVICE_MSIX_MASK, which we would enable in the
last patch.


+struct kvm_assigned_msix_mmio {
+       __u32 assigned_dev_id;
+       __u64 base_addr;

Different alignment and size on 32 and 64 bits.

Is base_addr a guest physical address?  Do we need a size or is it fixed?

+       __u32 flags;
+       __u32 reserved[2];
+};
+

@@ -465,6 +465,8 @@ struct kvm_assigned_dev_kernel {
        struct pci_dev *dev;
        struct kvm *kvm;
        spinlock_t assigned_dev_lock;
+       u64 msix_mmio_base;

gpa_t.

+       struct kvm_io_device msix_mmio_dev;
  };

  struct kvm_irq_mask_notifier {
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index bf96ea7..5d2adc4 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -739,6 +739,137 @@ msix_entry_out:

        return r;
  }
+
+static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev,
+                             gpa_t addr, int len, int *idx)
+{
+       int i;
+
+       if (!(adev->irq_requested_type&  KVM_DEV_IRQ_HOST_MSIX))
+               return false;

Just don't install the io_device in that case.

+       BUG_ON(adev->msix_mmio_base == 0);
+       for (i = 0; i<  adev->entries_nr; i++) {
+               u64 start, end;
+               start = adev->msix_mmio_base +
+                       adev->guest_msix_entries[i].entry * PCI_MSIX_ENTRY_SIZE;
+               end = start + PCI_MSIX_ENTRY_SIZE;
+               if (addr>= start&&  addr + len<= end) {
+                       *idx = i;
+                       return true;
+               }

What if it's a partial hit? write part of an entry and part of another entry?

+       }
+       return false;
+}
+
+static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+                         void *val)
+{
+       struct kvm_assigned_dev_kernel *adev =
+                       container_of(this, struct kvm_assigned_dev_kernel,
+                                    msix_mmio_dev);
+       int idx, r = 0;
+       u32 entry[4];
+       struct kvm_kernel_irq_routing_entry *e;
+
+       mutex_lock(&adev->kvm->lock);
+       if (!msix_mmio_in_range(adev, addr, len,&idx)) {
+               r = -EOPNOTSUPP;
+               goto out;
+       }
+       if ((addr&  0x3) || len != 4) {
+               printk(KERN_WARNING
+                       "KVM: Unaligned reading for device MSI-X MMIO! "
+                       "addr 0x%llx, len %d\n", addr, len);

Guest exploitable printk()

+               r = -EOPNOTSUPP;

If the guest assigned the device to another guest, it allows the nested guest to kill the non-nested guest. Need to exit in a graceful fashion.

+               goto out;
+       }
+
+       e = kvm_get_irq_routing_entry(adev->kvm,
+                       adev->guest_msix_entries[idx].vector);
+       if (!e || e->type != KVM_IRQ_ROUTING_MSI) {
+               printk(KERN_WARNING "KVM: Wrong MSI-X routing entry! "
+                       "addr 0x%llx, len %d\n", addr, len);
+               r = -EOPNOTSUPP;
+               goto out;
+       }
+       entry[0] = e->msi.address_lo;
+       entry[1] = e->msi.address_hi;
+       entry[2] = e->msi.data;
+       entry[3] = !!(adev->guest_msix_entries[idx].flags&
+                       KVM_ASSIGNED_MSIX_MASK);
+       memcpy(val,&entry[addr % PCI_MSIX_ENTRY_SIZE / 4], len);
+
+out:
+       mutex_unlock(&adev->kvm->lock);
+       return r;
+}
+
+static int msix_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+                          const void *val)
+{
+       struct kvm_assigned_dev_kernel *adev =
+                       container_of(this, struct kvm_assigned_dev_kernel,
+                                    msix_mmio_dev);
+       int idx, r = 0;
+       unsigned long new_val = *(unsigned long *)val;
+       bool entry_masked;
+
+       mutex_lock(&adev->kvm->lock);
+       if (!msix_mmio_in_range(adev, addr, len,&idx)) {
+               r = -EOPNOTSUPP;
+               goto out;
+       }
+       if ((addr&  0x3) || len != 4) {
+               printk(KERN_WARNING
+                       "KVM: Unaligned writing for device MSI-X MMIO! "
+                       "addr 0x%llx, len %d, val 0x%lx\n",
+                       addr, len, new_val);
+               r = -EOPNOTSUPP;
+               goto out;
+       }
+       entry_masked = adev->guest_msix_entries[idx].flags&
+                       KVM_ASSIGNED_MSIX_MASK;
+       if (addr % PCI_MSIX_ENTRY_SIZE != PCI_MSIX_ENTRY_VECTOR_CTRL) {
+               /* Only allow entry modification when entry was masked */
+               if (!entry_masked) {
+                       printk(KERN_WARNING
+                               "KVM: guest try to write unmasked MSI-X entry. "
+                               "addr 0x%llx, len %d, val 0x%lx\n",
+                               addr, len, new_val);
+                       r = 0;

What does the spec say about this situation?

+               } else
+                       /* Leave it to QEmu */

s/qemu/userspace/

+                       r = -EOPNOTSUPP;

What would userspace do in this situation? I hope you documented precisely what the kernel handles and what it doesn't?

I prefer more kernel code in the kernel to having an interface which is hard to use correctly.

+               goto out;
+       }
+       if (new_val&  ~1ul) {

Is there a #define for this bit?

+               printk(KERN_WARNING
+                       "KVM: Bad writing for device MSI-X MMIO! "
+                       "addr 0x%llx, len %d, val 0x%lx\n",
+                       addr, len, new_val);
+               r = -EOPNOTSUPP;
+               goto out;
+       }
+       if (new_val == 1&&  !entry_masked) {
+               adev->guest_msix_entries[idx].flags |=
+                       KVM_ASSIGNED_MSIX_MASK;
+               update_msix_mask(adev, idx);
+       } else if (new_val == 0&&  entry_masked) {
+               adev->guest_msix_entries[idx].flags&=
+                       ~KVM_ASSIGNED_MSIX_MASK;
+               update_msix_mask(adev, idx);
+       }

Ah, I see you do reuse update_msix_mask().

+out:
+       mutex_unlock(&adev->kvm->lock);
+
+       return r;
+}
+
+static const struct kvm_io_device_ops msix_mmio_ops = {
+       .read     = msix_mmio_read,
+       .write    = msix_mmio_write,
+};
+
  #endif

  long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to