On 10/20/2010 10:26 AM, Sheng Yang wrote:
It would work with KVM_CAP_DEVICE_MSIX_MASK, which we would enable in the
last patch.
+struct kvm_assigned_msix_mmio {
+ __u32 assigned_dev_id;
+ __u64 base_addr;
Different alignment and size on 32 and 64 bits.
Is base_addr a guest physical address? Do we need a size, or is it fixed?
+ __u32 flags;
+ __u32 reserved[2];
+};
+
@@ -465,6 +465,8 @@ struct kvm_assigned_dev_kernel {
struct pci_dev *dev;
struct kvm *kvm;
spinlock_t assigned_dev_lock;
+ u64 msix_mmio_base;
gpa_t.
+ struct kvm_io_device msix_mmio_dev;
};
struct kvm_irq_mask_notifier {
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index bf96ea7..5d2adc4 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -739,6 +739,137 @@ msix_entry_out:
return r;
}
+
+static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev,
+ gpa_t addr, int len, int *idx)
+{
+ int i;
+
+ if (!(adev->irq_requested_type& KVM_DEV_IRQ_HOST_MSIX))
+ return false;
Just don't install the io_device in that case.
+ BUG_ON(adev->msix_mmio_base == 0);
+ for (i = 0; i< adev->entries_nr; i++) {
+ u64 start, end;
+ start = adev->msix_mmio_base +
+ adev->guest_msix_entries[i].entry * PCI_MSIX_ENTRY_SIZE;
+ end = start + PCI_MSIX_ENTRY_SIZE;
+ if (addr>= start&& addr + len<= end) {
+ *idx = i;
+ return true;
+ }
What if it's a partial hit, e.g. a write that covers part of one entry and
part of another entry?
+ }
+ return false;
+}
+
+static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
+ void *val)
+{
+ struct kvm_assigned_dev_kernel *adev =
+ container_of(this, struct kvm_assigned_dev_kernel,
+ msix_mmio_dev);
+ int idx, r = 0;
+ u32 entry[4];
+ struct kvm_kernel_irq_routing_entry *e;
+
+ mutex_lock(&adev->kvm->lock);
+ if (!msix_mmio_in_range(adev, addr, len,&idx)) {
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+ if ((addr& 0x3) || len != 4) {
+ printk(KERN_WARNING
+ "KVM: Unaligned reading for device MSI-X MMIO! "
+ "addr 0x%llx, len %d\n", addr, len);
Guest-exploitable printk().
+ r = -EOPNOTSUPP;
If the guest assigned the device to another guest, it allows the nested
guest to kill the non-nested guest. Need to exit in a graceful fashion.
+ goto out;
+ }
+
+ e = kvm_get_irq_routing_entry(adev->kvm,
+ adev->guest_msix_entries[idx].vector);
+ if (!e || e->type != KVM_IRQ_ROUTING_MSI) {
+ printk(KERN_WARNING "KVM: Wrong MSI-X routing entry! "
+ "addr 0x%llx, len %d\n", addr, len);
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+ entry[0] = e->msi.address_lo;
+ entry[1] = e->msi.address_hi;
+ entry[2] = e->msi.data;
+ entry[3] = !!(adev->guest_msix_entries[idx].flags&
+ KVM_ASSIGNED_MSIX_MASK);
+ memcpy(val,&entry[addr % PCI_MSIX_ENTRY_SIZE / 4], len);
+
+out:
+ mutex_unlock(&adev->kvm->lock);
+ return r;
+}
+
+static int msix_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
+ const void *val)
+{
+ struct kvm_assigned_dev_kernel *adev =
+ container_of(this, struct kvm_assigned_dev_kernel,
+ msix_mmio_dev);
+ int idx, r = 0;
+ unsigned long new_val = *(unsigned long *)val;
+ bool entry_masked;
+
+ mutex_lock(&adev->kvm->lock);
+ if (!msix_mmio_in_range(adev, addr, len,&idx)) {
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+ if ((addr& 0x3) || len != 4) {
+ printk(KERN_WARNING
+ "KVM: Unaligned writing for device MSI-X MMIO! "
+ "addr 0x%llx, len %d, val 0x%lx\n",
+ addr, len, new_val);
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+ entry_masked = adev->guest_msix_entries[idx].flags&
+ KVM_ASSIGNED_MSIX_MASK;
+ if (addr % PCI_MSIX_ENTRY_SIZE != PCI_MSIX_ENTRY_VECTOR_CTRL) {
+ /* Only allow entry modification when entry was masked */
+ if (!entry_masked) {
+ printk(KERN_WARNING
+ "KVM: guest try to write unmasked MSI-X entry. "
+ "addr 0x%llx, len %d, val 0x%lx\n",
+ addr, len, new_val);
+ r = 0;
What does the spec say about this situation?
+ } else
+ /* Leave it to QEmu */
s/qemu/userspace/
+ r = -EOPNOTSUPP;
What would userspace do in this situation? I hope you documented
precisely what the kernel handles and what it doesn't.
I prefer having more code in the kernel to having an interface which is
hard to use correctly.
+ goto out;
+ }
+ if (new_val& ~1ul) {
Is there a #define for this bit?
+ printk(KERN_WARNING
+ "KVM: Bad writing for device MSI-X MMIO! "
+ "addr 0x%llx, len %d, val 0x%lx\n",
+ addr, len, new_val);
+ r = -EOPNOTSUPP;
+ goto out;
+ }
+ if (new_val == 1&& !entry_masked) {
+ adev->guest_msix_entries[idx].flags |=
+ KVM_ASSIGNED_MSIX_MASK;
+ update_msix_mask(adev, idx);
+ } else if (new_val == 0&& entry_masked) {
+ adev->guest_msix_entries[idx].flags&=
+ ~KVM_ASSIGNED_MSIX_MASK;
+ update_msix_mask(adev, idx);
+ }
Ah, I see you do reuse update_msix_mask().
+out:
+ mutex_unlock(&adev->kvm->lock);
+
+ return r;
+}
+
+static const struct kvm_io_device_ops msix_mmio_ops = {
+ .read = msix_mmio_read,
+ .write = msix_mmio_write,
+};
+
#endif
long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
--
error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html