On 12/22/2010 10:44 AM, Sheng Yang wrote:
With this, we can now support the mask bit operation of assigned devices.


@@ -3817,14 +3819,16 @@ static int emulator_write_emulated_onepage(unsigned 
long addr,

  mmio:
        trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, bytes, gpa, *(u64 *)val);
+       r = vcpu_mmio_write(vcpu, gpa, bytes, val);
        /*
         * Is this MMIO handled locally?
         */
-       if (!vcpu_mmio_write(vcpu, gpa, bytes, val))
+       if (!r)
                return X86EMUL_CONTINUE;

        vcpu->mmio_needed = 1;
-       vcpu->run->exit_reason = KVM_EXIT_MMIO;
+       vcpu->run->exit_reason = (r == -ENOTSYNC) ?
+               KVM_EXIT_MSIX_ROUTING_UPDATE : KVM_EXIT_MMIO;

This isn't very pretty, exit_reason should be written in vcpu_mmio_write(). I guess we can refactor it later.


/*
 * MSI-X MMIO region type encoding.
 *
 * The low byte (KVM_MSIX_MMIO_TYPE_DEV_MASK) selects the device class
 * that owns the region; only assigned devices are defined so far.  The
 * high byte (KVM_MSIX_MMIO_TYPE_BASE_MASK) selects which MSI-X
 * structure the region maps: the MSI-X table or the Pending Bit Array.
 */
#define KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV	(1 << 0)

#define KVM_MSIX_MMIO_TYPE_BASE_TABLE	(1 << 8)
#define KVM_MSIX_MMIO_TYPE_BASE_PBA	(1 << 9)

#define KVM_MSIX_MMIO_TYPE_DEV_MASK	0x00ff
#define KVM_MSIX_MMIO_TYPE_BASE_MASK	0xff00

Any explanation of these?

+struct kvm_msix_mmio_user {
+       __u32 dev_id;
+       __u16 type;
+       __u16 max_entries_nr;
+       __u64 base_addr;
+       __u64 base_va;
+       __u64 flags;
+       __u64 reserved[4];
+};
+


+int kvm_assigned_device_update_msix_mask_bit(struct kvm *kvm,
+                               int assigned_dev_id, int entry, u32 flag)
+{

Need a better name for 'flag' (and make it a bool).

+       int r = -EFAULT;
+       struct kvm_assigned_dev_kernel *adev;
+       int i;
+
+       if (!irqchip_in_kernel(kvm))
+               return r;
+
+       mutex_lock(&kvm->lock);
+       adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
+                                     assigned_dev_id);
+       if (!adev)
+               goto out;
+
+       for (i = 0; i<  adev->entries_nr; i++)
+               if (adev->host_msix_entries[i].entry == entry) {
+                       if (flag)
+                               disable_irq_nosync(
+                                       adev->host_msix_entries[i].vector);
+                       else
+                               enable_irq(adev->host_msix_entries[i].vector);
+                       r = 0;
+                       break;
+               }
+out:
+       mutex_unlock(&kvm->lock);
+       return r;
+}

@@ -1988,6 +2008,12 @@ static int kvm_dev_ioctl_create_vm(void)
                return r;
        }
  #endif
+       r = kvm_register_msix_mmio_dev(kvm);
+       if (r<  0) {
+               kvm_put_kvm(kvm);
+               return r;
+       }

Shouldn't this be part of individual KVM_REGISTER_MSIX_MMIO calls?

+static int msix_table_mmio_read(struct kvm_io_device *this, gpa_t addr, int 
len,
+                               void *val)
+{
+       struct kvm_msix_mmio_dev *mmio_dev =
+               container_of(this, struct kvm_msix_mmio_dev, table_dev);
+       struct kvm_msix_mmio *mmio;
+       int idx, ret = 0, entry, offset, r;
+
+       mutex_lock(&mmio_dev->lock);
+       idx = get_mmio_table_index(mmio_dev, addr, len);
+       if (idx<  0) {
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+       if ((addr&  0x3) || (len != 4&&  len != 8))
+               goto out;

What about (addr & 4) && (len == 8)? Is it supported? It may cross entry boundaries.

+       mmio =&mmio_dev->mmio[idx];
+
+       entry = (addr - mmio->table_base_addr) / PCI_MSIX_ENTRY_SIZE;
+       offset = addr&  0xf;
+       r = copy_from_user(val, (void *)(mmio->table_base_va +
+                       entry * PCI_MSIX_ENTRY_SIZE + offset), len);


+       if (r)
+               goto out;
+out:
+       mutex_unlock(&mmio_dev->lock);
+       return ret;
+}
+
+static int msix_table_mmio_write(struct kvm_io_device *this, gpa_t addr,
+                               int len, const void *val)
+{
+       struct kvm_msix_mmio_dev *mmio_dev =
+               container_of(this, struct kvm_msix_mmio_dev, table_dev);
+       struct kvm_msix_mmio *mmio;
+       int idx, entry, offset, ret = 0, r = 0;
+       gpa_t entry_base;
+       u32 old_ctrl, new_ctrl;
+
+       mutex_lock(&mmio_dev->lock);
+       idx = get_mmio_table_index(mmio_dev, addr, len);
+       if (idx<  0) {
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+       if ((addr&  0x3) || (len != 4&&  len != 8))
+               goto out;
+       mmio =&mmio_dev->mmio[idx];
+       entry = (addr - mmio->table_base_addr) / PCI_MSIX_ENTRY_SIZE;
+       entry_base = mmio->table_base_va + entry * PCI_MSIX_ENTRY_SIZE;
+       offset = addr&  0xF;
+
+       if (copy_from_user(&old_ctrl,
+                       entry_base + PCI_MSIX_ENTRY_VECTOR_CTRL,
+                       sizeof old_ctrl))
+               goto out;

get_user() is easier.

+
+       /* No allow writing to other fields when entry is unmasked */
+       if (!(old_ctrl&  PCI_MSIX_ENTRY_CTRL_MASKBIT)&&
+           offset != PCI_MSIX_ENTRY_VECTOR_CTRL)
+               goto out;
+
+       if (copy_to_user(entry_base + offset, val, len))
+               goto out;

+
+       if (copy_from_user(&new_ctrl,
+                       entry_base + PCI_MSIX_ENTRY_VECTOR_CTRL,
+                       sizeof new_ctrl))
+               goto out;

get_user() here as well — this call reads new_ctrl back, so it is a get, not a put.

+
+       if ((offset<  PCI_MSIX_ENTRY_VECTOR_CTRL&&  len == 4) ||
+           (offset<  PCI_MSIX_ENTRY_DATA&&  len == 8))
+               ret = -ENOTSYNC;
+       if (old_ctrl == new_ctrl)
+               goto out;
+       if (!(old_ctrl&  PCI_MSIX_ENTRY_CTRL_MASKBIT)&&
+                       (new_ctrl&  PCI_MSIX_ENTRY_CTRL_MASKBIT))
+               r = update_msix_mask_bit(mmio_dev->kvm, mmio, entry, 1);
+       else if ((old_ctrl&  PCI_MSIX_ENTRY_CTRL_MASKBIT)&&
+                       !(new_ctrl&  PCI_MSIX_ENTRY_CTRL_MASKBIT))
+               r = update_msix_mask_bit(mmio_dev->kvm, mmio, entry, 0);
+       if (r || ret)
+               ret = -ENOTSYNC;
+out:
+       mutex_unlock(&mmio_dev->lock);
+       return ret;
+}

blank line...

+static const struct kvm_io_device_ops msix_mmio_table_ops = {
+       .read     = msix_table_mmio_read,
+       .write    = msix_table_mmio_write,
+};
+
++
+int kvm_vm_ioctl_register_msix_mmio(struct kvm *kvm,
+                                   struct kvm_msix_mmio_user *mmio_user)
+{
+       struct kvm_msix_mmio_dev *mmio_dev =&kvm->msix_mmio_dev;
+       struct kvm_msix_mmio *mmio = NULL;
+       int r = 0, i;
+
+       mutex_lock(&mmio_dev->lock);
+       for (i = 0; i<  mmio_dev->mmio_nr; i++) {
+               if (mmio_dev->mmio[i].dev_id == mmio_user->dev_id&&
+                   (mmio_dev->mmio[i].type&  KVM_MSIX_MMIO_TYPE_DEV_MASK) ==
+                   (mmio_user->type&  KVM_MSIX_MMIO_TYPE_DEV_MASK)) {
+                       mmio =&mmio_dev->mmio[i];
+                       if (mmio->max_entries_nr != mmio_user->max_entries_nr) {
+                               r = -EINVAL;
+                               goto out;
+                       }
+                       break;
+               }
+       }
+       if (!mmio) {
+               if (mmio_dev->mmio_nr == KVM_MSIX_MMIO_MAX) {
+                       r = -ENOSPC;
+                       goto out;
+               }
+               mmio =&mmio_dev->mmio[mmio_dev->mmio_nr];
+               mmio_dev->mmio_nr++;
+       }
+       mmio->max_entries_nr = mmio_user->max_entries_nr;

Sanity check to avoid overflow.

+       mmio->dev_id = mmio_user->dev_id;
+       mmio->flags = mmio_user->flags;

Check for unsupported bits (all of them at present?)

+       if ((mmio_user->type&  KVM_MSIX_MMIO_TYPE_DEV_MASK) ==
+                       KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV)
+               mmio->type = KVM_MSIX_MMIO_TYPE_ASSIGNED_DEV;
+
+       if ((mmio_user->type&  KVM_MSIX_MMIO_TYPE_BASE_MASK) ==
+                       KVM_MSIX_MMIO_TYPE_BASE_TABLE) {
+               mmio->type |= KVM_MSIX_MMIO_TYPE_BASE_TABLE;
+               mmio->table_base_addr = mmio_user->base_addr;
+               mmio->table_base_va = mmio_user->base_va;

Check for va in kernel space.

+       } else if ((mmio_user->type&  KVM_MSIX_MMIO_TYPE_BASE_MASK) ==
+                       KVM_MSIX_MMIO_TYPE_BASE_PBA) {
+               mmio->type |= KVM_MSIX_MMIO_TYPE_BASE_PBA;
+               mmio->pba_base_addr = mmio_user->base_addr;
+               mmio->pba_base_va = mmio_user->base_va;
+       }
+out:
+       mutex_unlock(&mmio_dev->lock);
+       return r;
+}
+
+

In all, looks reasonable. I'd like to see documentation for it, and review from the pci people. Alex, mst?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to