Ben-Ami Yassour wrote:
From: Han, Weidong <[EMAIL PROTECTED]>

This patch adds support for handling PCI devices that are assigned to
the guest ("PCI passthrough").
+
+/*
+ * Used to find a registered host PCI device (a "passthrough" device)
+ * during ioctls, interrupts or EOI
+ */
+struct kvm_pci_pt_dev_list *
+kvm_find_pci_pt_dev(struct list_head *head,
+               struct kvm_pci_pt_info *pt_pci_info, int irq, int source)
+{
+       struct list_head *ptr;
+       struct kvm_pci_pt_dev_list *match;
+
+       list_for_each(ptr, head) {
+               match = list_entry(ptr, struct kvm_pci_pt_dev_list, list);
+
+               switch (source) {
+               case KVM_PT_SOURCE_IRQ:
+                       /*
+                        * Used to find a registered host device
+                        * during interrupt context on host
+                        */
+                       if (match->pt_dev.host.irq == irq)
+                               return match;
+                       break;
+               case KVM_PT_SOURCE_IRQ_ACK:
+                       /*
+                        * Used to find a registered host device when
+                        * the guest acks an interrupt
+                        */
+                       if (match->pt_dev.guest.irq == irq)
+                               return match;
+                       break;
+               case KVM_PT_SOURCE_UPDATE:
+                       if ((match->pt_dev.host.busnr == pt_pci_info->busnr) &&
+                           (match->pt_dev.host.devfn == pt_pci_info->devfn))
+                               return match;
+                       break;
+               }
+       }
+       return NULL;
+}

This monster is best split into three functions each handling a separate case, without the 'source' argument.

+static void kvm_pci_pt_work_fn(struct work_struct *work)
+{
+       struct kvm_pci_pt_dev_list *match;
+       struct kvm_pci_pt_work *int_work;
+       int source;
+       unsigned long flags;
+       int guest_irq;
+       int host_irq;
+
+       int_work = container_of(work, struct kvm_pci_pt_work, work);
+
+       source = int_work->source ? KVM_PT_SOURCE_IRQ_ACK : KVM_PT_SOURCE_IRQ;
+
+       /* This is taken to safely inject irq inside the guest. When
+        * the interrupt injection (or the ioapic code) uses a
+        * finer-grained lock, update this
+        */
+       mutex_lock(&int_work->kvm->lock);
+       read_lock_irqsave(&kvm_pci_pt_lock, flags);
+       match = kvm_find_pci_pt_dev(&int_work->kvm->arch.pci_pt_dev_head, NULL,
+                                   int_work->irq, source);
+       if (!match) {
+               printk(KERN_ERR "%s: no matching device assigned to guest "
+                      "found for irq %d, source = %d!\n",
+                      __func__, int_work->irq, int_work->source);
+               read_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+               goto out;
+       }
+       guest_irq = match->pt_dev.guest.irq;
+       host_irq = match->pt_dev.host.irq;
+       read_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+
+       if (source == KVM_PT_SOURCE_IRQ)
+               kvm_set_irq(int_work->kvm, guest_irq, 1);
+       else {
+               kvm_set_irq(int_work->kvm, int_work->irq, 0);
+               enable_irq(host_irq);
+       }
+out:
+       mutex_unlock(&int_work->kvm->lock);
+       kvm_put_kvm(int_work->kvm);
+}
+
+/* FIXME: Implement the OR logic needed to make shared interrupts on
+ * this line behave properly
+ */

Isn't this a showstopper? There is no easy way for a user to avoid sharing, especially as we have only three pci irqs at present.

+static irqreturn_t kvm_pci_pt_dev_intr(int irq, void *dev_id)
+{
+       struct kvm *kvm = (struct kvm *) dev_id;
+       struct kvm_pci_pt_dev_list *pci_pt_dev;
+
+       if (!test_bit(irq, pt_irq_handled))
+               return IRQ_NONE;
+
+       read_lock(&kvm_pci_pt_lock);
+       pci_pt_dev = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL,
+                                        irq, KVM_PT_SOURCE_IRQ);
+       if (!pci_pt_dev) {
+               read_unlock(&kvm_pci_pt_lock);
+               return IRQ_NONE;
+       }


I see we don't reuse the result of the search. I guess we can't, since the list may change between the interrupt and the execution of the work function.

+
+       pci_pt_dev->pt_dev.int_work.irq = irq;
+       pci_pt_dev->pt_dev.int_work.kvm = kvm;
+       pci_pt_dev->pt_dev.int_work.source = 0;
+

For a bool, use false, not 0. But 'source' isn't really a good name for a boolean. Perhaps 'is_ack'?

+
+/* Ack the irq line for a passthrough device */
+static void kvm_pci_pt_ack_irq(void *opaque, int irq)
+{
+       struct kvm *kvm = opaque;
+       struct kvm_pci_pt_dev_list *pci_pt_dev;
+       unsigned long flags;
+
+       if (irq == -1)
+               return;
+
+       read_lock_irqsave(&kvm_pci_pt_lock, flags);
+       pci_pt_dev = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, NULL, irq,
+                                        KVM_PT_SOURCE_IRQ_ACK);
+       if (!pci_pt_dev) {
+               read_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+               return;
+       }
+       pci_pt_dev->pt_dev.ack_work.irq = irq;
+       pci_pt_dev->pt_dev.ack_work.kvm = kvm;
+       pci_pt_dev->pt_dev.ack_work.source = 1;
+
+       kvm_get_kvm(kvm);
+       schedule_work(&pci_pt_dev->pt_dev.ack_work.work);
+       read_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+}

Why do we have to schedule this?  We're already in process context, no?

Is this to avoid recursion into the pic/ioapic code? If so, we can use a queue to avoid this. That's not a merge blocker, however, so it can be done later.

diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
index 76f3921..b6c5d00 100644
--- a/include/asm-x86/kvm_para.h
+++ b/include/asm-x86/kvm_para.h
@@ -142,6 +142,20 @@ static inline unsigned int kvm_arch_para_features(void)
        return cpuid_eax(KVM_CPUID_FEATURES);
 }
-#endif
+#endif /* KERNEL */

Move the following to linux/kvm.h. This used to be oriented at paravirt, but is really a host->kernel communication API.

+/* Stores information for identifying host PCI devices assigned to the
+ * guest: this is used in the host kernel and in the userspace.
+ */
+struct kvm_pci_pt_info {
+       unsigned char busnr;
+       unsigned int devfn;
+       __u32 irq;
+};

Badly aligned. Please use __u32 for all fields, and add a __u32 padding field at the end so we have the same ABI for i386 and x86_64.

Some pci devices support more than one irq. Should we make irq an array? Alternatively, decouple irq assignment from pci information and let userspace handle everything. This lets us assign non-pci devices (like serial ports, etc.).

Also, I don't like the term "passthrough", and very much dislike "pt", especially in public interfaces. It's simply too generic. Can we switch to "device assignment"?

  * ioctls for vcpu fds
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 8ce93c7..0b5bc40 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -288,13 +288,22 @@ void kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int 
irq, int level)
 static void __kvm_ioapic_update_eoi(struct kvm_ioapic *ioapic, int gsi)
 {
        union ioapic_redir_entry *ent;
+       struct kvm_pci_pt_dev_list *match;
+       unsigned long flags;
ent = &ioapic->redirtbl[gsi];
        ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
-       if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
-               ioapic_service(ioapic, gsi);
+
+       read_lock_irqsave(&kvm_pci_pt_lock, flags);
+       match = kvm_find_pci_pt_dev(&ioapic->kvm->arch.pci_pt_dev_head, NULL,
+                                   gsi, KVM_PT_SOURCE_IRQ_ACK);
+       read_unlock_irqrestore(&kvm_pci_pt_lock, flags);
+       if (!match) {
+               if (!ent->fields.mask && (ioapic->irr & (1 << gsi)))
+                       ioapic_service(ioapic, gsi);
+       }

What's device assignment code doing in the ioapic? This should be done through the notifier (if it needs to return a value, great).

if (ioapic->ack_notifier)
                ioapic->ack_notifier(ioapic->kvm, gsi);


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to