On Thu, Nov 18, 2010 at 01:03:44PM +0200, Avi Kivity wrote:
> On 11/18/2010 12:57 PM, Michael S. Tsirkin wrote:
> >So the following on top will fix it all.
> >Any more comments before I bundle it up,
> >test and repost?
> >
> 
> Nope (not that I can comment on an incremental).

Here it is rolled up.

> I guess I should create an empty Documentation/kvm/locking.txt and
> force everyone else to update it.

Aren't comments near the relevant fields better?
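
To spell out the scheme those comments document: kvm->irq_routing and
irqfd->irq_entry are RCU-protected pointers. Updaters build the new object,
publish it with rcu_assign_pointer() while holding irq_lock (plus
irqfds.lock for the per-irqfd entry), and free the old object only after
synchronize_rcu(); readers sample the pointer with rcu_dereference() inside
rcu_read_lock()/rcu_read_unlock(). Below is a minimal userspace sketch of
that lifecycle using liburcu; routing_table and its contents are made up
for illustration, this is not KVM code (build with something like
"cc rcu-demo.c -lurcu"):

/* rcu-demo.c */
#include <urcu.h>	/* liburcu: rcu_read_lock(), synchronize_rcu(), ... */
#include <stdio.h>
#include <stdlib.h>

struct routing_table {			/* stand-in for kvm_irq_routing_table */
	int nr_rt_entries;
};

static struct routing_table *irq_routing;	/* RCU-protected pointer */

int main(void)
{
	struct routing_table *old, *new;

	rcu_register_thread();

	/* Update side: build the new table, then publish it, as
	 * kvm_set_irq_routing() does via kvm_irq_routing_update(). */
	new = malloc(sizeof(*new));
	new->nr_rt_entries = 24;
	old = irq_routing;
	rcu_assign_pointer(irq_routing, new);

	/* Read side: sample the pointer inside a read-side critical
	 * section, as irqfd_wakeup() does for the MSI fast path. */
	rcu_read_lock();
	struct routing_table *t = rcu_dereference(irq_routing);
	if (t)
		printf("%d routing entries\n", t->nr_rt_entries);
	rcu_read_unlock();

	/* Grace period: after this no reader can still hold a
	 * reference to the old table, so freeing it is safe. */
	synchronize_rcu();
	free(old);

	rcu_unregister_thread();
	return 0;
}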

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a055742..d13ced3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -16,6 +16,7 @@
 #include <linux/mm.h>
 #include <linux/preempt.h>
 #include <linux/msi.h>
+#include <linux/rcupdate.h>
 #include <asm/signal.h>
 
 #include <linux/kvm.h>
@@ -206,6 +207,8 @@ struct kvm {
 
        struct mutex irq_lock;
 #ifdef CONFIG_HAVE_KVM_IRQCHIP
+       /* Update side is protected by irq_lock and,
+        * if configured, irqfds.lock. */
        struct kvm_irq_routing_table __rcu *irq_routing;
        struct hlist_head mask_notifier_list;
        struct hlist_head irq_ack_notifier_list;
@@ -462,6 +465,8 @@ void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic,
                                   unsigned long *deliver_bitmask);
 #endif
 int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level);
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm,
+               int irq_source_id, int level);
 void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin);
 void kvm_register_irq_ack_notifier(struct kvm *kvm,
                                   struct kvm_irq_ack_notifier *kian);
@@ -603,6 +608,7 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 void kvm_eventfd_init(struct kvm *kvm);
 int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
 void kvm_irqfd_release(struct kvm *kvm);
+void kvm_irq_routing_update(struct kvm *, struct kvm_irq_routing_table *);
 int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 
 #else
@@ -614,6 +620,12 @@ static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
 }
 
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
+static inline void kvm_irq_routing_update(struct kvm *kvm,
+                                         struct kvm_irq_routing_table *irq_rt)
+{
+       rcu_assign_pointer(kvm->irq_routing, irq_rt);
+}
+
 static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 {
        return -ENOSYS;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index c1f1e3c..b0cfae7 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -44,14 +44,19 @@
  */
 
 struct _irqfd {
-       struct kvm               *kvm;
-       struct eventfd_ctx       *eventfd;
-       int                       gsi;
-       struct list_head          list;
-       poll_table                pt;
-       wait_queue_t              wait;
-       struct work_struct        inject;
-       struct work_struct        shutdown;
+       /* Used for MSI fast-path */
+       struct kvm *kvm;
+       wait_queue_t wait;
+       /* Update side is protected by irqfds.lock */
+       struct kvm_kernel_irq_routing_entry __rcu *irq_entry;
+       /* Used for level IRQ fast-path */
+       int gsi;
+       struct work_struct inject;
+       /* Used for setup/shutdown */
+       struct eventfd_ctx *eventfd;
+       struct list_head list;
+       poll_table pt;
+       struct work_struct shutdown;
 };
 
 static struct workqueue_struct *irqfd_cleanup_wq;
@@ -125,10 +130,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
 {
        struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait);
        unsigned long flags = (unsigned long)key;
+       struct kvm_kernel_irq_routing_entry *irq;
 
-       if (flags & POLLIN)
+       if (flags & POLLIN) {
+               rcu_read_lock();
+               irq = rcu_dereference(irqfd->irq_entry);
                /* An event has been signaled, inject an interrupt */
-               schedule_work(&irqfd->inject);
+               if (irq)
+                       kvm_set_msi(irq, irqfd->kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1);
+               else
+                       schedule_work(&irqfd->inject);
+               rcu_read_unlock();
+       }
 
        if (flags & POLLHUP) {
                /* The eventfd is closing, detach from KVM */
@@ -163,9 +176,31 @@ irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh,
        add_wait_queue(wqh, &irqfd->wait);
 }
 
+/* Must be called under irqfds.lock */
+static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd,
+                        struct kvm_irq_routing_table *irq_rt)
+{
+       struct kvm_kernel_irq_routing_entry *e;
+       struct hlist_node *n;
+
+       if (irqfd->gsi >= irq_rt->nr_rt_entries) {
+               rcu_assign_pointer(irqfd->irq_entry, NULL);
+               return;
+       }
+
+       hlist_for_each_entry(e, n, &irq_rt->map[irqfd->gsi], link) {
+               /* Only fast-path MSI. */
+               if (e->type == KVM_IRQ_ROUTING_MSI)
+                       rcu_assign_pointer(irqfd->irq_entry, e);
+               else
+                       rcu_assign_pointer(irqfd->irq_entry, NULL);
+       }
+}
+
 static int
 kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
 {
+       struct kvm_irq_routing_table *irq_rt;
        struct _irqfd *irqfd, *tmp;
        struct file *file = NULL;
        struct eventfd_ctx *eventfd = NULL;
@@ -215,6 +250,10 @@ kvm_irqfd_assign(struct kvm *kvm, int fd, int gsi)
                goto fail;
        }
 
+       irq_rt = rcu_dereference_protected(kvm->irq_routing,
+                                          lockdep_is_held(&kvm->irqfds.lock));
+       irqfd_update(kvm, irqfd, irq_rt);
+
        events = file->f_op->poll(file, &irqfd->pt);
 
        list_add_tail(&irqfd->list, &kvm->irqfds.items);
@@ -271,8 +310,15 @@ kvm_irqfd_deassign(struct kvm *kvm, int fd, int gsi)
        spin_lock_irq(&kvm->irqfds.lock);
 
        list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) {
-               if (irqfd->eventfd == eventfd && irqfd->gsi == gsi)
+               if (irqfd->eventfd == eventfd && irqfd->gsi == gsi) {
+                       /* This rcu_assign_pointer is needed for when
+                        * another thread calls kvm_irq_routing_update before
+                        * we flush the workqueue below.
+                        * It is paired with the synchronize_rcu done by the
+                        * caller of that function. */
+                       rcu_assign_pointer(irqfd->irq_entry, NULL);
                        irqfd_deactivate(irqfd);
+               }
        }
 
        spin_unlock_irq(&kvm->irqfds.lock);
@@ -321,6 +367,23 @@ kvm_irqfd_release(struct kvm *kvm)
 
 }
 
+/* Change irq_routing and irqfd.  Caller must invoke synchronize_rcu
+ * afterwards. */
+void kvm_irq_routing_update(struct kvm *kvm,
+                           struct kvm_irq_routing_table *irq_rt)
+{
+       struct _irqfd *irqfd;
+
+       spin_lock_irq(&kvm->irqfds.lock);
+
+       rcu_assign_pointer(kvm->irq_routing, irq_rt);
+
+       list_for_each_entry(irqfd, &kvm->irqfds.items, list)
+               irqfd_update(kvm, irqfd, irq_rt);
+
+       spin_unlock_irq(&kvm->irqfds.lock);
+}
+
 /*
  * create a host-wide workqueue for issuing deferred shutdown requests
  * aggregated from all vm* instances. We need our own isolated single-thread
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index 8edca91..9f614b4 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -114,8 +114,8 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
        return r;
 }
 
-static int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-                      struct kvm *kvm, int irq_source_id, int level)
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+               struct kvm *kvm, int irq_source_id, int level)
 {
        struct kvm_lapic_irq irq;
 
@@ -409,8 +409,9 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
        mutex_lock(&kvm->irq_lock);
        old = kvm->irq_routing;
-       rcu_assign_pointer(kvm->irq_routing, new);
+       kvm_irq_routing_update(kvm, new);
        mutex_unlock(&kvm->irq_lock);
+
        synchronize_rcu();
 
        new = old;
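
For completeness, the new injection path in irqfd_wakeup() boils down to:
take the fast path when an MSI route has been published for this irqfd,
otherwise fall back to the workqueue. Here is a compressed userspace sketch
of that decision, again with liburcu; inject_msi(), schedule_inject_work()
and the msi values are made-up stand-ins, not KVM code:

/* fastpath-demo.c: cc fastpath-demo.c -lurcu */
#include <urcu.h>
#include <stdio.h>

struct msi_route {			/* stand-in for the MSI routing entry */
	unsigned int addr, data;
};

static struct msi_route *irq_entry;	/* models irqfd->irq_entry */

static void inject_msi(struct msi_route *r)
{
	printf("fast path: MSI %#x/%#x injected from wakeup context\n",
	       r->addr, r->data);
}

static void schedule_inject_work(void)
{
	printf("slow path: deferred to the injection workqueue\n");
}

/* Models irqfd_wakeup() on POLLIN: inject directly only when an MSI
 * route is cached; everything else goes through process context. */
static void wakeup(void)
{
	struct msi_route *r;

	rcu_read_lock();
	r = rcu_dereference(irq_entry);
	if (r)
		inject_msi(r);
	else
		schedule_inject_work();
	rcu_read_unlock();
}

int main(void)
{
	static struct msi_route msi = { 0xfee00000u, 0x4041u };

	rcu_register_thread();
	wakeup();			/* nothing cached yet: slow path */
	rcu_assign_pointer(irq_entry, &msi);
	wakeup();			/* route cached: fast path */
	rcu_unregister_thread();
	return 0;
}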