Currently the method of dealing with an IO operation on a bus (PIO/MMIO)
is to call the read or write callback for each device registered
on the bus until we find a device which handles it.

Since the number of devices on a bus can be significant due to ioeventfds
and coalesced MMIO zones, this leads to a lot of overhead on each IO
operation.

Instead of registering devices, we now register ranges which points to
a device. Lookup is done using an efficient bsearch instead of a linear
search.

Performance test was conducted by comparing exit count per second with
200 ioeventfds created on one byte and the guest is trying to access a
different byte continuously (triggering usermode exits).
Before the patch the guest has achieved 259k exits per second, after the
patch the guest does 274k exits per second.

Changes in V3:
 - Small fix in error path of i8259.

Changes in V2:
 - Only register a device once.
 - Support for multiple devices using same range.
 - Added performance benchmark.
 - Using bsearch() and sort() found in /lib/.

Cc: Avi Kivity <a...@redhat.com>
Cc: Marcelo Tosatti <mtosa...@redhat.com>
Signed-off-by: Sasha Levin <levinsasha...@gmail.com>
---
 arch/x86/kvm/i8254.c      |    5 +-
 arch/x86/kvm/i8259.c      |   56 ++++++++++++++++++----
 arch/x86/kvm/irq.h        |    4 +-
 arch/x86/kvm/x86.c        |    6 ++-
 include/linux/kvm_host.h  |   18 ++++----
 virt/kvm/coalesced_mmio.c |    3 +-
 virt/kvm/eventfd.c        |    3 +-
 virt/kvm/ioapic.c         |    3 +-
 virt/kvm/kvm_main.c       |  112 ++++++++++++++++++++++++++++++++++++++++-----
 9 files changed, 171 insertions(+), 39 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index efad723..094e057 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -713,14 +713,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
        kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
 
        kvm_iodevice_init(&pit->dev, &pit_dev_ops);
-       ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
+       ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, 
KVM_PIT_MEM_LENGTH, &pit->dev);
        if (ret < 0)
                goto fail;
 
        if (flags & KVM_PIT_SPEAKER_DUMMY) {
                kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
                ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
-                                               &pit->speaker_dev);
+                                              KVM_SPEAKER_BASE_ADDRESS, 4,
+                                             &pit->speaker_dev);
                if (ret < 0)
                        goto fail_unregister;
        }
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 19fe855..9e18698 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -459,15 +459,27 @@ static int picdev_in_range(gpa_t addr)
        }
 }
 
-static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
+static inline struct kvm_pic *to_pic(struct kvm_io_device *dev, gpa_t addr)
 {
-       return container_of(dev, struct kvm_pic, dev);
+       switch (addr) {
+       case 0x20:
+       case 0x21:
+               return container_of(dev, struct kvm_pic, dev_master);
+       case 0xa0:
+       case 0xa1:
+               return container_of(dev, struct kvm_pic, dev_slave);
+       case 0x4d0:
+       case 0x4d1:
+               return container_of(dev, struct kvm_pic, dev_eclr);
+       }
+
+       return NULL;
 }
 
 static int picdev_write(struct kvm_io_device *this,
                         gpa_t addr, int len, const void *val)
 {
-       struct kvm_pic *s = to_pic(this);
+       struct kvm_pic *s = to_pic(this, addr);
        unsigned char data = *(unsigned char *)val;
        if (!picdev_in_range(addr))
                return -EOPNOTSUPP;
@@ -497,7 +509,7 @@ static int picdev_write(struct kvm_io_device *this,
 static int picdev_read(struct kvm_io_device *this,
                       gpa_t addr, int len, void *val)
 {
-       struct kvm_pic *s = to_pic(this);
+       struct kvm_pic *s = to_pic(this, addr);
        unsigned char data = 0;
        if (!picdev_in_range(addr))
                return -EOPNOTSUPP;
@@ -560,16 +572,36 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
        /*
         * Initialize PIO device
         */
-       kvm_iodevice_init(&s->dev, &picdev_ops);
+       kvm_iodevice_init(&s->dev_master, &picdev_ops);
+       kvm_iodevice_init(&s->dev_slave, &picdev_ops);
+       kvm_iodevice_init(&s->dev_eclr, &picdev_ops);
        mutex_lock(&kvm->slots_lock);
-       ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
+       ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2, 
&s->dev_master);
+       if (ret < 0)
+               goto fail_unlock;
+
+       ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave);
+       if (ret < 0)
+               goto fail_unlock;
+
+       ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr);
+       if (ret < 0)
+               goto fail_unlock;
+
        mutex_unlock(&kvm->slots_lock);
-       if (ret < 0) {
-               kfree(s);
-               return NULL;
-       }
 
        return s;
+
+fail_unlock:
+
+       mutex_unlock(&kvm->slots_lock);
+       kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master);
+       kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave);
+       kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_eclr);
+
+       kfree(s);
+
+       return NULL;
 }
 
 void kvm_destroy_pic(struct kvm *kvm)
@@ -577,7 +609,9 @@ void kvm_destroy_pic(struct kvm *kvm)
        struct kvm_pic *vpic = kvm->arch.vpic;
 
        if (vpic) {
-               kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev);
+               kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_master);
+               kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_slave);
+               kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &vpic->dev_eclr);
                kvm->arch.vpic = NULL;
                kfree(vpic);
        }
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 53e2d08..2086f2b 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -66,7 +66,9 @@ struct kvm_pic {
        struct kvm *kvm;
        struct kvm_kpic_state pics[2]; /* 0 is master pic, 1 is slave pic */
        int output;             /* intr from master PIC */
-       struct kvm_io_device dev;
+       struct kvm_io_device dev_master;
+       struct kvm_io_device dev_slave;
+       struct kvm_io_device dev_eclr;
        void (*ack_notifier)(void *opaque, int irq);
        unsigned long irq_states[16];
 };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e80f0d7..4cea9cf 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3561,7 +3561,11 @@ long kvm_arch_vm_ioctl(struct file *filp,
                        if (r) {
                                mutex_lock(&kvm->slots_lock);
                                kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
-                                                         &vpic->dev);
+                                                         &vpic->dev_master);
+                               kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
+                                                         &vpic->dev_slave);
+                               kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS,
+                                                         &vpic->dev_eclr);
                                mutex_unlock(&kvm->slots_lock);
                                kfree(vpic);
                                goto create_irqchip_unlock;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ff4d406..d0e42f3 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -55,16 +55,16 @@ struct kvm;
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
-/*
- * It would be nice to use something smarter than a linear search, TBD...
- * Thankfully we dont expect many devices to register (famous last words :),
- * so until then it will suffice.  At least its abstracted so we can change
- * in one place.
- */
+struct kvm_io_range {
+       gpa_t addr;
+       int len;
+       struct kvm_io_device *dev;
+};
+
 struct kvm_io_bus {
        int                   dev_count;
 #define NR_IOBUS_DEVS 300
-       struct kvm_io_device *devs[NR_IOBUS_DEVS];
+       struct kvm_io_range range[NR_IOBUS_DEVS];
 };
 
 enum kvm_bus {
@@ -77,8 +77,8 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, 
gpa_t addr,
                     int len, const void *val);
 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
                    void *val);
-int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-                           struct kvm_io_device *dev);
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+                           int len, struct kvm_io_device *dev);
 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
                              struct kvm_io_device *dev);
 
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index 2316ec1..a6ec206 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -141,7 +141,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
        dev->zone = *zone;
 
        mutex_lock(&kvm->slots_lock);
-       ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+       ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr,
+                                     zone->size, &dev->dev);
        if (ret < 0)
                goto out_free_dev;
        list_add_tail(&dev->list, &kvm->coalesced_zones);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 73358d2..f59c1e8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd 
*args)
 
        kvm_iodevice_init(&p->dev, &ioeventfd_ops);
 
-       ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
+       ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
+                                     &p->dev);
        if (ret < 0)
                goto unlock_fail;
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 8df1ca1..86aeea1 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -394,7 +394,8 @@ int kvm_ioapic_init(struct kvm *kvm)
        kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
        ioapic->kvm = kvm;
        mutex_lock(&kvm->slots_lock);
-       ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+       ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
+                                      IOAPIC_MEM_LENGTH, &ioapic->dev);
        mutex_unlock(&kvm->slots_lock);
        if (ret < 0) {
                kvm->arch.vioapic = NULL;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index aefdda3..0c228f4 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,8 @@
 #include <linux/srcu.h>
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/bsearch.h>
 
 #include <asm/processor.h>
 #include <asm/io.h>
@@ -2391,24 +2393,94 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
        int i;
 
        for (i = 0; i < bus->dev_count; i++) {
-               struct kvm_io_device *pos = bus->devs[i];
+               struct kvm_io_device *pos = bus->range[i].dev;
 
                kvm_iodevice_destructor(pos);
        }
        kfree(bus);
 }
 
+int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
+{
+       const struct kvm_io_range *r1 = p1;
+       const struct kvm_io_range *r2 = p2;
+
+       if (r1->addr < r2->addr)
+               return -1;
+       if (r1->addr + r1->len > r2->addr + r2->len)
+               return 1;
+       return 0;
+}
+
+int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
+                         gpa_t addr, int len)
+{
+       int i;
+
+       if (bus->dev_count == NR_IOBUS_DEVS)
+               return -ENOSPC;
+
+       bus->range[bus->dev_count++] = (struct kvm_io_range) {
+               .addr = addr,
+               .len = len,
+               .dev = dev,
+       };
+
+       sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
+               kvm_io_bus_sort_cmp, NULL);
+
+       return 0;
+}
+
+int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
+                            gpa_t addr, int len)
+{
+       struct kvm_io_range *range, key;
+       int off;
+
+       key = (struct kvm_io_range) {
+               .addr = addr,
+               .len = len,
+       };
+
+       range = bsearch(&key, bus->range, bus->dev_count,
+                       sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp);
+       if (range == NULL)
+               return -ENOENT;
+
+       off = range - bus->range;
+
+       while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0)
+               off--;
+
+       return off;
+}
+
 /* kvm_io_bus_write - called under kvm->slots_lock */
 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                     int len, const void *val)
 {
-       int i;
+       int idx;
        struct kvm_io_bus *bus;
+       struct kvm_io_range range;
+
+       range = (struct kvm_io_range) {
+               .addr = addr,
+               .len = len,
+       };
 
        bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-       for (i = 0; i < bus->dev_count; i++)
-               if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
+       idx = kvm_io_bus_get_first_dev(bus, addr, len);
+       if (idx < 0)
+               return -EOPNOTSUPP;
+
+       while (idx < bus->dev_count &&
+               kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
+               if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val))
                        return 0;
+               idx++;
+       }
+
        return -EOPNOTSUPP;
 }
 
@@ -2416,19 +2488,33 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus 
bus_idx, gpa_t addr,
 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
                    int len, void *val)
 {
-       int i;
+       int idx;
        struct kvm_io_bus *bus;
+       struct kvm_io_range range;
+
+       range = (struct kvm_io_range) {
+               .addr = addr,
+               .len = len,
+       };
 
        bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-       for (i = 0; i < bus->dev_count; i++)
-               if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
+       idx = kvm_io_bus_get_first_dev(bus, addr, len);
+       if (idx < 0)
+               return -EOPNOTSUPP;
+
+       while (idx < bus->dev_count &&
+               kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
+               if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val))
                        return 0;
+               idx++;
+       }
+
        return -EOPNOTSUPP;
 }
 
 /* Caller must hold slots_lock. */
-int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-                           struct kvm_io_device *dev)
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+                           int len, struct kvm_io_device *dev)
 {
        struct kvm_io_bus *new_bus, *bus;
 
@@ -2440,7 +2526,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus 
bus_idx,
        if (!new_bus)
                return -ENOMEM;
        memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
-       new_bus->devs[new_bus->dev_count++] = dev;
+       kvm_io_bus_insert_dev(new_bus, dev, addr, len);
        rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
        synchronize_srcu_expedited(&kvm->srcu);
        kfree(bus);
@@ -2464,9 +2550,11 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum 
kvm_bus bus_idx,
 
        r = -ENOENT;
        for (i = 0; i < new_bus->dev_count; i++)
-               if (new_bus->devs[i] == dev) {
+               if (new_bus->range[i].dev == dev) {
                        r = 0;
-                       new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
+                       new_bus->range[i] = 
new_bus->range[--new_bus->dev_count];
+                       sort(new_bus->range, new_bus->dev_count, sizeof(struct 
kvm_io_range),
+                               kvm_io_bus_sort_cmp, NULL);
                        break;
                }
 
-- 
1.7.6

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to