vhost-blk is an in-kernel virtio-blk device accelerator. vhost-blk is
similar to vhost-net. It handles virtio-blk's request and completion
on the host kernel side.

How to use:
-----------------------------
Load the vhost-blk.ko module on the host side and specify the vhost flag.
   $ lkvm run -d disk.img,vhost

Performance evaluation:
-----------------------------
The comparison is between the kvm tool with the userspace implementation
and the kvm tool with vhost-blk.

1) Fio with libaio ioengine on Fusion IO device
With bio-based IO path, sequential read/write, random read/write
IOPS boost         : 8.4%, 15.3%, 10.4%, 14.6%
Latency improvement: 8.5%, 15.4%, 10.4%, 15.1%

2) Fio with vsync ioengine on Fusion IO device
With bio-based IO path, sequential read/write, random read/write
IOPS boost         : 10.5%, 4.8%, 5.2%, 5.6%
Latency improvement: 11.4%, 5.0%, 5.2%, 5.8%

Signed-off-by: Asias He <[email protected]>
---
 tools/kvm/builtin-run.c            |    2 +
 tools/kvm/disk/core.c              |    2 +
 tools/kvm/include/kvm/disk-image.h |    2 +
 tools/kvm/virtio/blk.c             |  134 +++++++++++++++++++++++++++++++++++-
 4 files changed, 138 insertions(+), 2 deletions(-)

diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c
index 8e1627e..0e213bf 100644
--- a/tools/kvm/builtin-run.c
+++ b/tools/kvm/builtin-run.c
@@ -178,6 +178,8 @@ static int img_name_parser(const struct option *opt, const 
char *arg, int unset)
                                disk_image[image_count].readonly = true;
                        else if (strncmp(sep + 1, "direct", 6) == 0)
                                disk_image[image_count].direct = true;
+                       else if (strncmp(sep + 1, "vhost", 5) == 0)
+                               disk_image[image_count].use_vhost = true;
                        *sep = 0;
                        cur = sep + 1;
                }
diff --git a/tools/kvm/disk/core.c b/tools/kvm/disk/core.c
index 621c940..8aa5091 100644
--- a/tools/kvm/disk/core.c
+++ b/tools/kvm/disk/core.c
@@ -149,6 +149,8 @@ struct disk_image **disk_image__open_all(struct 
disk_image_params *params, int c
                        err = disks[i];
                        goto error;
                }
+               if (params[i].use_vhost)
+                       disks[i]->use_vhost = true;
        }
 
        return disks;
diff --git a/tools/kvm/include/kvm/disk-image.h 
b/tools/kvm/include/kvm/disk-image.h
index 7ae17f8..0a86515 100644
--- a/tools/kvm/include/kvm/disk-image.h
+++ b/tools/kvm/include/kvm/disk-image.h
@@ -41,6 +41,7 @@ struct disk_image_operations {
 
 struct disk_image_params {
        const char *filename;
+       bool use_vhost;
        bool readonly;
        bool direct;
 };
@@ -57,6 +58,7 @@ struct disk_image {
 #ifdef CONFIG_HAS_AIO
        io_context_t                    ctx;
 #endif
+       bool                            use_vhost;
 };
 
 struct disk_image *disk_image__open(const char *filename, bool readonly, bool 
direct);
diff --git a/tools/kvm/virtio/blk.c b/tools/kvm/virtio/blk.c
index beebd24..c1e2e18 100644
--- a/tools/kvm/virtio/blk.c
+++ b/tools/kvm/virtio/blk.c
@@ -12,6 +12,7 @@
 #include "kvm/virtio-pci.h"
 #include "kvm/virtio.h"
 
+#include <linux/vhost.h>
 #include <linux/virtio_ring.h>
 #include <linux/virtio_blk.h>
 #include <linux/kernel.h>
@@ -19,6 +20,8 @@
 #include <linux/types.h>
 #include <pthread.h>
 
+/* TODO: We can remove this after VHOST_BLK_SET_BACKEND goes in linux/vhost.h 
*/
+#define VHOST_BLK_SET_BACKEND _IOW(VHOST_VIRTIO, 0x40, struct vhost_vring_file)
 #define VIRTIO_BLK_MAX_DEV             4
 
 /*
@@ -50,6 +53,8 @@ struct blk_dev {
        struct virt_queue               vqs[NUM_VIRT_QUEUES];
        struct blk_dev_req              reqs[VIRTIO_BLK_QUEUE_SIZE];
 
+       int                             vhost_fd;
+
        pthread_t                       io_thread;
        int                             io_efd;
 
@@ -166,9 +171,12 @@ static void set_guest_features(struct kvm *kvm, void *dev, 
u32 features)
 
 static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 pfn)
 {
+       struct vhost_vring_state state = { .index = vq };
+       struct vhost_vring_addr addr;
        struct blk_dev *bdev = dev;
        struct virt_queue *queue;
        void *p;
+       int r;
 
        compat__remove_message(compat_id);
 
@@ -178,9 +186,83 @@ static int init_vq(struct kvm *kvm, void *dev, u32 vq, u32 
pfn)
 
        vring_init(&queue->vring, VIRTIO_BLK_QUEUE_SIZE, p, 
VIRTIO_PCI_VRING_ALIGN);
 
+       if (bdev->vhost_fd == 0)
+               return 0;
+
+       state.num = queue->vring.num;
+       r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_NUM, &state);
+       if (r < 0)
+               die_perror("VHOST_SET_VRING_NUM failed");
+       state.num = 0;
+       r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_BASE, &state);
+       if (r < 0)
+               die_perror("VHOST_SET_VRING_BASE failed");
+
+       addr = (struct vhost_vring_addr) {
+               .index = vq,
+               .desc_user_addr = (u64)(unsigned long)queue->vring.desc,
+               .avail_user_addr = (u64)(unsigned long)queue->vring.avail,
+               .used_user_addr = (u64)(unsigned long)queue->vring.used,
+       };
+
+       r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_ADDR, &addr);
+       if (r < 0)
+               die_perror("VHOST_SET_VRING_ADDR failed");
+
        return 0;
 }
 
+static void notify_vq_gsi(struct kvm *kvm, void *dev, u32 vq, u32 gsi)
+{
+       struct vhost_vring_file file;
+       struct blk_dev *bdev = dev;
+       struct kvm_irqfd irq;
+       int r;
+
+       if (bdev->vhost_fd == 0)
+               return;
+
+       irq = (struct kvm_irqfd) {
+               .gsi    = gsi,
+               .fd     = eventfd(0, 0),
+       };
+       file = (struct vhost_vring_file) {
+               .index  = vq,
+               .fd     = irq.fd,
+       };
+
+       r = ioctl(kvm->vm_fd, KVM_IRQFD, &irq);
+       if (r < 0)
+               die_perror("KVM_IRQFD failed");
+
+       r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_CALL, &file);
+       if (r < 0)
+               die_perror("VHOST_SET_VRING_CALL failed");
+
+       file.fd = bdev->disk->fd;
+       r = ioctl(bdev->vhost_fd, VHOST_BLK_SET_BACKEND, &file);
+       if (r != 0)
+               die("VHOST_BLK_SET_BACKEND failed %d", errno);
+
+}
+
+static void notify_vq_eventfd(struct kvm *kvm, void *dev, u32 vq, u32 efd)
+{
+       struct blk_dev *bdev = dev;
+       struct vhost_vring_file file = {
+               .index  = vq,
+               .fd     = efd,
+       };
+       int r;
+
+       if (bdev->vhost_fd == 0)
+               return;
+
+       r = ioctl(bdev->vhost_fd, VHOST_SET_VRING_KICK, &file);
+       if (r < 0)
+               die_perror("VHOST_SET_VRING_KICK failed");
+}
+
 static void *virtio_blk_thread(void *dev)
 {
        struct blk_dev *bdev = dev;
@@ -230,12 +312,56 @@ static struct virtio_ops blk_dev_virtio_ops = (struct 
virtio_ops) {
        .get_host_features      = get_host_features,
        .set_guest_features     = set_guest_features,
        .init_vq                = init_vq,
-       .notify_vq              = notify_vq,
        .get_pfn_vq             = get_pfn_vq,
        .get_size_vq            = get_size_vq,
        .set_size_vq            = set_size_vq,
+       .notify_vq              = notify_vq,
+       .notify_vq_gsi          = notify_vq_gsi,
+       .notify_vq_eventfd      = notify_vq_eventfd,
 };
 
+static void virtio_blk_vhost_init(struct kvm *kvm, struct blk_dev *bdev)
+{
+       u64 features;
+       struct vhost_memory *mem;
+       int r;
+
+       bdev->vhost_fd = open("/dev/vhost-blk", O_RDWR);
+       if (bdev->vhost_fd < 0)
+               die_perror("Failed openning vhost-blk device");
+
+       mem = calloc(1, sizeof(*mem) + sizeof(struct vhost_memory_region));
+       if (mem == NULL)
+               die("Failed allocating memory for vhost memory map");
+
+       mem->nregions = 1;
+       mem->regions[0] = (struct vhost_memory_region) {
+               .guest_phys_addr        = 0,
+               .memory_size            = kvm->ram_size,
+               .userspace_addr         = (unsigned long)kvm->ram_start,
+       };
+
+       r = ioctl(bdev->vhost_fd, VHOST_SET_OWNER);
+       if (r != 0)
+               die_perror("VHOST_SET_OWNER failed");
+
+       r = ioctl(bdev->vhost_fd, VHOST_GET_FEATURES, &features);
+       if (r != 0)
+               die_perror("VHOST_GET_FEATURES failed");
+
+       r = ioctl(bdev->vhost_fd, VHOST_SET_FEATURES, &features);
+       if (r != 0)
+               die_perror("VHOST_SET_FEATURES failed");
+       r = ioctl(bdev->vhost_fd, VHOST_SET_MEM_TABLE, mem);
+       if (r != 0)
+               die_perror("VHOST_SET_MEM_TABLE failed");
+
+       bdev->vdev.use_vhost = true;
+
+       free(mem);
+}
+
+
 static int virtio_blk__init_one(struct kvm *kvm, struct disk_image *disk)
 {
        struct blk_dev *bdev;
@@ -271,7 +397,11 @@ static int virtio_blk__init_one(struct kvm *kvm, struct 
disk_image *disk)
 
        disk_image__set_callback(bdev->disk, virtio_blk_complete);
 
-       pthread_create(&bdev->io_thread, NULL, virtio_blk_thread, bdev);
+       if (disk->use_vhost)
+               virtio_blk_vhost_init(kvm, bdev);
+       else
+               pthread_create(&bdev->io_thread, NULL, virtio_blk_thread, bdev);
+
        if (compat_id == -1)
                compat_id = virtio_compat_add_message("virtio-blk", 
"CONFIG_VIRTIO_BLK");
 
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to