On 12/15/2011 08:15 PM, Sasha Levin wrote:
> When using AIO, submit all requests which exists in the vring in a single
> io_submit instead of one io_submit for each descriptor.
> 
> Benchmarks:
> 
> Short version: 15%+ increase in IOPS, small increase in BW.
> 
> Read IOPS:
> Before:
>   vda: ios=291792/0, merge=0/0, ticks=35229/0, in_queue=31025, util=61.30%

I guess you are reading the wrong IOPS number, the 'ios' is the number
of ios performed by all groups, not the IOPS result. Find the 'iops' ;-)

So, Here is the number without/with this patch.

(seq-read, seq-write, rand-read, rand-write)

Before:
  read : io=98304KB, bw=63015KB/s, iops=15753, runt=  1560msec
  write: io=98304KB, bw=56823KB/s, iops=14205, runt=  1730msec
  read : io=98304KB, bw=62139KB/s, iops=15534, runt=  1582msec
  write: io=98304KB, bw=53836KB/s, iops=13458, runt=  1826msec

After:
  read : io=98304KB, bw=63096KB/s, iops=15774, runt=  1558msec
  write: io=98304KB, bw=55823KB/s, iops=13955, runt=  1761msec
  read : io=98304KB, bw=59148KB/s, iops=14787, runt=  1662msec
  write: io=98304KB, bw=55072KB/s, iops=13768, runt=  1785msec

Submit more io requests in one time is not supposed to increase the iops
or bw so dramatically.

I even tried to submit all read/write ops in one io_submit which still
ends up with very little iops or bw improvement.

> 
> After:
>   vda: ios=333114/0, merge=0/0, ticks=47983/0, in_queue=46630, util=62.22%
> 
> Write IOPS:
> Before:
>   vda: ios=0/271716, merge=0/0, ticks=0/33367, in_queue=26531, util=59.96%
> 
> After:
>   vda: ios=0/327485, merge=0/0, ticks=0/23789, in_queue=22475, util=55.74%
> 
> Read BW:
> Before:
>    READ: io=7526.0MB, aggrb=1246.3MB/s, minb=1275.1MB/s, maxb=1275.1MB/s, 
> mint=6040msec, maxt=6040msec
> After:
>    READ: io=7526.0MB, aggrb=1315.5MB/s, minb=1346.7MB/s, maxb=1346.7MB/s, 
> mint=5723msec, maxt=5723msec
> 
> Write BW:
> Before:
>   WRITE: io=7526.0MB, aggrb=1110.2MB/s, minb=1136.9MB/s, maxb=1136.9MB/s, 
> mint=6779msec, maxt=6779msec
> 
> After:
>   WRITE: io=7526.0MB, aggrb=1113.5MB/s, minb=1140.3MB/s, maxb=1140.3MB/s, 
> mint=6759msec, maxt=6759msec
> 
> Signed-off-by: Sasha Levin <[email protected]>
> ---
>  tools/kvm/disk/core.c              |    2 -
>  tools/kvm/disk/raw.c               |   38 +++++++++++++++++++++++++++++------
>  tools/kvm/include/kvm/disk-image.h |    8 +++++-
>  tools/kvm/util/read-write.c        |    8 +-----
>  tools/kvm/virtio/blk.c             |    3 ++
>  5 files changed, 42 insertions(+), 17 deletions(-)
> 
> diff --git a/tools/kvm/disk/core.c b/tools/kvm/disk/core.c
> index 4915efd..b18014e 100644
> --- a/tools/kvm/disk/core.c
> +++ b/tools/kvm/disk/core.c
> @@ -5,8 +5,6 @@
>  #include <sys/eventfd.h>
>  #include <sys/poll.h>
>  
> -#define AIO_MAX 32
> -
>  int debug_iodelay;
>  
>  #ifdef CONFIG_HAS_AIO
> diff --git a/tools/kvm/disk/raw.c b/tools/kvm/disk/raw.c
> index caa023c..1162fb7 100644
> --- a/tools/kvm/disk/raw.c
> +++ b/tools/kvm/disk/raw.c
> @@ -10,9 +10,9 @@ ssize_t raw_image__read_sector(struct disk_image *disk, u64 
> sector, const struct
>       u64 offset = sector << SECTOR_SHIFT;
>  
>  #ifdef CONFIG_HAS_AIO
> -     struct iocb iocb;
> +     struct iocb *iocb = &disk->iocb[disk->count++];
>  
> -     return aio_preadv(disk->ctx, &iocb, disk->fd, iov, iovcount, offset,
> +     return aio_preadv(disk->ctx, iocb, disk->fd, iov, iovcount, offset,
>                               disk->evt, param);
>  #else
>       return preadv_in_full(disk->fd, iov, iovcount, offset);
> @@ -25,9 +25,9 @@ ssize_t raw_image__write_sector(struct disk_image *disk, 
> u64 sector, const struc
>       u64 offset = sector << SECTOR_SHIFT;
>  
>  #ifdef CONFIG_HAS_AIO
> -     struct iocb iocb;
> +     struct iocb *iocb = &disk->iocb[disk->count++];
>  
> -     return aio_pwritev(disk->ctx, &iocb, disk->fd, iov, iovcount, offset,
> +     return aio_pwritev(disk->ctx, iocb, disk->fd, iov, iovcount, offset,
>                               disk->evt, param);
>  #else
>       return pwritev_in_full(disk->fd, iov, iovcount, offset);
> @@ -79,19 +79,33 @@ int raw_image__close(struct disk_image *disk)
>  
>       close(disk->evt);
>  
> -#ifdef CONFIG_HAS_VIRTIO
> +#ifdef CONFIG_HAS_AIO
>       io_destroy(disk->ctx);
>  #endif
>  
>       return ret;
>  }
>  
> +static int raw_image__aio_submit(struct disk_image *disk)
> +{
> +     int ret;
> +
> +     ret = io_submit(disk->ctx, disk->count, disk->iocb_ptrs);
> +     if (ret > 0)
> +             disk->count = 0;
> +
> +     return ret;
> +}
> +
>  /*
>   * multiple buffer based disk image operations
>   */
>  static struct disk_image_operations raw_image_regular_ops = {
>       .read_sector    = raw_image__read_sector,
>       .write_sector   = raw_image__write_sector,
> +#ifdef CONFIG_HAS_AIO
> +     .submit         = raw_image__aio_submit,
> +#endif
>  };
>  
>  struct disk_image_operations ro_ops = {
> @@ -120,8 +134,13 @@ struct disk_image *raw_image__probe(int fd, struct stat 
> *st, bool readonly)
>  
>                       disk = disk_image__new(fd, st->st_size, 
> &ro_ops_nowrite, DISK_IMAGE_REGULAR);
>  #ifdef CONFIG_HAS_AIO
> -                     if (disk)
> +                     if (disk) {
> +                             int i;
> +
>                               disk->async = 1;
> +                             for (i = 0; i < AIO_MAX; i++)
> +                                     disk->iocb_ptrs[i] = &disk->iocb[i];
> +                     }
>  #endif
>               }
>  
> @@ -132,8 +151,13 @@ struct disk_image *raw_image__probe(int fd, struct stat 
> *st, bool readonly)
>                */
>               disk = disk_image__new(fd, st->st_size, &raw_image_regular_ops, 
> DISK_IMAGE_REGULAR);
>  #ifdef CONFIG_HAS_AIO
> -             if (disk)
> +             if (disk) {
> +                     int i;
> +
>                       disk->async = 1;
> +                     for (i = 0; i < AIO_MAX; i++)
> +                             disk->iocb_ptrs[i] = &disk->iocb[i];
> +             }
>  #endif
>               return disk;
>       }
> diff --git a/tools/kvm/include/kvm/disk-image.h 
> b/tools/kvm/include/kvm/disk-image.h
> index 56c08da..6eba950 100644
> --- a/tools/kvm/include/kvm/disk-image.h
> +++ b/tools/kvm/include/kvm/disk-image.h
> @@ -20,14 +20,14 @@
>  
>  #define SECTOR_SHIFT         9
>  #define SECTOR_SIZE          (1UL << SECTOR_SHIFT)
> +#define AIO_MAX                      256
> +#define MAX_DISK_IMAGES         4
>  
>  enum {
>       DISK_IMAGE_REGULAR,
>       DISK_IMAGE_MMAP,
>  };
>  
> -#define MAX_DISK_IMAGES         4
> -
>  struct disk_image;
>  
>  struct disk_image_operations {
> @@ -37,6 +37,7 @@ struct disk_image_operations {
>                               int iovcount, void *param);
>       int (*flush)(struct disk_image *disk);
>       int (*close)(struct disk_image *disk);
> +     int (*submit)(struct disk_image *disk);
>  };
>  
>  struct disk_image {
> @@ -50,6 +51,9 @@ struct disk_image {
>       int                             evt;
>  #ifdef CONFIG_HAS_AIO
>       io_context_t                    ctx;
> +     struct iocb                     iocb[AIO_MAX];
> +     struct iocb                     *iocb_ptrs[AIO_MAX];
> +     u32                             count;
>  #endif
>  };
>  
> diff --git a/tools/kvm/util/read-write.c b/tools/kvm/util/read-write.c
> index 55473ba..4ccd554 100644
> --- a/tools/kvm/util/read-write.c
> +++ b/tools/kvm/util/read-write.c
> @@ -321,24 +321,20 @@ ssize_t pwritev_in_full(int fd, const struct iovec 
> *iov, int iovcnt, off_t offse
>  int aio_pwritev(io_context_t ctx, struct iocb *iocb, int fd, const struct 
> iovec *iov, int iovcnt,
>               off_t offset, int ev, void *param)
>  {
> -     struct iocb *ios[1] = { iocb };
> -
>       io_prep_pwritev(iocb, fd, iov, iovcnt, offset);
>       io_set_eventfd(iocb, ev);
>       iocb->data = param;
>  
> -     return io_submit(ctx, 1, ios);
> +     return 0;
>  }
>  
>  int aio_preadv(io_context_t ctx, struct iocb *iocb, int fd, const struct 
> iovec *iov, int iovcnt,
>               off_t offset, int ev, void *param)
>  {
> -     struct iocb *ios[1] = { iocb };
> -
>       io_prep_preadv(iocb, fd, iov, iovcnt, offset);
>       io_set_eventfd(iocb, ev);
>       iocb->data = param;
>  
> -     return io_submit(ctx, 1, ios);
> +     return 0;
>  }
>  #endif
> \ No newline at end of file
> diff --git a/tools/kvm/virtio/blk.c b/tools/kvm/virtio/blk.c
> index d1a0197..153b11b 100644
> --- a/tools/kvm/virtio/blk.c
> +++ b/tools/kvm/virtio/blk.c
> @@ -128,6 +128,9 @@ static void virtio_blk_do_io(struct kvm *kvm, struct 
> virt_queue *vq, struct blk_
>  
>               virtio_blk_do_io_request(kvm, req);
>       }
> +
> +     if (bdev->disk->ops->submit)
> +             bdev->disk->ops->submit(bdev->disk);
>  }
>  
>  static void set_config(struct kvm *kvm, void *dev, u8 data, u32 offset)


-- 
Asias He
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to