Re: [Qemu-devel] [RFC PATCH 5/5] vfio/quirks: Enable ioeventfd quirks to be handled by vfio directly

2018-02-10 Thread Peter Xu
On Fri, Feb 09, 2018 at 03:09:33PM -0700, Alex Williamson wrote:
> On Fri, 9 Feb 2018 15:11:45 +0800
> Peter Xu  wrote:
> 
> > On Tue, Feb 06, 2018 at 05:26:46PM -0700, Alex Williamson wrote:
> > > With vfio ioeventfd support, we can program vfio-pci to perform a
> > > specified BAR write when an eventfd is triggered.  This allows the
> > > KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding
> > > userspace handling for these events.  On the same micro-benchmark
> > > where the ioeventfd got us to almost 90% of performance versus
> > > disabling the GeForce quirks, this gets us to within 95%.
> > > [...]
> > > 
> > > -    qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
> > > -                        vfio_ioeventfd_handler, NULL, ioeventfd);
> > > +    vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
> > > +    vfio_ioeventfd.flags = ioeventfd->size;
> > > +    vfio_ioeventfd.data = ioeventfd->data;
> > > +    vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
> > > +                            ioeventfd->region_addr;
> > > +    vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
> > > +
> > > +    if (ioctl(vdev->vbasedev.fd,
> > > +              VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd) != 0) {
> > > +        qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
> > > +                            vfio_ioeventfd_handler, NULL, ioeventfd);
> > > +        vfio_enabled = '-';
> > 
> > Would the performance be even slower if a new QEMU runs on an old
> > kernel, due to these ioeventfds (MMIO -> eventfd -> same MMIO again)?
> > If so, shall we enable this ioeventfd enhancement only if we detect
> > that the kernel supports this new feature (assuming this feature bit
> > won't change after the VM starts)?
> 
> No, it's actually still a significant improvement to enable the KVM
> ioeventfd even if we can't enable vfio.  My testing shows that the KVM
> ioeventfd alone accounts for slightly more than half of the total
> improvement, so I don't see any reason to restrict this to depending on
> both ends being available.  Thanks,

The numbers (83% -> 90% -> 95%) were mentioned in different patches and
I didn't catch all of them.  Sorry.

And obviously the userspace code path is different, which I missed too.
It makes sense that the ioeventfd path should always be faster than
trapping the write.

Thanks,

-- 
Peter Xu
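
To make the comparison concrete, the three configurations discussed in
this thread differ roughly as follows (a sketch of the write paths; the
percentages are the micro-benchmark numbers quoted above, and pwrite()
stands in for QEMU's vfio region write):

/*
 * Guest write to a quirked BAR register, three ways (sketch):
 *
 * 1. No ioeventfd (~83%): KVM exits to QEMU, the vCPU thread emulates
 *    the quirk and issues a pwrite() to the vfio device fd, and only
 *    then re-enters the guest.
 *
 * 2. KVM ioeventfd only (~90%): KVM matches the write in the kernel
 *    and signals an eventfd; the vCPU resumes immediately while a QEMU
 *    handler consumes the eventfd and issues the pwrite() asynchronously.
 *
 * 3. KVM + vfio ioeventfd (~95%, this patch): the eventfd is consumed
 *    by vfio-pci inside the kernel, which performs the BAR write
 *    itself; userspace is never involved.
 */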



Re: [Qemu-devel] [RFC PATCH 5/5] vfio/quirks: Enable ioeventfd quirks to be handled by vfio directly

2018-02-09 Thread Alex Williamson
On Fri, 9 Feb 2018 15:11:45 +0800
Peter Xu  wrote:

> On Tue, Feb 06, 2018 at 05:26:46PM -0700, Alex Williamson wrote:
> > With vfio ioeventfd support, we can program vfio-pci to perform a
> > specified BAR write when an eventfd is triggered.  This allows the
> > KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding
> > userspace handling for these events.  On the same micro-benchmark
> > where the ioeventfd got us to almost 90% of performance versus
> > disabling the GeForce quirks, this gets us to within 95%.
> > [...]
> > 
> > -    qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
> > -                        vfio_ioeventfd_handler, NULL, ioeventfd);
> > +    vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
> > +    vfio_ioeventfd.flags = ioeventfd->size;
> > +    vfio_ioeventfd.data = ioeventfd->data;
> > +    vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
> > +                            ioeventfd->region_addr;
> > +    vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
> > +
> > +    if (ioctl(vdev->vbasedev.fd,
> > +              VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd) != 0) {
> > +        qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
> > +                            vfio_ioeventfd_handler, NULL, ioeventfd);
> > +        vfio_enabled = '-';
> 
> Would the performance be even slower if a new QEMU runs on an old
> kernel, due to these ioeventfds (MMIO -> eventfd -> same MMIO again)?
> If so, shall we enable this ioeventfd enhancement only if we detect
> that the kernel supports this new feature (assuming this feature bit
> won't change after the VM starts)?

No, it's actually still a significant improvement to enable the KVM
ioeventfd even if we can't enable vfio.  My testing shows that the KVM
ioeventfd alone accounts for slightly more than half of the total
improvement, so I don't see any reason to restrict this to depending on
both ends being available.  Thanks,

Alex
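
The reason the fallback still helps is visible in the patch itself: the
KVM ioeventfd is registered with memory_region_add_eventfd() whether or
not the vfio ioctl succeeded, so the vCPU never blocks on the emulated
write.  Restated as a sketch:

/* Wire the eventfd into KVM for the quirked address (done
 * unconditionally in vfio_ioeventfd_init() above).  The guest's write
 * is matched in the kernel and the vCPU resumes immediately; whichever
 * consumer owns the other end of the eventfd (vfio-pci in the kernel,
 * or the QEMU fallback handler) replays the BAR write afterwards. */
memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
                          ioeventfd->match_data, ioeventfd->data,
                          &ioeventfd->e);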



Re: [Qemu-devel] [RFC PATCH 5/5] vfio/quirks: Enable ioeventfd quirks to be handled by vfio directly

2018-02-08 Thread Peter Xu
On Tue, Feb 06, 2018 at 05:26:46PM -0700, Alex Williamson wrote:
> With vfio ioeventfd support, we can program vfio-pci to perform a
> specified BAR write when an eventfd is triggered.  This allows the
> KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding
> userspace handling for these events.  On the same micro-benchmark
> where the ioeventfd got us to almost 90% of performance versus
> disabling the GeForce quirks, this gets us to within 95%.
> [...]
> 
> -    qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
> -                        vfio_ioeventfd_handler, NULL, ioeventfd);
> +    vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
> +    vfio_ioeventfd.flags = ioeventfd->size;
> +    vfio_ioeventfd.data = ioeventfd->data;
> +    vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
> +                            ioeventfd->region_addr;
> +    vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
> +
> +    if (ioctl(vdev->vbasedev.fd,
> +              VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd) != 0) {
> +        qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
> +                            vfio_ioeventfd_handler, NULL, ioeventfd);
> +        vfio_enabled = '-';

Would the performance be even slower if a new QEMU runs on an old
kernel, due to these ioeventfds (MMIO -> eventfd -> same MMIO again)?
If so, shall we enable this ioeventfd enhancement only if we detect
that the kernel supports this new feature (assuming this feature bit
won't change after the VM starts)?

Then, could we avoid the slow path (qemu_set_fd_handler) altogether?

Thanks,

-- 
Peter Xu
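
For reference, the slow path in question is the userspace handler
installed by qemu_set_fd_handler().  Reconstructed from the earlier
patch in this series, it looks roughly like this (a sketch, not part of
this patch):

/* Userspace fallback: consume the eventfd that KVM signalled and
 * replay the write through the normal vfio region accessor (a pwrite()
 * to the device fd).  This is the MMIO -> eventfd -> write round trip
 * described above; it still beats trapping the write in QEMU, because
 * the vCPU has already resumed by the time this handler runs. */
static void vfio_ioeventfd_handler(void *opaque)
{
    VFIOIOEventFD *ioeventfd = opaque;

    if (event_notifier_test_and_clear(&ioeventfd->e)) {
        vfio_region_write(ioeventfd->region, ioeventfd->region_addr,
                          ioeventfd->data, ioeventfd->size);
    }
}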



Re: [Qemu-devel] [RFC PATCH 5/5] vfio/quirks: Enable ioeventfd quirks to be handled by vfio directly

2018-02-08 Thread Alex Williamson
On Thu, 8 Feb 2018 12:42:15 +0100
Auger Eric  wrote:

> Hi Alex,
> On 07/02/18 01:26, Alex Williamson wrote:
> > With vfio ioeventfd support, we can program vfio-pci to perform a
> > specified BAR write when an eventfd is triggered.  This allows the
> > KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding
> > userspace handling for these events.  On the same micro-benchmark
> > where the ioeventfd got us to almost 90% of performance versus
> > disabling the GeForce quirks, this gets us to within 95%.
> > [...]
> > 
> >      memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr,
> >                                ioeventfd->size, ioeventfd->match_data,
> >                                ioeventfd->data, &ioeventfd->e);
> >  
> >      info_report("Enabled automatic ioeventfd acceleration for %s region %d, "
> > -                "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u",
> > -                vdev->vbasedev.name, region->nr, region_addr, data, size);
> > +                "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u, vfio%c",
> > +                vdev->vbasedev.name, region->nr, region_addr, data, size,
> > +                vfio_enabled);
> Not sure this message is really helpful for the end user to understand
> what happens.  Maybe add a trace event for when everything works as it
> should, and an error_report if we fail to set up the vfio kernel
> handler, explaining the sub-optimal performance that can result.

For right now, I think it is useful.  Maybe once we're a few kernels
beyond the one where the vfio support is introduced, and we know how
different devices behave and which ioeventfds get added, it might make
sense to switch to a trace interface.  I don't think we can legitimately
trigger an error_report for a feature which is just an accelerator and
isn't even in upstream kernels yet (though arguably it would be
upstream by the time this gets into QEMU).  For now it

Re: [Qemu-devel] [RFC PATCH 5/5] vfio/quirks: Enable ioeventfd quirks to be handled by vfio directly

2018-02-08 Thread Auger Eric
Hi Alex,
On 07/02/18 01:26, Alex Williamson wrote:
> With vfio ioeventfd support, we can program vfio-pci to perform a
> specified BAR write when an eventfd is triggered.  This allows the
> KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding
> userspace handling for these events.  On the same micro-benchmark
> where the ioeventfd got us to almost 90% of performance versus
> disabling the GeForce quirks, this gets us to within 95%.
> [...]
> 
>      memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr,
>                                ioeventfd->size, ioeventfd->match_data,
>                                ioeventfd->data, &ioeventfd->e);
>  
>      info_report("Enabled automatic ioeventfd acceleration for %s region %d, "
> -                "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u",
> -                vdev->vbasedev.name, region->nr, region_addr, data, size);
> +                "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u, vfio%c",
> +                vdev->vbasedev.name, region->nr, region_addr, data, size,
> +                vfio_enabled);
Not sure this message is really helpful for the end user to understand
what happens.  Maybe add a trace event for when everything works as it
should, and an error_report if we fail to set up the vfio kernel
handler, explaining the sub-optimal performance that can result.

Thanks

Eric
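
For what it's worth, Eric's suggestion would look something like the
following in QEMU terms (purely illustrative; the event name, format
string, and wording below are not part of the patch):

/* hw/vfio/trace-events (sketch):
 * vfio_ioeventfd_init(const char *name, int region, uint64_t addr,
 *                     uint64_t data, uint32_t size, char vfio)
 *     "%s region %d, addr 0x%"PRIx64", data 0x%"PRIx64", size %u, vfio%c"
 */

/* In vfio_ioeventfd_init(), on the normal path: */
trace_vfio_ioeventfd_init(vdev->vbasedev.name, region->nr, region_addr,
                          data, size, vfio_enabled);

/* And on ioctl failure, instead of silently flipping vfio_enabled: */
error_report("%s: falling back to userspace handling of ioeventfd for "
             "region %d, expect reduced performance",
             vdev->vbasedev.name, region->nr);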



[Qemu-devel] [RFC PATCH 5/5] vfio/quirks: Enable ioeventfd quirks to be handled by vfio directly

2018-02-06 Thread Alex Williamson
With vfio ioeventfd support, we can program vfio-pci to perform a
specified BAR write when an eventfd is triggered.  This allows the
KVM ioeventfd to be wired directly to vfio-pci, entirely avoiding
userspace handling for these events.  On the same micro-benchmark
where the ioeventfd got us to almost 90% of performance versus
disabling the GeForce quirks, this gets us to within 95%.

Signed-off-by: Alex Williamson 
---
 hw/vfio/pci-quirks.c |   42 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 6 deletions(-)

diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
index e739efe601b1..35a4d5197e2d 100644
--- a/hw/vfio/pci-quirks.c
+++ b/hw/vfio/pci-quirks.c
@@ -16,6 +16,7 @@
 #include "qemu/range.h"
 #include "qapi/error.h"
 #include "qapi/visitor.h"
+#include <sys/ioctl.h>
 #include "hw/nvram/fw_cfg.h"
 #include "pci.h"
 #include "trace.h"
@@ -287,13 +288,27 @@ static VFIOQuirk *vfio_quirk_alloc(int nr_mem)
     return quirk;
 }
 
-static void vfio_ioeventfd_exit(VFIOIOEventFD *ioeventfd)
+static void vfio_ioeventfd_exit(VFIOPCIDevice *vdev, VFIOIOEventFD *ioeventfd)
 {
+    struct vfio_device_ioeventfd vfio_ioeventfd;
+
     QLIST_REMOVE(ioeventfd, next);
+
     memory_region_del_eventfd(ioeventfd->mr, ioeventfd->addr, ioeventfd->size,
                               ioeventfd->match_data, ioeventfd->data,
                               &ioeventfd->e);
+
     qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e), NULL, NULL, NULL);
+
+    vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
+    vfio_ioeventfd.flags = ioeventfd->size;
+    vfio_ioeventfd.data = ioeventfd->data;
+    vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
+                            ioeventfd->region_addr;
+    vfio_ioeventfd.fd = -1;
+
+    ioctl(vdev->vbasedev.fd, VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd);
+
     event_notifier_cleanup(&ioeventfd->e);
     g_free(ioeventfd);
 }
@@ -315,6 +330,8 @@ static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
                                           hwaddr region_addr)
 {
     VFIOIOEventFD *ioeventfd = g_malloc0(sizeof(*ioeventfd));
+    struct vfio_device_ioeventfd vfio_ioeventfd;
+    char vfio_enabled = '+';
 
     if (event_notifier_init(&ioeventfd->e, 0)) {
         g_free(ioeventfd);
@@ -329,15 +346,28 @@ static VFIOIOEventFD *vfio_ioeventfd_init(VFIOPCIDevice *vdev,
     ioeventfd->region = region;
     ioeventfd->region_addr = region_addr;
 
-    qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
-                        vfio_ioeventfd_handler, NULL, ioeventfd);
+    vfio_ioeventfd.argsz = sizeof(vfio_ioeventfd);
+    vfio_ioeventfd.flags = ioeventfd->size;
+    vfio_ioeventfd.data = ioeventfd->data;
+    vfio_ioeventfd.offset = ioeventfd->region->fd_offset +
+                            ioeventfd->region_addr;
+    vfio_ioeventfd.fd = event_notifier_get_fd(&ioeventfd->e);
+
+    if (ioctl(vdev->vbasedev.fd,
+              VFIO_DEVICE_IOEVENTFD, &vfio_ioeventfd) != 0) {
+        qemu_set_fd_handler(event_notifier_get_fd(&ioeventfd->e),
+                            vfio_ioeventfd_handler, NULL, ioeventfd);
+        vfio_enabled = '-';
+    }
+
     memory_region_add_eventfd(ioeventfd->mr, ioeventfd->addr,
                               ioeventfd->size, ioeventfd->match_data,
                               ioeventfd->data, &ioeventfd->e);
 
     info_report("Enabled automatic ioeventfd acceleration for %s region %d, "
-                "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u",
-                vdev->vbasedev.name, region->nr, region_addr, data, size);
+                "offset 0x%"HWADDR_PRIx", data 0x%"PRIx64", size %u, vfio%c",
+                vdev->vbasedev.name, region->nr, region_addr, data, size,
+                vfio_enabled);
 
     return ioeventfd;
 }
@@ -1767,7 +1797,7 @@ void vfio_bar_quirk_exit(VFIOPCIDevice *vdev, int nr)
 
     QLIST_FOREACH(quirk, &bar->quirks, next) {
         while (!QLIST_EMPTY(&quirk->ioeventfds)) {
-            vfio_ioeventfd_exit(QLIST_FIRST(&quirk->ioeventfds));
+            vfio_ioeventfd_exit(vdev, QLIST_FIRST(&quirk->ioeventfds));
         }
 
         for (i = 0; i < quirk->nr_mem; i++) {
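
For readers without the kernel half of this RFC at hand, the interface
being programmed above looks approximately like this.  This is a sketch
of the uapi as it was eventually merged (include/uapi/linux/vfio.h,
Linux 4.17); the RFC version may differ in detail:

struct vfio_device_ioeventfd {
    __u32 argsz;
    __u32 flags;
    /* The size flags equal the access size in bytes, which is why the
     * code above can simply assign vfio_ioeventfd.flags = size. */
#define VFIO_DEVICE_IOEVENTFD_8         (1 << 0) /* 1-byte write */
#define VFIO_DEVICE_IOEVENTFD_16        (1 << 1) /* 2-byte write */
#define VFIO_DEVICE_IOEVENTFD_32        (1 << 2) /* 4-byte write */
#define VFIO_DEVICE_IOEVENTFD_64        (1 << 3) /* 8-byte write */
#define VFIO_DEVICE_IOEVENTFD_SIZE_MASK (0xf)
    __u64 offset;  /* device fd offset of write */
    __u64 data;    /* data to be written */
    __s32 fd;      /* >= 0 to assign, -1 to de-assign */
};

#define VFIO_DEVICE_IOEVENTFD _IO(VFIO_TYPE, VFIO_BASE + 16)

Passing fd = -1, as vfio_ioeventfd_exit() does above, is what tears the
kernel-side ioeventfd back down.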