Re: [Qemu-devel] [PATCH v4 3/3] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

2013-02-14 Thread Alex Williamson
On Thu, 2013-02-14 at 04:41 -0600, Vijay Mohan Pandarathil wrote:
>   - Create eventfd per vfio device assigned to a guest and register an
>   event handler
> 
>   - This fd is passed to the vfio_pci driver through the SET_IRQ ioctl
> 
>   - When the device encounters an error, the eventfd is signalled
>   and the qemu eventfd handler gets invoked.
> 
>   - In the handler decide what action to take. Current action taken
>   is to stop the guest.
> 
> Signed-off-by: Vijay Mohan Pandarathil 
> ---
>  hw/vfio_pci.c  | 112 
> +
>  linux-headers/linux/vfio.h |   1 +
>  2 files changed, 113 insertions(+)
> 
> diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
> index c51ae67..05da53b 100644
> --- a/hw/vfio_pci.c
> +++ b/hw/vfio_pci.c
> @@ -38,6 +38,7 @@
>  #include "qemu/error-report.h"
>  #include "qemu/queue.h"
>  #include "qemu/range.h"
> +#include "sysemu/sysemu.h"
>  
>  /* #define DEBUG_VFIO */
>  #ifdef DEBUG_VFIO
> @@ -129,7 +130,9 @@ typedef struct VFIODevice {
>  PCIHostDeviceAddress host;
>  QLIST_ENTRY(VFIODevice) next;
>  struct VFIOGroup *group;
> +EventNotifier err_notifier;
>  bool reset_works;
> +bool pci_aer;
>  } VFIODevice;
>  
>  typedef struct VFIOGroup {
> @@ -1802,6 +1805,7 @@ static int vfio_get_device(VFIOGroup *group, const char 
> *name, VFIODevice *vdev)
>  {
>  struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
>  struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
> +struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
>  int ret, i;
>  
>  ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
> @@ -1901,6 +1905,15 @@ static int vfio_get_device(VFIOGroup *group, const 
> char *name, VFIODevice *vdev)
>  vdev->config_size = reg_info.size;
>  vdev->config_offset = reg_info.offset;
>  
> +irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
> +
> +ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
> +if (ret) {
> +error_report("vfio: Error getting IRQ info: %m\n");

What if we're assigning a Legacy PCI device or running on an old kernel,
neither of these warrant an error_report.

> +goto error;
> +}
> +if (irq_info.count == 1)
> +vdev->pci_aer = true;

This is where I'd actually expect some kind of warning, you're getting
something back that you don't expect (if != 1).

>  error:
>  if (ret) {
>  QLIST_REMOVE(vdev, next);
> @@ -1922,6 +1935,102 @@ static void vfio_put_device(VFIODevice *vdev)
>  }
>  }
>  
> +static void vfio_err_notifier_handler(void *opaque)
> +{
> +VFIODevice *vdev = opaque;
> +
> +if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
> +return;
> +}
> +
> +/*
> + * TBD. Retrieve the error details and decide what action
> + * needs to be taken. One of the actions could be to pass
> + * the error to the guest and have the guest driver recover
> + * from the error. This requires that PCIe capabilities be
> + * exposed to the guest. For now, we just terminate the
> + * guest to contain the error.
> + */
> +
> +error_report("%s (%04x:%02x:%02x.%x)"
> +"Unrecoverable error detected...\n"
> +"Please inestigate/collect any data required and then kill the 
> quest",

s/inestigate/investigate/

This seems like a lot to ask of a user.

> +__func__, vdev->host.domain, vdev->host.bus,
> +vdev->host.slot, vdev->host.function);
> +
> +vm_stop(RUN_STATE_IO_ERROR);

Gleb, were you looking for a new stop condition or is this one ok to
re-use?

> +}
> +
> +static void vfio_register_err_notifier(VFIODevice *vdev)
> +{
> +int ret;
> +int argsz;
> +struct vfio_irq_set *irq_set;
> +int32_t *pfd;
> +
> +if (!vdev->pci_aer) {
> +return;
> +}
> +
> +if (event_notifier_init(&vdev->err_notifier, 0)) {
> +error_report("vfio: Warning: Unable to init event notifier for error 
> detection\n");
> +return;
> +}
> +
> +argsz = sizeof(*irq_set) + sizeof(*pfd);
> +
> +irq_set = g_malloc0(argsz);
> +irq_set->argsz = argsz;
> +irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
> + VFIO_IRQ_SET_ACTION_TRIGGER;
> +irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
> +irq_set->start = 0;
> +irq_set->count = 1;
> +pfd = (int32_t *)&irq_set->data;
> +
> +*pfd = event_notifier_get_fd(&vdev->err_notifier);
> +qemu_set_fd_handler(*pfd, vfio_err_notifier_handler, NULL, vdev);
> +
> +ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
> +if (ret) {
> +error_report("vfio: Failed to set up error notification\n");
> +qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
> +event_notifier_cleanup(&vdev->err_notifier);
> +}
> +g_free(irq_set);
> +}

Why does the register return void?  If it's supported and fails, isn't
that a condition where we'd want to fai

[Qemu-devel] [PATCH v4 3/3] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

2013-02-14 Thread Vijay Mohan Pandarathil
- Create eventfd per vfio device assigned to a guest and register an
  event handler

- This fd is passed to the vfio_pci driver through the SET_IRQ ioctl

- When the device encounters an error, the eventfd is signalled
  and the qemu eventfd handler gets invoked.

- In the handler decide what action to take. Current action taken
  is to stop the guest.

Signed-off-by: Vijay Mohan Pandarathil 
---
 hw/vfio_pci.c  | 112 +
 linux-headers/linux/vfio.h |   1 +
 2 files changed, 113 insertions(+)

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index c51ae67..05da53b 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -38,6 +38,7 @@
 #include "qemu/error-report.h"
 #include "qemu/queue.h"
 #include "qemu/range.h"
+#include "sysemu/sysemu.h"
 
 /* #define DEBUG_VFIO */
 #ifdef DEBUG_VFIO
@@ -129,7 +130,9 @@ typedef struct VFIODevice {
 PCIHostDeviceAddress host;
 QLIST_ENTRY(VFIODevice) next;
 struct VFIOGroup *group;
+EventNotifier err_notifier;
 bool reset_works;
+bool pci_aer;
 } VFIODevice;
 
 typedef struct VFIOGroup {
@@ -1802,6 +1805,7 @@ static int vfio_get_device(VFIOGroup *group, const char 
*name, VFIODevice *vdev)
 {
 struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
 struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
 int ret, i;
 
 ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
@@ -1901,6 +1905,15 @@ static int vfio_get_device(VFIOGroup *group, const char 
*name, VFIODevice *vdev)
 vdev->config_size = reg_info.size;
 vdev->config_offset = reg_info.offset;
 
+irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
+
+ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
+if (ret) {
+error_report("vfio: Error getting IRQ info: %m\n");
+goto error;
+}
+if (irq_info.count == 1)
+vdev->pci_aer = true;
 error:
 if (ret) {
 QLIST_REMOVE(vdev, next);
@@ -1922,6 +1935,102 @@ static void vfio_put_device(VFIODevice *vdev)
 }
 }
 
+static void vfio_err_notifier_handler(void *opaque)
+{
+VFIODevice *vdev = opaque;
+
+if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
+return;
+}
+
+/*
+ * TBD. Retrieve the error details and decide what action
+ * needs to be taken. One of the actions could be to pass
+ * the error to the guest and have the guest driver recover
+ * from the error. This requires that PCIe capabilities be
+ * exposed to the guest. For now, we just terminate the
+ * guest to contain the error.
+ */
+
+error_report("%s (%04x:%02x:%02x.%x)"
+"Unrecoverable error detected...\n"
+"Please inestigate/collect any data required and then kill the quest",
+__func__, vdev->host.domain, vdev->host.bus,
+vdev->host.slot, vdev->host.function);
+
+vm_stop(RUN_STATE_IO_ERROR);
+}
+
+static void vfio_register_err_notifier(VFIODevice *vdev)
+{
+int ret;
+int argsz;
+struct vfio_irq_set *irq_set;
+int32_t *pfd;
+
+if (!vdev->pci_aer) {
+return;
+}
+
+if (event_notifier_init(&vdev->err_notifier, 0)) {
+error_report("vfio: Warning: Unable to init event notifier for error 
detection\n");
+return;
+}
+
+argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+irq_set = g_malloc0(argsz);
+irq_set->argsz = argsz;
+irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
+irq_set->start = 0;
+irq_set->count = 1;
+pfd = (int32_t *)&irq_set->data;
+
+*pfd = event_notifier_get_fd(&vdev->err_notifier);
+qemu_set_fd_handler(*pfd, vfio_err_notifier_handler, NULL, vdev);
+
+ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+if (ret) {
+error_report("vfio: Failed to set up error notification\n");
+qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+event_notifier_cleanup(&vdev->err_notifier);
+}
+g_free(irq_set);
+}
+static void vfio_unregister_err_notifier(VFIODevice *vdev)
+{
+int argsz;
+struct vfio_irq_set *irq_set;
+int32_t *pfd;
+int ret;
+
+if (!vdev->pci_aer) {
+return;
+}
+
+argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+irq_set = g_malloc0(argsz);
+irq_set->argsz = argsz;
+irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
+irq_set->start = 0;
+irq_set->count = 1;
+pfd = (int32_t *)&irq_set->data;
+*pfd = -1;
+
+ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+if (ret) {
+error_report("vfio: Failed to de-assign error fd: %d\n", ret);
+}
+g_free(irq_set);
+qemu_set_fd_handler(event_notifier_get_fd(&vdev->err_notifier),
+