[PATCH v7 2/3] VFIO-AER: Vfio-pci driver changes for supporting AER

2013-03-08 Thread Vijay Mohan Pandarathil
- New VFIO_SET_IRQ ioctl option to pass the eventfd that is signaled
  when an error occurs in the vfio_pci_device

- Register pci_error_handler for the vfio_pci driver

- When the device encounters an error, the error handler registered by
  the vfio_pci driver gets invoked by the AER infrastructure

- In the error handler, signal the eventfd registered for the device.

- This results in the qemu eventfd handler getting invoked and
  appropriate action taken for the guest.

Signed-off-by: Vijay Mohan Pandarathil 
---
 drivers/vfio/pci/vfio_pci.c | 44 -
 drivers/vfio/pci/vfio_pci_intrs.c   | 64 +
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 include/uapi/linux/vfio.h   |  1 +
 4 files changed, 109 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 8189cb6..acfcb1a 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -201,7 +201,9 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
 
return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
}
-   }
+   } else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX)
+   if (pci_is_pcie(vdev->pdev))
+   return 1;
 
return 0;
 }
@@ -317,6 +319,17 @@ static long vfio_pci_ioctl(void *device_data,
if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
return -EINVAL;
 
+   switch (info.index) {
+   case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
+   break;
+   case VFIO_PCI_ERR_IRQ_INDEX:
+   if (pci_is_pcie(vdev->pdev))
+   break;
+   /* pass thru to return error */
+   default:
+   return -EINVAL;
+   }
+
info.flags = VFIO_IRQ_INFO_EVENTFD;
 
info.count = vfio_pci_get_irq_count(vdev, info.index);
@@ -551,11 +564,40 @@ static void vfio_pci_remove(struct pci_dev *pdev)
kfree(vdev);
 }
 
+static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+   struct vfio_pci_device *vdev;
+   struct vfio_device *device;
+
+   device = vfio_device_get_from_dev(&pdev->dev);
+   if (device == NULL)
+   return PCI_ERS_RESULT_DISCONNECT;
+
+   vdev = vfio_device_data(device);
+   if (vdev == NULL) {
+   vfio_device_put(device);
+   return PCI_ERS_RESULT_DISCONNECT;
+   }
+
+   if (vdev->err_trigger)
+   eventfd_signal(vdev->err_trigger, 1);
+
+   vfio_device_put(device);
+
+   return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+static struct pci_error_handlers vfio_err_handlers = {
+   .error_detected = vfio_pci_aer_err_detected,
+};
+
 static struct pci_driver vfio_pci_driver = {
.name   = "vfio-pci",
.id_table   = NULL, /* only dynamic ids */
.probe  = vfio_pci_probe,
.remove = vfio_pci_remove,
+   .err_handler= &vfio_err_handlers,
 };
 
 static void __exit vfio_pci_cleanup(void)
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 3639371..b84bf22 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -745,6 +745,63 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
return 0;
 }
 
+static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
+   unsigned index, unsigned start,
+   unsigned count, uint32_t flags, void *data)
+{
+   int32_t fd = *(int32_t *)data;
+   struct pci_dev *pdev = vdev->pdev;
+
+   if ((index != VFIO_PCI_ERR_IRQ_INDEX) ||
+   !(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
+   return -EINVAL;
+
+   /*
+* device_lock synchronizes setting and checking of
+* err_trigger. The vfio_pci_aer_err_detected() is also
+* called with device_lock held.
+*/
+
+   /* DATA_NONE/DATA_BOOL enables loopback testing */
+
+   if (flags & VFIO_IRQ_SET_DATA_NONE) {
+   device_lock(&pdev->dev);
+   if (vdev->err_trigger)
+   eventfd_signal(vdev->err_trigger, 1);
+   device_unlock(&pdev->dev);
+   return 0;
+   } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+   uint8_t trigger = *(uint8_t *)data;
+   device_lock(&pdev->dev);
+   if (trigger && vdev->err_trigger)
+   eventfd_signal(vdev->err_trigger, 1);
+   device_unlock(&a

[PATCH v7 3/3] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

2013-03-08 Thread Vijay Mohan Pandarathil
- Create eventfd per vfio device assigned to a guest and register an
  event handler

- This fd is passed to the vfio_pci driver through the SET_IRQ ioctl

- When the device encounters an error, the eventfd is signalled
  and the qemu eventfd handler gets invoked.

- In the handler decide what action to take. Current action taken
  is to stop the guest.

Signed-off-by: Vijay Mohan Pandarathil 
---
 hw/vfio_pci.c  | 123 +
 linux-headers/linux/vfio.h |   1 +
 2 files changed, 124 insertions(+)

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index ad9ae36..3c78771 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -38,6 +38,7 @@
 #include "qemu/error-report.h"
 #include "qemu/queue.h"
 #include "qemu/range.h"
+#include "sysemu/sysemu.h"
 
 /* #define DEBUG_VFIO */
 #ifdef DEBUG_VFIO
@@ -129,7 +130,9 @@ typedef struct VFIODevice {
 PCIHostDeviceAddress host;
 QLIST_ENTRY(VFIODevice) next;
 struct VFIOGroup *group;
+EventNotifier err_notifier;
 bool reset_works;
+bool pci_aer;
 } VFIODevice;
 
 typedef struct VFIOGroup {
@@ -1802,6 +1805,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
 {
 struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
 struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
 int ret, i;
 
 ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
@@ -1904,6 +1908,18 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
 }
 vdev->config_offset = reg_info.offset;
 
+irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
+
+ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
+if (ret) {
+/* This can fail for an old kernel or legacy PCI dev */
+DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure ret=%d\n", ret);
+ret = 0;
+} else if (irq_info.count == 1) {
+vdev->pci_aer = true;
+} else {
+error_report("vfio: Warning: Could not enable error recovery for the device\n");
+}
 error:
 if (ret) {
 QLIST_REMOVE(vdev, next);
@@ -1925,6 +1941,110 @@ static void vfio_put_device(VFIODevice *vdev)
 }
 }
 
+static void vfio_err_notifier_handler(void *opaque)
+{
+VFIODevice *vdev = opaque;
+
+if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
+return;
+}
+
+/*
+ * TBD. Retrieve the error details and decide what action
+ * needs to be taken. One of the actions could be to pass
+ * the error to the guest and have the guest driver recover
+ * from the error. This requires that PCIe capabilities be
+ * exposed to the guest. For now, we just stop the
+ * guest to contain the error.
+ */
+
+error_report("%s (%04x:%02x:%02x.%x)"
+"Unrecoverable error detected...\n"
+"Please collect any data possible and then kill the guest",
+__func__, vdev->host.domain, vdev->host.bus,
+vdev->host.slot, vdev->host.function);
+
+vm_stop(RUN_STATE_IO_ERROR);
+}
+
+/*
+ * Registers error notifier for devices supporting error recovery.
+ * If we encounter a failure in this function, we report an error
+ * and continue after disabling error recovery support for the
+ * device.
+ */
+static void vfio_register_err_notifier(VFIODevice *vdev)
+{
+int ret;
+int argsz;
+struct vfio_irq_set *irq_set;
+int32_t *pfd;
+
+if (!vdev->pci_aer) {
+return;
+}
+
+if (event_notifier_init(&vdev->err_notifier, 0)) {
+error_report("vfio: Warning: Unable to init event notifier for error detection\n");
+vdev->pci_aer = false;
+return;
+}
+
+argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+irq_set = g_malloc0(argsz);
+irq_set->argsz = argsz;
+irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
+irq_set->start = 0;
+irq_set->count = 1;
+pfd = (int32_t *)&irq_set->data;
+
+*pfd = event_notifier_get_fd(&vdev->err_notifier);
+qemu_set_fd_handler(*pfd, vfio_err_notifier_handler, NULL, vdev);
+
+ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+if (ret) {
+error_report("vfio: Failed to set up error notification\n");
+qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+event_notifier_cleanup(&vdev->err_notifier);
+vdev->pci_aer = false;
+}
+g_free(irq_set);
+}
+static void vfio_unregister_err_notifier(VFIODevice *vdev)
+{
+int argsz;
+struct vfio_irq_set *irq_set;
+int32_t *pfd;
+int ret;
+
+if (!

[PATCH v7 0/3] AER-KVM: Error containment of VFIO devices assigned to KVM guests

2013-03-08 Thread Vijay Mohan Pandarathil
Add support for error containment when a VFIO device assigned to a KVM
guest encounters an error. This is for PCIe devices/drivers that support AER
functionality. When the host OS is notified of an error in a device, either
through the firmware-first approach or through an interrupt handled by the AER
root port driver, the error handler registered by the vfio-pci driver gets
invoked. The qemu process is signaled through an eventfd that it registered
for each VFIO device. In the eventfd handler, qemu decides
what action to take. In this implementation, the guest is stopped to
contain the error.


v7:
 - Rebased to latest upstream
 - Used device_lock() for synchronising err_trigger access
v6:
 - Rebased to latest upstream
 - Resolved merge conflict with vfio_dev_present()
v5:
 - Rebased to latest upstream stable bits
 - Incorporated v4 feedback
v4:
 - Stop the guest instead of terminating
 - Remove unwanted returns from functions
 - Incorporate other feedback
v3:
 - Removed PCI_AER* flags from device info ioctl.
 - Incorporated feedback
v2:
 - Rebased to latest upstream stable bits
 - Changed the new ioctl to be part of VFIO_SET_IRQs ioctl
 - Added a new patch to get/put reference to a vfio device from struct device
 - Incorporated all other feedback.

---

Vijay Mohan Pandarathil(3):

[PATCH 1/3] VFIO: Wrapper to get reference to vfio_device from device 
[PATCH 2/3] VFIO-AER: Vfio-pci driver changes for supporting AER
[PATCH 3/3] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

Kernel files changed

 drivers/vfio/vfio.c  | 30 +-
 include/linux/vfio.h |  3 +++
 2 files changed, 32 insertions(+), 1 deletion(-)

 drivers/vfio/pci/vfio_pci.c | 44 -
 drivers/vfio/pci/vfio_pci_intrs.c   | 64 +
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 include/uapi/linux/vfio.h   |  1 +
 4 files changed, 109 insertions(+), 1 deletion(-)

Qemu files changed

 hw/vfio_pci.c  | 123 +
 linux-headers/linux/vfio.h |   1 +
 2 files changed, 124 insertions(+)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v7 1/3] VFIO: Wrapper for getting reference to vfio_device from device

2013-03-08 Thread Vijay Mohan Pandarathil
- Added vfio_device_get_from_dev() as wrapper to get
  reference to vfio_device from struct device.

- Added vfio_device_data() as a wrapper to get device_data from
  vfio_device.

Signed-off-by: Vijay Mohan Pandarathil 
---
 drivers/vfio/vfio.c  | 30 +-
 include/linux/vfio.h |  3 +++
 2 files changed, 32 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index fcc12f3..21eddd9 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -392,12 +392,13 @@ static void vfio_device_release(struct kref *kref)
 }
 
 /* Device reference always implies a group reference */
-static void vfio_device_put(struct vfio_device *device)
+void vfio_device_put(struct vfio_device *device)
 {
struct vfio_group *group = device->group;
kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
vfio_group_put(group);
 }
+EXPORT_SYMBOL_GPL(vfio_device_put);
 
 static void vfio_device_get(struct vfio_device *device)
 {
@@ -627,6 +628,33 @@ int vfio_add_group_dev(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(vfio_add_group_dev);
 
+/**
+ * Get a reference to the vfio_device for a device that is known to
+ * be bound to a vfio driver.  The driver implicitly holds a
+ * vfio_device reference between vfio_add_group_dev and
+ * vfio_del_group_dev.  We can therefore use drvdata to increment
+ * that reference from the struct device.  This additional
+ * reference must be released by calling vfio_device_put.
+ */
+struct vfio_device *vfio_device_get_from_dev(struct device *dev)
+{
+   struct vfio_device *device = dev_get_drvdata(dev);
+
+   vfio_device_get(device);
+
+   return device;
+}
+EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
+
+/*
+ * Caller must hold a reference to the vfio_device
+ */
+void *vfio_device_data(struct vfio_device *device)
+{
+   return device->device_data;
+}
+EXPORT_SYMBOL_GPL(vfio_device_data);
+
 /* Given a referenced group, check if it contains the device */
 static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
 {
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ab9e862..ac8d488 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -45,6 +45,9 @@ extern int vfio_add_group_dev(struct device *dev,
  void *device_data);
 
 extern void *vfio_del_group_dev(struct device *dev);
+extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
+extern void vfio_device_put(struct vfio_device *device);
+extern void *vfio_device_data(struct vfio_device *device);
 
 /**
  * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
-- 
1.7.11.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v6 3/3] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

2013-03-01 Thread Vijay Mohan Pandarathil
- Create eventfd per vfio device assigned to a guest and register an
  event handler

- This fd is passed to the vfio_pci driver through the SET_IRQ ioctl

- When the device encounters an error, the eventfd is signalled
  and the qemu eventfd handler gets invoked.

- In the handler decide what action to take. Current action taken
  is to stop the guest.

Signed-off-by: Vijay Mohan Pandarathil 
---
 hw/vfio_pci.c  | 123 +
 linux-headers/linux/vfio.h |   1 +
 2 files changed, 124 insertions(+)

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index ad9ae36..3c78771 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -38,6 +38,7 @@
 #include "qemu/error-report.h"
 #include "qemu/queue.h"
 #include "qemu/range.h"
+#include "sysemu/sysemu.h"
 
 /* #define DEBUG_VFIO */
 #ifdef DEBUG_VFIO
@@ -129,7 +130,9 @@ typedef struct VFIODevice {
 PCIHostDeviceAddress host;
 QLIST_ENTRY(VFIODevice) next;
 struct VFIOGroup *group;
+EventNotifier err_notifier;
 bool reset_works;
+bool pci_aer;
 } VFIODevice;
 
 typedef struct VFIOGroup {
@@ -1802,6 +1805,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
 {
 struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
 struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
 int ret, i;
 
 ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
@@ -1904,6 +1908,18 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
 }
 vdev->config_offset = reg_info.offset;
 
+irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
+
+ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
+if (ret) {
+/* This can fail for an old kernel or legacy PCI dev */
+DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure ret=%d\n", ret);
+ret = 0;
+} else if (irq_info.count == 1) {
+vdev->pci_aer = true;
+} else {
+error_report("vfio: Warning: Could not enable error recovery for the device\n");
+}
 error:
 if (ret) {
 QLIST_REMOVE(vdev, next);
@@ -1925,6 +1941,110 @@ static void vfio_put_device(VFIODevice *vdev)
 }
 }
 
+static void vfio_err_notifier_handler(void *opaque)
+{
+VFIODevice *vdev = opaque;
+
+if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
+return;
+}
+
+/*
+ * TBD. Retrieve the error details and decide what action
+ * needs to be taken. One of the actions could be to pass
+ * the error to the guest and have the guest driver recover
+ * from the error. This requires that PCIe capabilities be
+ * exposed to the guest. For now, we just stop the
+ * guest to contain the error.
+ */
+
+error_report("%s (%04x:%02x:%02x.%x)"
+"Unrecoverable error detected...\n"
+"Please collect any data possible and then kill the guest",
+__func__, vdev->host.domain, vdev->host.bus,
+vdev->host.slot, vdev->host.function);
+
+vm_stop(RUN_STATE_IO_ERROR);
+}
+
+/*
+ * Registers error notifier for devices supporting error recovery.
+ * If we encounter a failure in this function, we report an error
+ * and continue after disabling error recovery support for the
+ * device.
+ */
+static void vfio_register_err_notifier(VFIODevice *vdev)
+{
+int ret;
+int argsz;
+struct vfio_irq_set *irq_set;
+int32_t *pfd;
+
+if (!vdev->pci_aer) {
+return;
+}
+
+if (event_notifier_init(&vdev->err_notifier, 0)) {
+error_report("vfio: Warning: Unable to init event notifier for error detection\n");
+vdev->pci_aer = false;
+return;
+}
+
+argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+irq_set = g_malloc0(argsz);
+irq_set->argsz = argsz;
+irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
+irq_set->start = 0;
+irq_set->count = 1;
+pfd = (int32_t *)&irq_set->data;
+
+*pfd = event_notifier_get_fd(&vdev->err_notifier);
+qemu_set_fd_handler(*pfd, vfio_err_notifier_handler, NULL, vdev);
+
+ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+if (ret) {
+error_report("vfio: Failed to set up error notification\n");
+qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+event_notifier_cleanup(&vdev->err_notifier);
+vdev->pci_aer = false;
+}
+g_free(irq_set);
+}
+static void vfio_unregister_err_notifier(VFIODevice *vdev)
+{
+int argsz;
+struct vfio_irq_set *irq_set;
+int32_t *pfd;
+int ret;
+
+if (!

[PATCH v6 2/3] VFIO-AER: Vfio-pci driver changes for supporting AER

2013-03-01 Thread Vijay Mohan Pandarathil
- New VFIO_SET_IRQ ioctl option to pass the eventfd that is signaled
  when an error occurs in the vfio_pci_device

- Register pci_error_handler for the vfio_pci driver

- When the device encounters an error, the error handler registered by
  the vfio_pci driver gets invoked by the AER infrastructure

- In the error handler, signal the eventfd registered for the device.

- This results in the qemu eventfd handler getting invoked and
  appropriate action taken for the guest.

Signed-off-by: Vijay Mohan Pandarathil 
---
 drivers/vfio/pci/vfio_pci.c | 44 -
 drivers/vfio/pci/vfio_pci_intrs.c   | 49 +
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 include/uapi/linux/vfio.h   |  1 +
 4 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 8189cb6..acfcb1a 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -201,7 +201,9 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device *vdev, int irq_type)
 
return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
}
-   }
+   } else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX)
+   if (pci_is_pcie(vdev->pdev))
+   return 1;
 
return 0;
 }
@@ -317,6 +319,17 @@ static long vfio_pci_ioctl(void *device_data,
if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
return -EINVAL;
 
+   switch (info.index) {
+   case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
+   break;
+   case VFIO_PCI_ERR_IRQ_INDEX:
+   if (pci_is_pcie(vdev->pdev))
+   break;
+   /* pass thru to return error */
+   default:
+   return -EINVAL;
+   }
+
info.flags = VFIO_IRQ_INFO_EVENTFD;
 
info.count = vfio_pci_get_irq_count(vdev, info.index);
@@ -551,11 +564,40 @@ static void vfio_pci_remove(struct pci_dev *pdev)
kfree(vdev);
 }
 
+static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+   struct vfio_pci_device *vdev;
+   struct vfio_device *device;
+
+   device = vfio_device_get_from_dev(&pdev->dev);
+   if (device == NULL)
+   return PCI_ERS_RESULT_DISCONNECT;
+
+   vdev = vfio_device_data(device);
+   if (vdev == NULL) {
+   vfio_device_put(device);
+   return PCI_ERS_RESULT_DISCONNECT;
+   }
+
+   if (vdev->err_trigger)
+   eventfd_signal(vdev->err_trigger, 1);
+
+   vfio_device_put(device);
+
+   return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+static struct pci_error_handlers vfio_err_handlers = {
+   .error_detected = vfio_pci_aer_err_detected,
+};
+
 static struct pci_driver vfio_pci_driver = {
.name   = "vfio-pci",
.id_table   = NULL, /* only dynamic ids */
.probe  = vfio_pci_probe,
.remove = vfio_pci_remove,
+   .err_handler= &vfio_err_handlers,
 };
 
 static void __exit vfio_pci_cleanup(void)
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 3639371..4a29830 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -745,6 +745,48 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_device *vdev,
return 0;
 }
 
+static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
+   unsigned index, unsigned start,
+   unsigned count, uint32_t flags, void *data)
+{
+   int32_t fd = *(int32_t *)data;
+
+   if ((index != VFIO_PCI_ERR_IRQ_INDEX) ||
+   !(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
+   return -EINVAL;
+
+   /* DATA_NONE/DATA_BOOL enables loopback testing */
+
+   if (flags & VFIO_IRQ_SET_DATA_NONE) {
+   if (vdev->err_trigger)
+   eventfd_signal(vdev->err_trigger, 1);
+   return 0;
+   } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+   uint8_t trigger = *(uint8_t *)data;
+   if (trigger && vdev->err_trigger)
+   eventfd_signal(vdev->err_trigger, 1);
+   return 0;
+   }
+
+   /* Handle SET_DATA_EVENTFD */
+
+   if (fd == -1) {
+   if (vdev->err_trigger)
+   eventfd_ctx_put(vdev->err_trigger);
+   vdev->err_trigger = NULL;
+   return 0;
+   } else if (fd >= 0) {
+   struct eventfd_ctx *efdctx;
+   efdctx = eventfd_ctx_fdget

[PATCH v6 1/3] VFIO: Wrapper for getting reference to vfio_device from device

2013-03-01 Thread Vijay Mohan Pandarathil
- Added vfio_device_get_from_dev() as wrapper to get
  reference to vfio_device from struct device.

- Added vfio_device_data() as a wrapper to get device_data from
  vfio_device.

Signed-off-by: Vijay Mohan Pandarathil 
---
 drivers/vfio/vfio.c  | 27 ++-
 include/linux/vfio.h |  3 +++
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 28e2d5b..eec6674 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -407,12 +407,13 @@ static void vfio_device_release(struct kref *kref)
 }
 
 /* Device reference always implies a group reference */
-static void vfio_device_put(struct vfio_device *device)
+void vfio_device_put(struct vfio_device *device)
 {
struct vfio_group *group = device->group;
kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
vfio_group_put(group);
 }
+EXPORT_SYMBOL_GPL(vfio_device_put);
 
 static void vfio_device_get(struct vfio_device *device)
 {
@@ -642,6 +643,30 @@ int vfio_add_group_dev(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(vfio_add_group_dev);
 
+/**
+ * This does a get on the vfio_device from device.
+ * Callers of this function will have to call vfio_device_put() to
+ * remove the reference.
+ */
+struct vfio_device *vfio_device_get_from_dev(struct device *dev)
+{
+   struct vfio_device *device = dev_get_drvdata(dev);
+
+   vfio_device_get(device);
+
+   return device;
+}
+EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
+
+/*
+ * Caller must hold a reference to the vfio_device
+ */
+void *vfio_device_data(struct vfio_device *device)
+{
+   return device->device_data;
+}
+EXPORT_SYMBOL_GPL(vfio_device_data);
+
 /* Given a referenced group, check if it contains the device */
 static bool vfio_dev_present(struct vfio_group *group, struct device *dev)
 {
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ab9e862..ac8d488 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -45,6 +45,9 @@ extern int vfio_add_group_dev(struct device *dev,
  void *device_data);
 
 extern void *vfio_del_group_dev(struct device *dev);
+extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
+extern void vfio_device_put(struct vfio_device *device);
+extern void *vfio_device_data(struct vfio_device *device);
 
 /**
  * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
-- 
1.7.11.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v6 0/3] AER-KVM: Error containment of VFIO devices assigned to KVM guests

2013-03-01 Thread Vijay Mohan Pandarathil
Add support for error containment when a VFIO device assigned to a KVM
guest encounters an error. This is for PCIe devices/drivers that support AER
functionality. When the host OS is notified of an error in a device, either
through the firmware-first approach or through an interrupt handled by the AER
root port driver, the error handler registered by the vfio-pci driver gets
invoked. The qemu process is signaled through an eventfd that it registered
for each VFIO device. In the eventfd handler, qemu decides
what action to take. In this implementation, the guest is stopped to
contain the error.


v6:
 - Rebased to latest upstream
 - Resolved merge conflict with vfio_dev_present()
v5:
 - Rebased to latest upstream stable bits
 - Incorporated v4 feedback
v4:
 - Stop the guest instead of terminating
 - Remove unwanted returns from functions
 - Incorporate other feedback
v3:
 - Removed PCI_AER* flags from device info ioctl.
 - Incorporated feedback
v2:
 - Rebased to latest upstream stable bits
 - Changed the new ioctl to be part of VFIO_SET_IRQs ioctl
 - Added a new patch to get/put reference to a vfio device from struct device
 - Incorporated all other feedback.

---

Vijay Mohan Pandarathil(3):

[PATCH 1/3] VFIO: Wrapper to get reference to vfio_device from device 
[PATCH 2/3] VFIO-AER: Vfio-pci driver changes for supporting AER
[PATCH 3/3] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

Kernel files changed


 drivers/vfio/vfio.c  | 27 ++-
 include/linux/vfio.h |  3 +++
 2 files changed, 29 insertions(+), 1 deletion(-)

 drivers/vfio/pci/vfio_pci.c | 44 -
 drivers/vfio/pci/vfio_pci_intrs.c   | 49 +
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 include/uapi/linux/vfio.h   |  1 +
 4 files changed, 94 insertions(+), 1 deletion(-)

Qemu files changed

 hw/vfio_pci.c  | 123 +
 linux-headers/linux/vfio.h |   1 +
 2 files changed, 124 insertions(+)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v5 3/3] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

2013-02-23 Thread Vijay Mohan Pandarathil
- Create eventfd per vfio device assigned to a guest and register an
  event handler

- This fd is passed to the vfio_pci driver through the SET_IRQ ioctl

- When the device encounters an error, the eventfd is signalled
  and the qemu eventfd handler gets invoked.

- In the handler decide what action to take. Current action taken
  is to stop the guest.

Signed-off-by: Vijay Mohan Pandarathil 
---
 hw/vfio_pci.c  | 123 +
 linux-headers/linux/vfio.h |   1 +
 2 files changed, 124 insertions(+)

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index ad9ae36..3c78771 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -38,6 +38,7 @@
 #include "qemu/error-report.h"
 #include "qemu/queue.h"
 #include "qemu/range.h"
+#include "sysemu/sysemu.h"
 
 /* #define DEBUG_VFIO */
 #ifdef DEBUG_VFIO
@@ -129,7 +130,9 @@ typedef struct VFIODevice {
 PCIHostDeviceAddress host;
 QLIST_ENTRY(VFIODevice) next;
 struct VFIOGroup *group;
+EventNotifier err_notifier;
 bool reset_works;
+bool pci_aer;
 } VFIODevice;
 
 typedef struct VFIOGroup {
@@ -1802,6 +1805,7 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
 {
 struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
 struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
 int ret, i;
 
 ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
@@ -1904,6 +1908,18 @@ static int vfio_get_device(VFIOGroup *group, const char *name, VFIODevice *vdev)
 }
 vdev->config_offset = reg_info.offset;
 
+irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
+
+ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
+if (ret) {
+/* This can fail for an old kernel or legacy PCI dev */
+DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure ret=%d\n", ret);
+ret = 0;
+} else if (irq_info.count == 1) {
+vdev->pci_aer = true;
+} else {
+error_report("vfio: Warning: Could not enable error recovery for the device\n");
+}
 error:
 if (ret) {
 QLIST_REMOVE(vdev, next);
@@ -1925,6 +1941,110 @@ static void vfio_put_device(VFIODevice *vdev)
 }
 }
 
+static void vfio_err_notifier_handler(void *opaque)
+{
+VFIODevice *vdev = opaque;
+
+if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
+return;
+}
+
+/*
+ * TBD. Retrieve the error details and decide what action
+ * needs to be taken. One of the actions could be to pass
+ * the error to the guest and have the guest driver recover
+ * from the error. This requires that PCIe capabilities be
+ * exposed to the guest. For now, we just stop the
+ * guest to contain the error.
+ */
+
+error_report("%s (%04x:%02x:%02x.%x)"
+"Unrecoverable error detected...\n"
+"Please collect any data possible and then kill the guest",
+__func__, vdev->host.domain, vdev->host.bus,
+vdev->host.slot, vdev->host.function);
+
+vm_stop(RUN_STATE_IO_ERROR);
+}
+
+/*
+ * Registers error notifier for devices supporting error recovery.
+ * If we encounter a failure in this function, we report an error
+ * and continue after disabling error recovery support for the
+ * device.
+ */
+static void vfio_register_err_notifier(VFIODevice *vdev)
+{
+int ret;
+int argsz;
+struct vfio_irq_set *irq_set;
+int32_t *pfd;
+
+if (!vdev->pci_aer) {
+return;
+}
+
+if (event_notifier_init(&vdev->err_notifier, 0)) {
+error_report("vfio: Warning: Unable to init event notifier for error detection\n");
+vdev->pci_aer = false;
+return;
+}
+
+argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+irq_set = g_malloc0(argsz);
+irq_set->argsz = argsz;
+irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
+irq_set->start = 0;
+irq_set->count = 1;
+pfd = (int32_t *)&irq_set->data;
+
+*pfd = event_notifier_get_fd(&vdev->err_notifier);
+qemu_set_fd_handler(*pfd, vfio_err_notifier_handler, NULL, vdev);
+
+ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+if (ret) {
+error_report("vfio: Failed to set up error notification\n");
+qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+event_notifier_cleanup(&vdev->err_notifier);
+vdev->pci_aer = false;
+}
+g_free(irq_set);
+}
+static void vfio_unregister_err_notifier(VFIODevice *vdev)
+{
+int argsz;
+struct vfio_irq_set *irq_set;
+int32_t *pfd;
+int ret;
+
+if (!

[PATCH v5 2/3] VFIO-AER: Vfio-pci driver changes for supporting AER

2013-02-23 Thread Vijay Mohan Pandarathil
- New VFIO_SET_IRQ ioctl option to pass the eventfd that is signaled
  when an error occurs in the vfio_pci_device

- Register pci_error_handler for the vfio_pci driver

- When the device encounters an error, the error handler registered by
  the vfio_pci driver gets invoked by the AER infrastructure

- In the error handler, signal the eventfd registered for the device.

- This results in the qemu eventfd handler getting invoked and
  appropriate action taken for the guest.

Signed-off-by: Vijay Mohan Pandarathil 
---
 drivers/vfio/pci/vfio_pci.c | 44 -
 drivers/vfio/pci/vfio_pci_intrs.c   | 49 +
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 include/uapi/linux/vfio.h   |  1 +
 4 files changed, 94 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index b28e66c..cd99321 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -196,7 +196,9 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device 
*vdev, int irq_type)
 
return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
}
-   }
+   } else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX)
+   if (pci_is_pcie(vdev->pdev))
+   return 1;
 
return 0;
 }
@@ -302,6 +304,17 @@ static long vfio_pci_ioctl(void *device_data,
if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
return -EINVAL;
 
+   switch (info.index) {
+   case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
+   break;
+   case VFIO_PCI_ERR_IRQ_INDEX:
+   if (pci_is_pcie(vdev->pdev))
+   break;
+   /* pass thru to return error */
+   default:
+   return -EINVAL;
+   }
+
info.flags = VFIO_IRQ_INFO_EVENTFD;
 
info.count = vfio_pci_get_irq_count(vdev, info.index);
@@ -538,11 +551,40 @@ static void vfio_pci_remove(struct pci_dev *pdev)
kfree(vdev);
 }
 
+static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+   struct vfio_pci_device *vdev;
+   struct vfio_device *device;
+
+   device = vfio_device_get_from_dev(&pdev->dev);
+   if (device == NULL)
+   return PCI_ERS_RESULT_DISCONNECT;
+
+   vdev = vfio_device_data(device);
+   if (vdev == NULL) {
+   vfio_device_put(device);
+   return PCI_ERS_RESULT_DISCONNECT;
+   }
+
+   if (vdev->err_trigger)
+   eventfd_signal(vdev->err_trigger, 1);
+
+   vfio_device_put(device);
+
+   return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+static struct pci_error_handlers vfio_err_handlers = {
+   .error_detected = vfio_pci_aer_err_detected,
+};
+
 static struct pci_driver vfio_pci_driver = {
.name   = "vfio-pci",
.id_table   = NULL, /* only dynamic ids */
.probe  = vfio_pci_probe,
.remove = vfio_pci_remove,
+   .err_handler= &vfio_err_handlers,
 };
 
 static void __exit vfio_pci_cleanup(void)
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index 3639371..4a29830 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -745,6 +745,48 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_device 
*vdev,
return 0;
 }
 
+static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
+   unsigned index, unsigned start,
+   unsigned count, uint32_t flags, void *data)
+{
+   int32_t fd = *(int32_t *)data;
+
+   if ((index != VFIO_PCI_ERR_IRQ_INDEX) ||
+   !(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
+   return -EINVAL;
+
+   /* DATA_NONE/DATA_BOOL enables loopback testing */
+
+   if (flags & VFIO_IRQ_SET_DATA_NONE) {
+   if (vdev->err_trigger)
+   eventfd_signal(vdev->err_trigger, 1);
+   return 0;
+   } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+   uint8_t trigger = *(uint8_t *)data;
+   if (trigger && vdev->err_trigger)
+   eventfd_signal(vdev->err_trigger, 1);
+   return 0;
+   }
+
+   /* Handle SET_DATA_EVENTFD */
+
+   if (fd == -1) {
+   if (vdev->err_trigger)
+   eventfd_ctx_put(vdev->err_trigger);
+   vdev->err_trigger = NULL;
+   return 0;
+   } else if (fd >= 0) {
+   struct eventfd_ctx *efdctx;
+   efdctx = eventfd_ctx_fdget

[PATCH v5 1/3] VFIO: Wrapper for getting reference to vfio_device from device

2013-02-23 Thread Vijay Mohan Pandarathil
- Added vfio_device_get_from_dev() as wrapper to get
  reference to vfio_device from struct device.

- Added vfio_device_data() as a wrapper to get device_data from
  vfio_device.

Signed-off-by: Vijay Mohan Pandarathil 
---
 drivers/vfio/vfio.c  | 47 ++-
 include/linux/vfio.h |  3 +++
 2 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 12c264d..863d1d3 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -407,12 +407,13 @@ static void vfio_device_release(struct kref *kref)
 }
 
 /* Device reference always implies a group reference */
-static void vfio_device_put(struct vfio_device *device)
+void vfio_device_put(struct vfio_device *device)
 {
struct vfio_group *group = device->group;
kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
vfio_group_put(group);
 }
+EXPORT_SYMBOL_GPL(vfio_device_put);
 
 static void vfio_device_get(struct vfio_device *device)
 {
@@ -642,8 +643,12 @@ int vfio_add_group_dev(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(vfio_add_group_dev);
 
-/* Test whether a struct device is present in our tracking */
-static bool vfio_dev_present(struct device *dev)
+/**
+ * This does a get on the vfio_device from device.
+ * Callers of this function will have to call vfio_device_put() to
+ * remove the reference.
+ */
+struct vfio_device *vfio_device_get_from_dev(struct device *dev)
 {
struct iommu_group *iommu_group;
struct vfio_group *group;
@@ -651,25 +656,41 @@ static bool vfio_dev_present(struct device *dev)
 
iommu_group = iommu_group_get(dev);
if (!iommu_group)
-   return false;
+   return NULL;
 
group = vfio_group_get_from_iommu(iommu_group);
if (!group) {
iommu_group_put(iommu_group);
-   return false;
+   return NULL;
}
 
device = vfio_group_get_device(group, dev);
-   if (!device) {
-   vfio_group_put(group);
-   iommu_group_put(iommu_group);
-   return false;
-   }
-
-   vfio_device_put(device);
vfio_group_put(group);
iommu_group_put(iommu_group);
-   return true;
+   return device;
+}
+EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
+
+/*
+ * Caller must hold a reference to the vfio_device
+ */
+void *vfio_device_data(struct vfio_device *device)
+{
+   return device->device_data;
+}
+EXPORT_SYMBOL_GPL(vfio_device_data);
+
+/* Test whether a struct device is present in our tracking */
+static bool vfio_dev_present(struct device *dev)
+{
+   struct vfio_device *device;
+
+   device = vfio_device_get_from_dev(dev);
+   if (device) {
+   vfio_device_put(device);
+   return true;
+   } else
+   return false;
 }
 
 /*
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ab9e862..ac8d488 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -45,6 +45,9 @@ extern int vfio_add_group_dev(struct device *dev,
  void *device_data);
 
 extern void *vfio_del_group_dev(struct device *dev);
+extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
+extern void vfio_device_put(struct vfio_device *device);
+extern void *vfio_device_data(struct vfio_device *device);
 
 /**
  * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
-- 
1.7.11.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v5 0/3] AER-KVM: Error containment of VFIO devices assigned to KVM guests

2013-02-23 Thread Vijay Mohan Pandarathil
Add support for error containment when a VFIO device assigned to a KVM
guest encounters an error. This is for PCIe devices/drivers that support AER
functionality. When the host OS is notified of an error in a device either
through the firmware first approach or through an interrupt handled by the AER
root port driver, the error handler registered by the vfio-pci driver gets
invoked. The qemu process is signaled through an eventfd registered per
VFIO device by the qemu process. In the eventfd handler, qemu decides on
what action to take. In this implementation, the guest is stopped to
contain the error.


v5:
 - Rebased to latest upstream stable bits
 - Incorporated v4 feedback
v4:
 - Stop the guest instead of terminating
 - Remove unwanted returns from functions
 - Incorporate other feedback
v3:
 - Removed PCI_AER* flags from device info ioctl.
 - Incorporated feedback
v2:
 - Rebased to latest upstream stable bits
 - Changed the new ioctl to be part of VFIO_SET_IRQs ioctl
 - Added a new patch to get/put reference to a vfio device from struct device
 - Incorporated all other feedback.

---

Vijay Mohan Pandarathil(3):

[PATCH 1/3] VFIO: Wrapper to get reference to vfio_device from device 
[PATCH 2/3] VFIO-AER: Vfio-pci driver changes for supporting AER
[PATCH 3/3] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

Kernel files changed

 drivers/vfio/vfio.c  | 47 ++-
 include/linux/vfio.h |  3 +++
 2 files changed, 37 insertions(+), 13 deletions(-)

 drivers/vfio/pci/vfio_pci.c | 44 -
 drivers/vfio/pci/vfio_pci_intrs.c   | 49 +
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 include/uapi/linux/vfio.h   |  1 +
 4 files changed, 94 insertions(+), 1 deletion(-)

Qemu files changed

 hw/vfio_pci.c  | 123 +
 linux-headers/linux/vfio.h |   1 +
 2 files changed, 124 insertions(+)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 1/3] VFIO: Wrapper for getting reference to vfio_device from device

2013-02-14 Thread Vijay Mohan Pandarathil
- Added vfio_device_get_from_dev() as wrapper to get
  reference to vfio_device from struct device.

- Added vfio_device_data() as a wrapper to get device_data from
  vfio_device.

Signed-off-by: Vijay Mohan Pandarathil 
---
 drivers/vfio/vfio.c  | 47 ++-
 include/linux/vfio.h |  3 +++
 2 files changed, 37 insertions(+), 13 deletions(-)

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index 12c264d..863d1d3 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -407,12 +407,13 @@ static void vfio_device_release(struct kref *kref)
 }
 
 /* Device reference always implies a group reference */
-static void vfio_device_put(struct vfio_device *device)
+void vfio_device_put(struct vfio_device *device)
 {
struct vfio_group *group = device->group;
kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock);
vfio_group_put(group);
 }
+EXPORT_SYMBOL_GPL(vfio_device_put);
 
 static void vfio_device_get(struct vfio_device *device)
 {
@@ -642,8 +643,12 @@ int vfio_add_group_dev(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(vfio_add_group_dev);
 
-/* Test whether a struct device is present in our tracking */
-static bool vfio_dev_present(struct device *dev)
+/**
+ * This does a get on the vfio_device from device.
+ * Callers of this function will have to call vfio_device_put() to
+ * remove the reference.
+ */
+struct vfio_device *vfio_device_get_from_dev(struct device *dev)
 {
struct iommu_group *iommu_group;
struct vfio_group *group;
@@ -651,25 +656,41 @@ static bool vfio_dev_present(struct device *dev)
 
iommu_group = iommu_group_get(dev);
if (!iommu_group)
-   return false;
+   return NULL;
 
group = vfio_group_get_from_iommu(iommu_group);
if (!group) {
iommu_group_put(iommu_group);
-   return false;
+   return NULL;
}
 
device = vfio_group_get_device(group, dev);
-   if (!device) {
-   vfio_group_put(group);
-   iommu_group_put(iommu_group);
-   return false;
-   }
-
-   vfio_device_put(device);
vfio_group_put(group);
iommu_group_put(iommu_group);
-   return true;
+   return device;
+}
+EXPORT_SYMBOL_GPL(vfio_device_get_from_dev);
+
+/*
+ * Caller must hold a reference to the vfio_device
+ */
+void *vfio_device_data(struct vfio_device *device)
+{
+   return device->device_data;
+}
+EXPORT_SYMBOL_GPL(vfio_device_data);
+
+/* Test whether a struct device is present in our tracking */
+static bool vfio_dev_present(struct device *dev)
+{
+   struct vfio_device *device;
+
+   device = vfio_device_get_from_dev(dev);
+   if (device) {
+   vfio_device_put(device);
+   return true;
+   } else
+   return false;
 }
 
 /*
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ab9e862..ac8d488 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -45,6 +45,9 @@ extern int vfio_add_group_dev(struct device *dev,
  void *device_data);
 
 extern void *vfio_del_group_dev(struct device *dev);
+extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
+extern void vfio_device_put(struct vfio_device *device);
+extern void *vfio_device_data(struct vfio_device *device);
 
 /**
  * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
-- 
1.7.11.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH v4 2/3] VFIO-AER: Vfio-pci driver changes for supporting AER

2013-02-14 Thread Vijay Mohan Pandarathil
- New VFIO_SET_IRQ ioctl option to pass the eventfd that is signaled when
  an error occurs in the vfio_pci_device

- Register pci_error_handler for the vfio_pci driver

- When the device encounters an error, the error handler registered by
  the vfio_pci driver gets invoked by the AER infrastructure

- In the error handler, signal the eventfd registered for the device.

- This results in the qemu eventfd handler getting invoked and
  appropriate action taken for the guest.

Signed-off-by: Vijay Mohan Pandarathil 
---
 drivers/vfio/pci/vfio_pci.c | 44 +-
 drivers/vfio/pci/vfio_pci_intrs.c   | 47 +
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 include/uapi/linux/vfio.h   |  1 +
 4 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index b28e66c..d70bd58 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -196,7 +196,9 @@ static int vfio_pci_get_irq_count(struct vfio_pci_device 
*vdev, int irq_type)
 
return (flags & PCI_MSIX_FLAGS_QSIZE) + 1;
}
-   }
+   } else if (irq_type == VFIO_PCI_ERR_IRQ_INDEX)
+   if (pci_is_pcie(vdev->pdev))
+   return 1;
 
return 0;
 }
@@ -302,6 +304,17 @@ static long vfio_pci_ioctl(void *device_data,
if (info.argsz < minsz || info.index >= VFIO_PCI_NUM_IRQS)
return -EINVAL;
 
+   switch (info.index) {
+   case VFIO_PCI_INTX_IRQ_INDEX ... VFIO_PCI_MSIX_IRQ_INDEX:
+   break;
+   case VFIO_PCI_ERR_IRQ_INDEX:
+   if (pci_is_pcie(vdev->pdev))
+   break;
+   /* pass thru to return error */
+   default:
+   return -EINVAL;
+   }
+
info.flags = VFIO_IRQ_INFO_EVENTFD;
 
info.count = vfio_pci_get_irq_count(vdev, info.index);
@@ -538,11 +551,40 @@ static void vfio_pci_remove(struct pci_dev *pdev)
kfree(vdev);
 }
 
+static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
+ pci_channel_state_t state)
+{
+   struct vfio_pci_device *vpdev;
+   struct vfio_device *vdev;
+
+   vdev = vfio_device_get_from_dev(&pdev->dev);
+   if (vdev == NULL)
+   return PCI_ERS_RESULT_DISCONNECT;
+
+   vpdev = vfio_device_data(vdev);
+   if (vpdev == NULL) {
+   vfio_device_put(vdev);
+   return PCI_ERS_RESULT_DISCONNECT;
+   }
+
+   if (vpdev->err_trigger)
+   eventfd_signal(vpdev->err_trigger, 1);
+
+   vfio_device_put(vdev);
+
+   return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+static struct pci_error_handlers vfio_err_handlers = {
+   .error_detected = vfio_pci_aer_err_detected,
+};
+
 static struct pci_driver vfio_pci_driver = {
.name   = "vfio-pci",
.id_table   = NULL, /* only dynamic ids */
.probe  = vfio_pci_probe,
.remove = vfio_pci_remove,
+   .err_handler= &vfio_err_handlers,
 };
 
 static void __exit vfio_pci_cleanup(void)
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c 
b/drivers/vfio/pci/vfio_pci_intrs.c
index 3639371..8036f3a 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -745,6 +745,46 @@ static int vfio_pci_set_msi_trigger(struct vfio_pci_device 
*vdev,
return 0;
 }
 
+static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
+   unsigned index, unsigned start,
+   unsigned count, uint32_t flags, void *data)
+{
+   int32_t fd = *(int32_t *)data;
+
+   if ((index != VFIO_PCI_ERR_IRQ_INDEX) ||
+   !(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
+   return -EINVAL;
+
+   /* DATA_NONE/DATA_BOOL enables loopback testing */
+
+   if (flags & VFIO_IRQ_SET_DATA_NONE) {
+   if (vdev->err_trigger)
+   eventfd_signal(vdev->err_trigger, 1);
+   return 0;
+   } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
+   uint8_t trigger = *(uint8_t *)data;
+   if (trigger && vdev->err_trigger)
+   eventfd_signal(vdev->err_trigger, 1);
+   return 0;
+   }
+
+   /* Handle SET_DATA_EVENTFD */
+
+   if (fd == -1) {
+   if (vdev->err_trigger)
+   eventfd_ctx_put(vdev->err_trigger);
+   vdev->err_trigger = NULL;
+   return 0;
+   } else if (fd >= 0) {
+   struct eventfd_ctx *efdctx;
+   efdctx = eventfd_ctx_fdget

[PATCH v4 3/3] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

2013-02-14 Thread Vijay Mohan Pandarathil
- Create eventfd per vfio device assigned to a guest and register an
  event handler

- This fd is passed to the vfio_pci driver through the SET_IRQ ioctl

- When the device encounters an error, the eventfd is signalled
  and the qemu eventfd handler gets invoked.

- In the handler decide what action to take. Current action taken
  is to stop the guest.

Signed-off-by: Vijay Mohan Pandarathil 
---
 hw/vfio_pci.c  | 112 +
 linux-headers/linux/vfio.h |   1 +
 2 files changed, 113 insertions(+)

diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
index c51ae67..05da53b 100644
--- a/hw/vfio_pci.c
+++ b/hw/vfio_pci.c
@@ -38,6 +38,7 @@
 #include "qemu/error-report.h"
 #include "qemu/queue.h"
 #include "qemu/range.h"
+#include "sysemu/sysemu.h"
 
 /* #define DEBUG_VFIO */
 #ifdef DEBUG_VFIO
@@ -129,7 +130,9 @@ typedef struct VFIODevice {
 PCIHostDeviceAddress host;
 QLIST_ENTRY(VFIODevice) next;
 struct VFIOGroup *group;
+EventNotifier err_notifier;
 bool reset_works;
+bool pci_aer;
 } VFIODevice;
 
 typedef struct VFIOGroup {
@@ -1802,6 +1805,7 @@ static int vfio_get_device(VFIOGroup *group, const char 
*name, VFIODevice *vdev)
 {
 struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
 struct vfio_region_info reg_info = { .argsz = sizeof(reg_info) };
+struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
 int ret, i;
 
 ret = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
@@ -1901,6 +1905,15 @@ static int vfio_get_device(VFIOGroup *group, const char 
*name, VFIODevice *vdev)
 vdev->config_size = reg_info.size;
 vdev->config_offset = reg_info.offset;
 
+irq_info.index = VFIO_PCI_ERR_IRQ_INDEX;
+
+ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info);
+if (ret) {
+error_report("vfio: Error getting IRQ info: %m\n");
+goto error;
+}
+if (irq_info.count == 1)
+vdev->pci_aer = true;
 error:
 if (ret) {
 QLIST_REMOVE(vdev, next);
@@ -1922,6 +1935,102 @@ static void vfio_put_device(VFIODevice *vdev)
 }
 }
 
+static void vfio_err_notifier_handler(void *opaque)
+{
+VFIODevice *vdev = opaque;
+
+if (!event_notifier_test_and_clear(&vdev->err_notifier)) {
+return;
+}
+
+/*
+ * TBD. Retrieve the error details and decide what action
+ * needs to be taken. One of the actions could be to pass
+ * the error to the guest and have the guest driver recover
+ * from the error. This requires that PCIe capabilities be
+ * exposed to the guest. For now, we just stop the
+ * guest to contain the error.
+ */
+
+error_report("%s (%04x:%02x:%02x.%x)"
+"Unrecoverable error detected...\n"
+"Please inestigate/collect any data required and then kill the quest",
+__func__, vdev->host.domain, vdev->host.bus,
+vdev->host.slot, vdev->host.function);
+
+vm_stop(RUN_STATE_IO_ERROR);
+}
+
+static void vfio_register_err_notifier(VFIODevice *vdev)
+{
+int ret;
+int argsz;
+struct vfio_irq_set *irq_set;
+int32_t *pfd;
+
+if (!vdev->pci_aer) {
+return;
+}
+
+if (event_notifier_init(&vdev->err_notifier, 0)) {
+error_report("vfio: Warning: Unable to init event notifier for error 
detection\n");
+return;
+}
+
+argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+irq_set = g_malloc0(argsz);
+irq_set->argsz = argsz;
+irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
+irq_set->start = 0;
+irq_set->count = 1;
+pfd = (int32_t *)&irq_set->data;
+
+*pfd = event_notifier_get_fd(&vdev->err_notifier);
+qemu_set_fd_handler(*pfd, vfio_err_notifier_handler, NULL, vdev);
+
+ret = ioctl(vdev->fd, VFIO_DEVICE_SET_IRQS, irq_set);
+if (ret) {
+error_report("vfio: Failed to set up error notification\n");
+qemu_set_fd_handler(*pfd, NULL, NULL, vdev);
+event_notifier_cleanup(&vdev->err_notifier);
+}
+g_free(irq_set);
+}
+static void vfio_unregister_err_notifier(VFIODevice *vdev)
+{
+int argsz;
+struct vfio_irq_set *irq_set;
+int32_t *pfd;
+int ret;
+
+if (!vdev->pci_aer) {
+return;
+}
+
+argsz = sizeof(*irq_set) + sizeof(*pfd);
+
+irq_set = g_malloc0(argsz);
+irq_set->argsz = argsz;
+irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD |
+ VFIO_IRQ_SET_ACTION_TRIGGER;
+irq_set->index = VFIO_PCI_ERR_IRQ_INDEX;
+irq_set->start = 0;
+irq_set->count = 1;
+pfd = (int32_t *)&irq_set->data;
+*pfd = -1;
+
+ret = ioctl(vdev->fd, 

[PATCH v4 0/3] AER-KVM: Error containment of VFIO devices assigned to KVM guests

2013-02-14 Thread Vijay Mohan Pandarathil
Add support for error containment when a VFIO device assigned to a KVM
guest encounters an error. This is for PCIe devices/drivers that support AER
functionality. When the host OS is notified of an error in a device either
through the firmware first approach or through an interrupt handled by the AER
root port driver, the error handler registered by the vfio-pci driver gets
invoked. The qemu process is signaled through an eventfd registered per
VFIO device by the qemu process. In the eventfd handler, qemu decides on
what action to take. In this implementation, the guest is stopped to
contain the error.


v4:
 - Stop the guest instead of terminating
 - Remove unwanted returns from functions
 - Incorporate other feedback
v3:
 - Removed PCI_AER* flags from device info ioctl.
 - Incorporated feedback
v2:
 - Rebased to latest upstream stable bits
 - Changed the new ioctl to be part of VFIO_SET_IRQs ioctl
 - Added a new patch to get/put reference to a vfio device from struct device
 - Incorporated all other feedback.

---

Vijay Mohan Pandarathil(3):

[PATCH 1/3] VFIO: Wrapper to get reference to vfio_device from device 
[PATCH 2/3] VFIO-AER: Vfio-pci driver changes for supporting AER
[PATCH 3/3] QEMU-AER: Qemu changes to support AER for VFIO-PCI devices

Kernel files changed

 drivers/vfio/vfio.c  | 47 ++-
 include/linux/vfio.h |  3 +++
 2 files changed, 37 insertions(+), 13 deletions(-)

 drivers/vfio/pci/vfio_pci.c | 44 +-
 drivers/vfio/pci/vfio_pci_intrs.c   | 47 +
 drivers/vfio/pci/vfio_pci_private.h |  1 +
 include/uapi/linux/vfio.h   |  1 +
 4 files changed, 92 insertions(+), 1 deletion(-)

Qemu files changed

 hw/vfio_pci.c  | 112 +
 linux-headers/linux/vfio.h |   1 +
 2 files changed, 113 insertions(+)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/