device_lock is much too prone to lockups.  For instance, if a .remove
is pending, device_lock is already held.  If userspace attempts to
modify AER signaling after that point, a deadlock occurs.  eventfd
setup/teardown is already protected in vfio by the igate mutex.  AER
is not a high performance interrupt, so we can use the same mutex to
protect against races between signaling and setup.

Signed-off-by: Alex Williamson <[email protected]>
---
 drivers/vfio/pci/vfio_pci.c       |    4 ++++
 drivers/vfio/pci/vfio_pci_intrs.c |   17 -----------------
 2 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 576e34e..468a9fb 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -872,9 +872,13 @@ static pci_ers_result_t vfio_pci_aer_err_detected(struct pci_dev *pdev,
                return PCI_ERS_RESULT_DISCONNECT;
        }
 
+       mutex_lock(&vdev->igate);
+
        if (vdev->err_trigger)
                eventfd_signal(vdev->err_trigger, 1);
 
+       mutex_unlock(&vdev->igate);
+
        vfio_device_put(device);
 
        return PCI_ERS_RESULT_CAN_RECOVER;
diff --git a/drivers/vfio/pci/vfio_pci_intrs.c b/drivers/vfio/pci/vfio_pci_intrs.c
index 641bc87..2103576 100644
--- a/drivers/vfio/pci/vfio_pci_intrs.c
+++ b/drivers/vfio/pci/vfio_pci_intrs.c
@@ -749,54 +749,37 @@ static int vfio_pci_set_err_trigger(struct vfio_pci_device *vdev,
                                    unsigned count, uint32_t flags, void *data)
 {
        int32_t fd = *(int32_t *)data;
-       struct pci_dev *pdev = vdev->pdev;
 
        if ((index != VFIO_PCI_ERR_IRQ_INDEX) ||
            !(flags & VFIO_IRQ_SET_DATA_TYPE_MASK))
                return -EINVAL;
 
-       /*
-        * device_lock synchronizes setting and checking of
-        * err_trigger. The vfio_pci_aer_err_detected() is also
-        * called with device_lock held.
-        */
-
        /* DATA_NONE/DATA_BOOL enables loopback testing */
-
        if (flags & VFIO_IRQ_SET_DATA_NONE) {
-               device_lock(&pdev->dev);
                if (vdev->err_trigger)
                        eventfd_signal(vdev->err_trigger, 1);
-               device_unlock(&pdev->dev);
                return 0;
        } else if (flags & VFIO_IRQ_SET_DATA_BOOL) {
                uint8_t trigger = *(uint8_t *)data;
-               device_lock(&pdev->dev);
                if (trigger && vdev->err_trigger)
                        eventfd_signal(vdev->err_trigger, 1);
-               device_unlock(&pdev->dev);
                return 0;
        }
 
        /* Handle SET_DATA_EVENTFD */
-
        if (fd == -1) {
-               device_lock(&pdev->dev);
                if (vdev->err_trigger)
                        eventfd_ctx_put(vdev->err_trigger);
                vdev->err_trigger = NULL;
-               device_unlock(&pdev->dev);
                return 0;
        } else if (fd >= 0) {
                struct eventfd_ctx *efdctx;
                efdctx = eventfd_ctx_fdget(fd);
                if (IS_ERR(efdctx))
                        return PTR_ERR(efdctx);
-               device_lock(&pdev->dev);
                if (vdev->err_trigger)
                        eventfd_ctx_put(vdev->err_trigger);
                vdev->err_trigger = efdctx;
-               device_unlock(&pdev->dev);
                return 0;
        } else
                return -EINVAL;

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to