Implement the suspend operation for VDUSE devices, so that vhost-vdpa
offers that backend feature and userspace can effectively suspend the device.

This is required before fetching the virtqueue indexes (base) for live
migration, since otherwise the device could modify them after userland has
read them.

Signed-off-by: Eugenio Pérez <[email protected]>
---
This series depends on
https://lore.kernel.org/lkml/[email protected]
---
 drivers/vdpa/vdpa_user/vduse_dev.c | 86 +++++++++++++++++++++++++++++-
 include/uapi/linux/vduse.h         |  4 ++
 2 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 59d9c4718d86..bdcc114e2710 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -54,7 +54,8 @@
 #define IRQ_UNBOUND -1
 
 /* Supported VDUSE features */
-static const uint64_t vduse_features = BIT_U64(VDUSE_F_QUEUE_READY);
+static const uint64_t vduse_features = BIT_U64(VDUSE_F_QUEUE_READY) |
+                                      BIT_U64(VDUSE_F_SUSPEND);
 
 /*
  * VDUSE instance have not asked the vduse API version, so assume 0.
@@ -85,6 +86,7 @@ struct vduse_virtqueue {
        int irq_effective_cpu;
        struct cpumask irq_affinity;
        struct kobject kobj;
+       struct vduse_dev *dev;
 };
 
 struct vduse_dev;
@@ -134,6 +136,7 @@ struct vduse_dev {
        int minor;
        bool broken;
        bool connected;
+       bool suspended;
        u64 api_version;
        u64 device_features;
        u64 driver_features;
@@ -480,6 +483,7 @@ static void vduse_dev_reset(struct vduse_dev *dev)
 
        down_write(&dev->rwsem);
 
+       dev->suspended = false;
        dev->status = 0;
        dev->driver_features = 0;
        dev->generation++;
@@ -559,6 +563,10 @@ static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
        struct vduse_virtqueue *vq = dev->vqs[idx];
 
+       guard(rwsem_read)(&vq->dev->rwsem);
+       if (vq->dev->suspended)
+               return;
+
        if (!eventfd_signal_allowed()) {
                schedule_work(&vq->kick);
                return;
@@ -896,6 +904,27 @@ static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
        return 0;
 }
 
+static int vduse_vdpa_suspend(struct vdpa_device *vdpa)
+{
+       struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+       struct vduse_dev_msg msg = { 0 };
+       int ret;
+
+       msg.req.type = VDUSE_SUSPEND;
+
+       ret = vduse_dev_msg_sync(dev, &msg);
+       if (ret == 0) {
+               scoped_guard(rwsem_write, &dev->rwsem)
+                       dev->suspended = true;
+
+               cancel_work_sync(&dev->inject);
+               for (u32 i = 0; i < dev->vq_num; i++)
+                       cancel_work_sync(&dev->vqs[i]->inject);
+       }
+
+       return ret;
+}
+
 static void vduse_vdpa_free(struct vdpa_device *vdpa)
 {
        struct vduse_dev *dev = vdpa_to_vduse(vdpa);
@@ -937,6 +966,41 @@ static const struct vdpa_config_ops vduse_vdpa_config_ops = {
        .free                   = vduse_vdpa_free,
 };
 
+static const struct vdpa_config_ops vduse_vdpa_config_ops_with_suspend = {
+       .set_vq_address         = vduse_vdpa_set_vq_address,
+       .kick_vq                = vduse_vdpa_kick_vq,
+       .set_vq_cb              = vduse_vdpa_set_vq_cb,
+       .set_vq_num             = vduse_vdpa_set_vq_num,
+       .get_vq_size            = vduse_vdpa_get_vq_size,
+       .get_vq_group           = vduse_get_vq_group,
+       .set_vq_ready           = vduse_vdpa_set_vq_ready,
+       .get_vq_ready           = vduse_vdpa_get_vq_ready,
+       .set_vq_state           = vduse_vdpa_set_vq_state,
+       .get_vq_state           = vduse_vdpa_get_vq_state,
+       .get_vq_align           = vduse_vdpa_get_vq_align,
+       .get_device_features    = vduse_vdpa_get_device_features,
+       .set_driver_features    = vduse_vdpa_set_driver_features,
+       .get_driver_features    = vduse_vdpa_get_driver_features,
+       .set_config_cb          = vduse_vdpa_set_config_cb,
+       .get_vq_num_max         = vduse_vdpa_get_vq_num_max,
+       .get_device_id          = vduse_vdpa_get_device_id,
+       .get_vendor_id          = vduse_vdpa_get_vendor_id,
+       .get_status             = vduse_vdpa_get_status,
+       .set_status             = vduse_vdpa_set_status,
+       .get_config_size        = vduse_vdpa_get_config_size,
+       .get_config             = vduse_vdpa_get_config,
+       .set_config             = vduse_vdpa_set_config,
+       .get_generation         = vduse_vdpa_get_generation,
+       .set_vq_affinity        = vduse_vdpa_set_vq_affinity,
+       .get_vq_affinity        = vduse_vdpa_get_vq_affinity,
+       .reset                  = vduse_vdpa_reset,
+       .set_map                = vduse_vdpa_set_map,
+       .set_group_asid         = vduse_set_group_asid,
+       .get_vq_map             = vduse_get_vq_map,
+       .suspend                = vduse_vdpa_suspend,
+       .free                   = vduse_vdpa_free,
+};
+
 static void vduse_dev_sync_single_for_device(union virtio_map token,
                                             dma_addr_t dma_addr, size_t size,
                                             enum dma_data_direction dir)
@@ -1148,6 +1212,10 @@ static void vduse_dev_irq_inject(struct work_struct *work)
 {
        struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
 
+       guard(rwsem_read)(&dev->rwsem);
+       if (dev->suspended)
+               return;
+
        spin_lock_bh(&dev->irq_lock);
        if (dev->config_cb.callback)
                dev->config_cb.callback(dev->config_cb.private);
@@ -1159,6 +1227,10 @@ static void vduse_vq_irq_inject(struct work_struct *work)
        struct vduse_virtqueue *vq = container_of(work,
                                        struct vduse_virtqueue, inject);
 
+       guard(rwsem_read)(&vq->dev->rwsem);
+       if (vq->dev->suspended)
+               return;
+
        spin_lock_bh(&vq->irq_lock);
        if (vq->ready && vq->cb.callback)
                vq->cb.callback(vq->cb.private);
@@ -1189,6 +1261,9 @@ static int vduse_dev_queue_irq_work(struct vduse_dev *dev,
        int ret = -EINVAL;
 
        down_read(&dev->rwsem);
+       if (dev->suspended)
+               goto unlock; /* rwsem is held for read: leave via the unlock path */
+
        if (!(dev->status & VIRTIO_CONFIG_S_DRIVER_OK))
                goto unlock;
 
@@ -1839,6 +1914,7 @@ static int vduse_dev_init_vqs(struct vduse_dev *dev, u32 vq_align, u32 vq_num)
                }
 
                dev->vqs[i]->index = i;
+               dev->vqs[i]->dev = dev;
                dev->vqs[i]->irq_effective_cpu = IRQ_UNBOUND;
                INIT_WORK(&dev->vqs[i]->inject, vduse_vq_irq_inject);
                INIT_WORK(&dev->vqs[i]->kick, vduse_vq_kick_work);
@@ -2311,12 +2387,18 @@ static struct vduse_mgmt_dev *vduse_mgmt;
 static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
 {
        struct vduse_vdpa *vdev;
+       const struct vdpa_config_ops *ops;
 
        if (dev->vdev)
                return -EEXIST;
 
+       if (dev->vduse_features & BIT_U64(VDUSE_F_SUSPEND))
+               ops = &vduse_vdpa_config_ops_with_suspend;
+       else
+               ops = &vduse_vdpa_config_ops;
+
        vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
-                                &vduse_vdpa_config_ops, &vduse_map_ops,
+                                ops, &vduse_map_ops,
                                 dev->ngroups, dev->nas, name, true);
        if (IS_ERR(vdev))
                return PTR_ERR(vdev);
diff --git a/include/uapi/linux/vduse.h b/include/uapi/linux/vduse.h
index d39734cef6d3..95b93bc6bac5 100644
--- a/include/uapi/linux/vduse.h
+++ b/include/uapi/linux/vduse.h
@@ -21,6 +21,9 @@
 /* The VDUSE instance expects a request for vq ready */
 #define VDUSE_F_QUEUE_READY    0
 
+/* The VDUSE instance expects a request for suspend */
+#define VDUSE_F_SUSPEND                1
+
 /*
  * Get the version of VDUSE API that kernel supported (VDUSE_API_VERSION).
  * This is used for future extension.
@@ -338,6 +341,7 @@ enum vduse_req_type {
        VDUSE_UPDATE_IOTLB,
        VDUSE_SET_VQ_GROUP_ASID,
        VDUSE_SET_VQ_READY,
+       VDUSE_SUSPEND,
 };
 
 /**
-- 
2.53.0


Reply via email to