On Tue, May 25, 2021 at 01:05:58PM -0500, Mike Christie wrote:
> This patch allows userspace to create workers and bind them to vqs, so you
> can have N workers per dev and also share N workers with M vqs. The next
> patch will allow sharing across devices.
> 
> Signed-off-by: Mike Christie <michael.chris...@oracle.com>
> ---
>  drivers/vhost/vhost.c            | 94 +++++++++++++++++++++++++++++++-
>  drivers/vhost/vhost.h            |  3 +
>  include/uapi/linux/vhost.h       |  6 ++
>  include/uapi/linux/vhost_types.h | 12 ++++
>  4 files changed, 113 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index 345ade0af133..981e9bac7a31 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -30,6 +30,7 @@
>  #include <linux/interval_tree_generic.h>
>  #include <linux/nospec.h>
>  #include <linux/kcov.h>
> +#include <linux/hashtable.h>
>  
>  #include "vhost.h"
>  
> @@ -42,6 +43,9 @@ module_param(max_iotlb_entries, int, 0444);
>  MODULE_PARM_DESC(max_iotlb_entries,
>       "Maximum number of iotlb entries. (default: 2048)");
>  
> +static DEFINE_HASHTABLE(vhost_workers, 5);
> +static DEFINE_SPINLOCK(vhost_workers_lock);
> +
>  enum {
>       VHOST_MEMORY_F_LOG = 0x1,
>  };
> @@ -617,8 +621,17 @@ static void vhost_detach_mm(struct vhost_dev *dev)
>       dev->mm = NULL;
>  }
>  
> -static void vhost_worker_free(struct vhost_worker *worker)
> +static void vhost_worker_put(struct vhost_worker *worker)
>  {
> +     spin_lock(&vhost_workers_lock);
> +     if (!refcount_dec_and_test(&worker->refcount)) {
> +             spin_unlock(&vhost_workers_lock);
> +             return;
> +     }
> +
> +     hash_del(&worker->h_node);
> +     spin_unlock(&vhost_workers_lock);
> +
>       WARN_ON(!llist_empty(&worker->work_list));
>       kthread_stop(worker->task);
>       kfree(worker);
> @@ -632,7 +645,7 @@ static void vhost_workers_free(struct vhost_dev *dev)
>               return;
>  
>       for (i = 0; i < dev->num_workers; i++)
> -             vhost_worker_free(dev->workers[i]);
> +             vhost_worker_put(dev->workers[i]);
>  
>       kfree(dev->workers);
>       dev->num_workers = 0;
> @@ -652,6 +665,8 @@ static struct vhost_worker *vhost_worker_create(struct 
> vhost_dev *dev)
>       worker->id = dev->num_workers;
>       worker->dev = dev;
>       init_llist_head(&worker->work_list);
> +     INIT_HLIST_NODE(&worker->h_node);
> +     refcount_set(&worker->refcount, 1);
>  
>       task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
>       if (IS_ERR(task))
> @@ -664,6 +679,9 @@ static struct vhost_worker *vhost_worker_create(struct 
> vhost_dev *dev)
>       if (ret)
>               goto stop_worker;
>  
> +     spin_lock(&vhost_workers_lock);
> +     hash_add(vhost_workers, &worker->h_node, worker->task->pid);
> +     spin_unlock(&vhost_workers_lock);
>       return worker;
>  
>  stop_worker:
> @@ -673,6 +691,67 @@ static struct vhost_worker *vhost_worker_create(struct 
> vhost_dev *dev)
>       return NULL;
>  }
>  
> +static struct vhost_worker *vhost_worker_find(struct vhost_dev *dev, pid_t 
> pid)
> +{
> +     struct vhost_worker *worker, *found_worker = NULL;
> +
> +     spin_lock(&vhost_workers_lock);
> +     hash_for_each_possible(vhost_workers, worker, h_node, pid) {
> +             if (worker->task->pid == pid) {
> +                     /* tmp - next patch allows sharing across devs */
> +                     if (worker->dev != dev)
> +                             break;
> +
> +                     found_worker = worker;
> +                     refcount_inc(&worker->refcount);
> +                     break;
> +             }
> +     }
> +     spin_unlock(&vhost_workers_lock);
> +     return found_worker;
> +}
> +
> +/* Caller must have device mutex */
> +static int vhost_vq_set_worker(struct vhost_virtqueue *vq,
> +                            struct vhost_vring_worker *info)
> +{
> +     struct vhost_dev *dev = vq->dev;
> +     struct vhost_worker *worker;
> +
> +     if (vq->worker) {
> +             /* TODO - support changing while works are running */
> +             return -EBUSY;
> +     }
> +
> +     if (info->pid == VHOST_VRING_NEW_WORKER) {
> +             worker = vhost_worker_create(dev);

Is the maximum number of kthreads created limited only by the nvqs value
passed to vhost_dev_init()? For example, VHOST_SCSI_MAX_VQ is 128.

IIUC kthread_create is not limited by or accounted against the current
task, so I'm a little worried that a process can create a lot of
kthreads.

I haven't investigated other kthread_create() users reachable from
userspace applications to see how they effectively bound the number of
threads that can be created.

Any thoughts?

> +             if (!worker)
> +                     return -ENOMEM;
> +
> +             info->pid = worker->task->pid;
> +     } else {
> +             worker = vhost_worker_find(dev, info->pid);
> +             if (!worker)
> +                     return -ENODEV;
> +     }
> +
> +     if (!dev->workers) {
> +             dev->workers = kcalloc(vq->dev->nvqs,
> +                                    sizeof(struct vhost_worker *),
> +                                    GFP_KERNEL);

Another candidate for GFP_KERNEL_ACCOUNT.

> +             if (!dev->workers) {
> +                     vhost_worker_put(worker);
> +                     return -ENOMEM;
> +             }
> +     }
> +
> +     vq->worker = worker;
> +
> +     dev->workers[dev->num_workers] = worker;
> +     dev->num_workers++;

Hmm...should we really append to workers[] in the vhost_worker_find()
case?

> +     return 0;
> +}
> +
>  /* Caller must have device mutex */
>  static int vhost_worker_try_create_def(struct vhost_dev *dev)
>  {
> @@ -1680,6 +1759,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned 
> int ioctl, void __user *arg
>       struct eventfd_ctx *ctx = NULL;
>       u32 __user *idxp = argp;
>       struct vhost_virtqueue *vq;
> +     struct vhost_vring_worker w;
>       struct vhost_vring_state s;
>       struct vhost_vring_file f;
>       u32 idx;
> @@ -1794,6 +1874,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned 
> int ioctl, void __user *arg
>               if (copy_to_user(argp, &s, sizeof(s)))
>                       r = -EFAULT;
>               break;
> +     case VHOST_SET_VRING_WORKER:
> +             if (copy_from_user(&w, argp, sizeof(w))) {
> +                     r = -EFAULT;
> +                     break;
> +             }
> +             r = vhost_vq_set_worker(vq, &w);
> +             if (!r && copy_to_user(argp, &w, sizeof(w)))
> +                     r = -EFAULT;
> +             break;
>       default:
>               r = -ENOIOCTLCMD;
>       }
> @@ -2726,6 +2815,7 @@ EXPORT_SYMBOL_GPL(vhost_set_backend_features);
>  
>  static int __init vhost_init(void)
>  {
> +     hash_init(vhost_workers);
>       return 0;
>  }
>  
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index 0a252dd45101..75b884ad1f17 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -14,6 +14,7 @@
>  #include <linux/atomic.h>
>  #include <linux/vhost_iotlb.h>
>  #include <linux/irqbypass.h>
> +#include <linux/refcount.h>
>  
>  struct vhost_work;
>  typedef void (*vhost_work_fn_t)(struct vhost_work *work);
> @@ -28,6 +29,8 @@ struct vhost_work {
>  struct vhost_worker {
>       struct task_struct      *task;
>       struct llist_head       work_list;
> +     struct hlist_node       h_node;

h_node is a generic name. If you're willing to use a longer name, then
vhost_workers_node would make it clear which hlist this node belongs to.

> +     refcount_t              refcount;
>       struct vhost_dev        *dev;
>       int                     id;
>  };
> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> index c998860d7bbc..ce32119cb139 100644
> --- a/include/uapi/linux/vhost.h
> +++ b/include/uapi/linux/vhost.h
> @@ -70,6 +70,12 @@
>  #define VHOST_VRING_BIG_ENDIAN 1
>  #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct 
> vhost_vring_state)
>  #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct 
> vhost_vring_state)
> +/* Create/bind a vhost worker to a virtqueue. If pid > 0 and matches an 
> existing
> + * vhost_worker thread it will be bound to the vq. If pid is
> + * VHOST_VRING_NEW_WORKER, then a new worker will be created and bound to the
> + * vq.
> + */

Please document when this ioctl must be called (before kick is set up).

> +#define VHOST_SET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x15, struct 
> vhost_vring_worker)
>  
>  /* The following ioctls use eventfd file descriptors to signal and poll
>   * for events. */
> diff --git a/include/uapi/linux/vhost_types.h 
> b/include/uapi/linux/vhost_types.h
> index f7f6a3a28977..5113baa8bc3e 100644
> --- a/include/uapi/linux/vhost_types.h
> +++ b/include/uapi/linux/vhost_types.h
> @@ -47,6 +47,18 @@ struct vhost_vring_addr {
>       __u64 log_guest_addr;
>  };
>  
> +#define VHOST_VRING_NEW_WORKER -1
> +
> +struct vhost_vring_worker {
> +     unsigned int index;
> +     /*
> +      * The pid of the vhost worker that the vq will be bound to. If
> +      * pid is VHOST_VRING_NEW_WORKER a new worker will be created and it's

s/it's/its/

> +      * pid will be returned in pid.
> +      */
> +     __kernel_pid_t pid;
> +};
> +
>  /* no alignment requirement */
>  struct vhost_iotlb_msg {
>       __u64 iova;
> -- 
> 2.25.1
> 

Attachment: signature.asc
Description: PGP signature

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

Reply via email to