Josef and Jens,

I am nacking my patch.

It looks like nbd only supported sockets that supported the shutdown
method, so I will fix the test and do a patch for nbd to warn when
unsupported sockets are passed in.


On 10/14/2019 01:36 AM, Mike Christie wrote:
> This fixes a regression added with:
> 
> commit e9e006f5fcf2bab59149cb38a48a4817c1b538b4
> Author: Mike Christie <mchri...@redhat.com>
> Date:   Sun Aug 4 14:10:06 2019 -0500
> 
>     nbd: fix max number of supported devs
> 
> Before the patch, if we got a signal in the NBD_DO_IT ioctl, we would
> call sock_shutdown then return immediately. The patch added a
> flush_workqueue call in that error handler path, so we now wait for
> the recv_work to complete. The problem is that some sockets do not
> implemnent a shutdown callout, so when sock_shutdown is called the
> flush_workqueue call is stuck waiting for the workqueue to break out
> of the sock_recvmsg call.
> 
> This patch just moves where we create/destroy the recv workqueue to the
> nbd_device and removes the flush_workqueue call, so we have the behavior
> we had before where when using the ioctl interface and we get a signal the
> recv works will be left running until module removal time when the devices
> are removed.
> 
> For the next kernel, I think we can do something more invasive like switch
> from workqueues to threads and use signals to break out of the recvmsg call
> (we had recently removed the last signal use for socket shutdown so I was
> not sure if we wanted to do this), or figure out a list of sockets families
> nbd supports and implement shutdown functions for them (I did not get a reply
> for that here https://lkml.org/lkml/2019/10/1/1323 so I'm assuming no one
> really knows and and I am still digging into that).
> 
> Reported-by: syzbot+24c12fa8d218ed260...@syzkaller.appspotmail.com
> Fixes: e9e006f5fcf2 ("nbd: fix max number of supported devs") 
> Signed-off-by: Mike Christie <mchri...@redhat.com>
> 
> ---
>  drivers/block/nbd.c | 33 ++++++++++++++++-----------------
>  1 file changed, 16 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
> index ac07e8c94c79..ee40799712d4 100644
> --- a/drivers/block/nbd.c
> +++ b/drivers/block/nbd.c
> @@ -222,6 +222,8 @@ static void nbd_dev_remove(struct nbd_device *nbd)
>       struct gendisk *disk = nbd->disk;
>       struct request_queue *q;
>  
> +     destroy_workqueue(nbd->recv_workq);
> +
>       if (disk) {
>               q = disk->queue;
>               del_gendisk(disk);
> @@ -1177,10 +1180,6 @@ static void nbd_config_put(struct nbd_device *nbd)
>               kfree(nbd->config);
>               nbd->config = NULL;
>  
> -             if (nbd->recv_workq)
> -                     destroy_workqueue(nbd->recv_workq);
> -             nbd->recv_workq = NULL;
> -
>               nbd->tag_set.timeout = 0;
>               nbd->disk->queue->limits.discard_granularity = 0;
>               nbd->disk->queue->limits.discard_alignment = 0;
> @@ -1209,14 +1208,6 @@ static int nbd_start_device(struct nbd_device *nbd)
>               return -EINVAL;
>       }
>  
> -     nbd->recv_workq = alloc_workqueue("knbd%d-recv",
> -                                       WQ_MEM_RECLAIM | WQ_HIGHPRI |
> -                                       WQ_UNBOUND, 0, nbd->index);
> -     if (!nbd->recv_workq) {
> -             dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv 
> work queue.\n");
> -             return -ENOMEM;
> -     }
> -
>       blk_mq_update_nr_hw_queues(&nbd->tag_set, config->num_connections);
>       nbd->task_recv = current;
>  
> @@ -1267,10 +1258,8 @@ static int nbd_start_device_ioctl(struct nbd_device 
> *nbd, struct block_device *b
>       mutex_unlock(&nbd->config_lock);
>       ret = wait_event_interruptible(config->recv_wq,
>                                        atomic_read(&config->recv_threads) == 
> 0);
> -     if (ret) {
> +     if (ret)
>               sock_shutdown(nbd);
> -             flush_workqueue(nbd->recv_workq);
> -     }
>       mutex_lock(&nbd->config_lock);
>       nbd_bdev_reset(bdev);
>       /* user requested, ignore socket errors */
> @@ -1656,9 +1645,17 @@ static int nbd_dev_add(int index)
>       nbd->tag_set.driver_data = nbd;
>       nbd->destroy_complete = NULL;
>  
> +     nbd->recv_workq = alloc_workqueue("knbd%d-recv",
> +                                       WQ_MEM_RECLAIM | WQ_HIGHPRI |
> +                                       WQ_UNBOUND, 0, nbd->index);
> +     if (!nbd->recv_workq) {
> +             dev_err(disk_to_dev(nbd->disk), "Could not allocate knbd recv 
> work queue.\n");
> +             goto out_free_idr;
> +     }
> +
>       err = blk_mq_alloc_tag_set(&nbd->tag_set);
>       if (err)
> -             goto out_free_idr;
> +             goto out_free_wq;
>  
>       q = blk_mq_init_queue(&nbd->tag_set);
>       if (IS_ERR(q)) {
> @@ -1695,6 +1692,8 @@ static int nbd_dev_add(int index)
>  
>  out_free_tags:
>       blk_mq_free_tag_set(&nbd->tag_set);
> +out_free_wq:
> +     destroy_workqueue(nbd->recv_workq);
>  out_free_idr:
>       idr_remove(&nbd_index_idr, index);
>  out_free_disk:
> @@ -1949,7 +1948,7 @@ static void nbd_disconnect_and_put(struct nbd_device 
> *nbd)
>       mutex_unlock(&nbd->config_lock);
>       /*
>        * Make sure recv thread has finished, so it does not drop the last
> -      * config ref and try to destroy the workqueue from inside the work
> +      * nbd_device ref and try to destroy the workqueue from inside the work
>        * queue.
>        */
>       flush_workqueue(nbd->recv_workq);
> 

Reply via email to