When running multipath on a VM, if all available paths go down,
the driver can schedule a large number of storvsc_remove_lun
work items for the same lun. In response to the failing paths,
storvsc typically takes host->scan_mutex and issues a TUR per lun.
If there has been heavy IO to the failed device, all the failed IOs
are returned from the host, and a remove lun work item is issued per
failed IO. If the outstanding TURs do not complete in a timely
manner, the scan_mutex is never released, or is released too late.
Consequently the many remove lun work items cannot complete, because
scsi_remove_device also tries to take host->scan_mutex. This degrades
the VM and sometimes brings it down completely.

This patch allows only one remove lun work item to be issued to a
particular lun while it is an instantiated member of the scsi stack.

Changes since v1:

Use a single-threaded workqueue to serialize work in
storvsc_handle_error [Christoph Hellwig]

Signed-off-by: Cathy Avery <cav...@redhat.com>
---
 drivers/scsi/storvsc_drv.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index 5e7200f..6febcdb 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -486,6 +486,8 @@ struct hv_host_device {
        unsigned int port;
        unsigned char path;
        unsigned char target;
+       struct workqueue_struct *handle_error_wq;
+       char work_q_name[20];
 };
 
 struct storvsc_scan_work {
@@ -922,6 +924,7 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
 {
        struct storvsc_scan_work *wrk;
        void (*process_err_fn)(struct work_struct *work);
+       struct hv_host_device *host_dev = shost_priv(host);
        bool do_work = false;
 
        switch (SRB_STATUS(vm_srb->srb_status)) {
@@ -988,7 +991,7 @@ static void storvsc_handle_error(struct vmscsi_request *vm_srb,
        wrk->lun = vm_srb->lun;
        wrk->tgt_id = vm_srb->target_id;
        INIT_WORK(&wrk->work, process_err_fn);
-       schedule_work(&wrk->work);
+       queue_work(host_dev->handle_error_wq, &wrk->work);
 }
 
 
@@ -1803,10 +1806,19 @@ static int storvsc_probe(struct hv_device *device,
        if (stor_device->num_sc != 0)
                host->nr_hw_queues = stor_device->num_sc + 1;
 
+       /*
+        * Set the error handler work queue.
+        */
+       snprintf(host_dev->work_q_name, sizeof(host_dev->work_q_name),
+                "storvsc_error_wq_%d", host->host_no);
+       host_dev->handle_error_wq =
+                       create_singlethread_workqueue(host_dev->work_q_name);
+       if (!host_dev->handle_error_wq)
+               goto err_out2;
        /* Register the HBA and start the scsi bus scan */
        ret = scsi_add_host(host, &device->device);
        if (ret != 0)
-               goto err_out2;
+               goto err_out3;
 
        if (!dev_is_ide) {
                scsi_scan_host(host);
@@ -1815,7 +1827,7 @@ static int storvsc_probe(struct hv_device *device,
                         device->dev_instance.b[4]);
                ret = scsi_add_device(host, 0, target, 0);
                if (ret)
-                       goto err_out3;
+                       goto err_out4;
        }
 #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
        if (host->transportt == fc_transport_template) {
@@ -1827,14 +1839,17 @@ static int storvsc_probe(struct hv_device *device,
                fc_host_port_name(host) = stor_device->port_name;
                stor_device->rport = fc_remote_port_add(host, 0, &ids);
                if (!stor_device->rport)
-                       goto err_out3;
+                       goto err_out4;
        }
 #endif
        return 0;
 
-err_out3:
+err_out4:
        scsi_remove_host(host);
 
+err_out3:
+       destroy_workqueue(host_dev->handle_error_wq);
+
 err_out2:
        /*
         * Once we have connected with the host, we would need to
@@ -1858,6 +1873,7 @@ static int storvsc_remove(struct hv_device *dev)
 {
        struct storvsc_device *stor_device = hv_get_drvdata(dev);
        struct Scsi_Host *host = stor_device->host;
+       struct hv_host_device *host_dev = shost_priv(host);
 
 #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
        if (host->transportt == fc_transport_template) {
@@ -1865,6 +1881,7 @@ static int storvsc_remove(struct hv_device *dev)
                fc_remove_host(host);
        }
 #endif
+       destroy_workqueue(host_dev->handle_error_wq);
        scsi_remove_host(host);
        storvsc_dev_remove(dev);
        scsi_host_put(host);
-- 
2.5.0

Reply via email to