Re: [PATCH 03/31] Protect SCSI device state changes with a mutex

2017-05-24 Thread Bart Van Assche
On Wed, 2017-05-24 at 07:51 +0200, Hannes Reinecke wrote:
> On 05/24/2017 02:33 AM, Bart Van Assche wrote:
> > Enable this mechanism for all scsi_target_*block() callers but not
> > for the scsi_internal_device_unblock() calls from the mpt3sas driver
> > because that driver can call scsi_internal_device_unblock() from
> > atomic context.
> > 
> > Signed-off-by: Bart Van Assche 
> > Cc: Christoph Hellwig 
> > Cc: Hannes Reinecke 
> > Cc: Johannes Thumshirn 
> > ---
> >  drivers/scsi/scsi_error.c |  8 +++-
> >  drivers/scsi/scsi_lib.c   | 27 +--
> >  drivers/scsi/scsi_scan.c  | 16 +---
> >  drivers/scsi/scsi_sysfs.c | 24 +++-
> >  drivers/scsi/scsi_transport_srp.c |  7 ---
> >  drivers/scsi/sd.c |  7 +--
> >  include/scsi/scsi_device.h|  1 +
> >  7 files changed, 66 insertions(+), 24 deletions(-)
> > 
> 
> [ .. ]
> > diff --git a/drivers/scsi/scsi_transport_srp.c 
> > b/drivers/scsi/scsi_transport_srp.c
> > index 3c5d89852e9f..f617021c94f7 100644
> > --- a/drivers/scsi/scsi_transport_srp.c
> > +++ b/drivers/scsi/scsi_transport_srp.c
> > @@ -554,11 +554,12 @@ int srp_reconnect_rport(struct srp_rport *rport)
> >  * invoking scsi_target_unblock() won't change the state of
> >  * these devices into running so do that explicitly.
> >  */
> > -   spin_lock_irq(shost->host_lock);
> > -   __shost_for_each_device(sdev, shost)
> > +   shost_for_each_device(sdev, shost) {
> > +   mutex_lock(>state_mutex);
> > if (sdev->sdev_state == SDEV_OFFLINE)
> > sdev->sdev_state = SDEV_RUNNING;
> > -   spin_unlock_irq(shost->host_lock);
> > +   mutex_unlock(>state_mutex);
> > +   }
> > } else if (rport->state == SRP_RPORT_RUNNING) {
> > /*
> >  * srp_reconnect_rport() has been invoked with fast_io_fail
> 
> Why do you drop the host lock here? I thought that the host lock is
> needed to protect shost_for_each_device()?

Hello Hannes,

The only purpose of holding the host lock was to protect the SCSI device list
iteration by __shost_for_each_device(). shost_for_each_device() obtains that
lock itself. From :

#define shost_for_each_device(sdev, shost) \
for ((sdev) = __scsi_iterate_devices((shost), NULL); \
 (sdev); \
 (sdev) = __scsi_iterate_devices((shost), (sdev)))

>From drivers/scsi/scsi.c:

struct scsi_device *__scsi_iterate_devices(struct Scsi_Host *shost,
   struct scsi_device *prev)
{
struct list_head *list = (prev ? >siblings : >__devices);
struct scsi_device *next = NULL;
unsigned long flags;

spin_lock_irqsave(shost->host_lock, flags);
while (list->next != >__devices) {
next = list_entry(list->next, struct scsi_device, siblings);
/* skip devices that we can't get a reference to */
if (!scsi_device_get(next))
break;
next = NULL;
list = list->next;
}
spin_unlock_irqrestore(shost->host_lock, flags);

if (prev)
scsi_device_put(prev);
return next;
}

Bart.

Re: [PATCH 03/31] Protect SCSI device state changes with a mutex

2017-05-23 Thread Hannes Reinecke
On 05/24/2017 02:33 AM, Bart Van Assche wrote:
> Enable this mechanism for all scsi_target_*block() callers but not
> for the scsi_internal_device_unblock() calls from the mpt3sas driver
> because that driver can call scsi_internal_device_unblock() from
> atomic context.
> 
> Signed-off-by: Bart Van Assche 
> Cc: Christoph Hellwig 
> Cc: Hannes Reinecke 
> Cc: Johannes Thumshirn 
> ---
>  drivers/scsi/scsi_error.c |  8 +++-
>  drivers/scsi/scsi_lib.c   | 27 +--
>  drivers/scsi/scsi_scan.c  | 16 +---
>  drivers/scsi/scsi_sysfs.c | 24 +++-
>  drivers/scsi/scsi_transport_srp.c |  7 ---
>  drivers/scsi/sd.c |  7 +--
>  include/scsi/scsi_device.h|  1 +
>  7 files changed, 66 insertions(+), 24 deletions(-)
> 
[ .. ]
> diff --git a/drivers/scsi/scsi_transport_srp.c 
> b/drivers/scsi/scsi_transport_srp.c
> index 3c5d89852e9f..f617021c94f7 100644
> --- a/drivers/scsi/scsi_transport_srp.c
> +++ b/drivers/scsi/scsi_transport_srp.c
> @@ -554,11 +554,12 @@ int srp_reconnect_rport(struct srp_rport *rport)
>* invoking scsi_target_unblock() won't change the state of
>* these devices into running so do that explicitly.
>*/
> - spin_lock_irq(shost->host_lock);
> - __shost_for_each_device(sdev, shost)
> + shost_for_each_device(sdev, shost) {
> + mutex_lock(>state_mutex);
>   if (sdev->sdev_state == SDEV_OFFLINE)
>   sdev->sdev_state = SDEV_RUNNING;
> - spin_unlock_irq(shost->host_lock);
> + mutex_unlock(>state_mutex);
> + }
>   } else if (rport->state == SRP_RPORT_RUNNING) {
>   /*
>* srp_reconnect_rport() has been invoked with fast_io_fail
Why do you drop the host lock here? I thought that the host lock is
needed to protect shost_for_each_device()?

Cheers,

Hannes
-- 
Dr. Hannes ReineckeTeamlead Storage & Networking
h...@suse.de   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)


[PATCH 03/31] Protect SCSI device state changes with a mutex

2017-05-23 Thread Bart Van Assche
Enable this mechanism for all scsi_target_*block() callers but not
for the scsi_internal_device_unblock() calls from the mpt3sas driver
because that driver can call scsi_internal_device_unblock() from
atomic context.

Signed-off-by: Bart Van Assche 
Cc: Christoph Hellwig 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
---
 drivers/scsi/scsi_error.c |  8 +++-
 drivers/scsi/scsi_lib.c   | 27 +--
 drivers/scsi/scsi_scan.c  | 16 +---
 drivers/scsi/scsi_sysfs.c | 24 +++-
 drivers/scsi/scsi_transport_srp.c |  7 ---
 drivers/scsi/sd.c |  7 +--
 include/scsi/scsi_device.h|  1 +
 7 files changed, 66 insertions(+), 24 deletions(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index ecc07dab893d..ac3196420435 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1628,11 +1628,17 @@ static void scsi_eh_offline_sdevs(struct list_head 
*work_q,
  struct list_head *done_q)
 {
struct scsi_cmnd *scmd, *next;
+   struct scsi_device *sdev;
 
list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
sdev_printk(KERN_INFO, scmd->device, "Device offlined - "
"not ready after error recovery\n");
-   scsi_device_set_state(scmd->device, SDEV_OFFLINE);
+   sdev = scmd->device;
+
+   mutex_lock(>state_mutex);
+   scsi_device_set_state(sdev, SDEV_OFFLINE);
+   mutex_unlock(>state_mutex);
+
scsi_eh_finish_cmd(scmd, done_q);
}
return;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7ed71db8c38a..3d82cbe605cd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2870,7 +2870,12 @@ static void scsi_wait_for_queuecommand(struct 
scsi_device *sdev)
 int
 scsi_device_quiesce(struct scsi_device *sdev)
 {
-   int err = scsi_device_set_state(sdev, SDEV_QUIESCE);
+   int err;
+
+   mutex_lock(>state_mutex);
+   err = scsi_device_set_state(sdev, SDEV_QUIESCE);
+   mutex_unlock(>state_mutex);
+
if (err)
return err;
 
@@ -2898,10 +2903,11 @@ void scsi_device_resume(struct scsi_device *sdev)
 * so assume the state is being managed elsewhere (for example
 * device deleted during suspend)
 */
-   if (sdev->sdev_state != SDEV_QUIESCE ||
-   scsi_device_set_state(sdev, SDEV_RUNNING))
-   return;
-   scsi_run_queue(sdev->request_queue);
+   mutex_lock(>state_mutex);
+   if (sdev->sdev_state == SDEV_QUIESCE &&
+   scsi_device_set_state(sdev, SDEV_RUNNING) == 0)
+   scsi_run_queue(sdev->request_queue);
+   mutex_unlock(>state_mutex);
 }
 EXPORT_SYMBOL(scsi_device_resume);
 
@@ -3000,6 +3006,7 @@ static int scsi_internal_device_block(struct scsi_device 
*sdev)
struct request_queue *q = sdev->request_queue;
int err;
 
+   mutex_lock(>state_mutex);
err = scsi_internal_device_block_nowait(sdev);
if (err == 0) {
if (q->mq_ops)
@@ -3007,6 +3014,8 @@ static int scsi_internal_device_block(struct scsi_device 
*sdev)
else
scsi_wait_for_queuecommand(sdev);
}
+   mutex_unlock(>state_mutex);
+
return err;
 }
  
@@ -3077,7 +3086,13 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_unblock_nowait);
 static int scsi_internal_device_unblock(struct scsi_device *sdev,
enum scsi_device_state new_state)
 {
-   return scsi_internal_device_unblock_nowait(sdev, new_state);
+   int ret;
+
+   mutex_lock(>state_mutex);
+   ret = scsi_internal_device_unblock_nowait(sdev, new_state);
+   mutex_unlock(>state_mutex);
+
+   return ret;
 }
 
 static void
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 6f7128f49c30..e6de4eee97a3 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -231,6 +231,7 @@ static struct scsi_device *scsi_alloc_sdev(struct 
scsi_target *starget,
sdev->id = starget->id;
sdev->lun = lun;
sdev->channel = starget->channel;
+   mutex_init(>state_mutex);
sdev->sdev_state = SDEV_CREATED;
INIT_LIST_HEAD(>siblings);
INIT_LIST_HEAD(>same_target_siblings);
@@ -943,16 +944,17 @@ static int scsi_add_lun(struct scsi_device *sdev, 
unsigned char *inq_result,
 
/* set the device running here so that slave configure
 * may do I/O */
+   mutex_lock(>state_mutex);
ret = scsi_device_set_state(sdev, SDEV_RUNNING);
-   if (ret) {
+   if (ret)
ret = scsi_device_set_state(sdev, SDEV_BLOCK);
+   mutex_unlock(>state_mutex);
 
-   if (ret) {
-