[PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2018-06-26 Thread Bart Van Assche
This patch avoids that self-removal triggers the following deadlock:

==
WARNING: possible circular locking dependency detected
4.18.0-rc2-dbg+ #5 Not tainted
--
modprobe/6539 is trying to acquire lock:
8323c4cd (kn->count#202){}, at: kernfs_remove_by_name_ns+0x45/0x90

but task is already holding lock:
a6ec2c69 (>scan_mutex){+.+.}, at: scsi_remove_host+0x21/0x150 
[scsi_mod]

which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:

-> #1 (>scan_mutex){+.+.}:
   __mutex_lock+0xfe/0xc70
   mutex_lock_nested+0x1b/0x20
   scsi_remove_device+0x26/0x40 [scsi_mod]
   sdev_store_delete+0x27/0x30 [scsi_mod]
   dev_attr_store+0x3e/0x50
   sysfs_kf_write+0x87/0xa0
   kernfs_fop_write+0x190/0x230
   __vfs_write+0xd2/0x3b0
   vfs_write+0x101/0x270
   ksys_write+0xab/0x120
   __x64_sys_write+0x43/0x50
   do_syscall_64+0x77/0x230
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

-> #0 (kn->count#202){}:
   lock_acquire+0xd2/0x260
   __kernfs_remove+0x424/0x4a0
   kernfs_remove_by_name_ns+0x45/0x90
   remove_files.isra.1+0x3a/0x90
   sysfs_remove_group+0x5c/0xc0
   sysfs_remove_groups+0x39/0x60
   device_remove_attrs+0x82/0xb0
   device_del+0x251/0x580
   __scsi_remove_device+0x19f/0x1d0 [scsi_mod]
   scsi_forget_host+0x37/0xb0 [scsi_mod]
   scsi_remove_host+0x9b/0x150 [scsi_mod]
   sdebug_driver_remove+0x4b/0x150 [scsi_debug]
   device_release_driver_internal+0x241/0x360
   device_release_driver+0x12/0x20
   bus_remove_device+0x1bc/0x290
   device_del+0x259/0x580
   device_unregister+0x1a/0x70
   sdebug_remove_adapter+0x8b/0xf0 [scsi_debug]
   scsi_debug_exit+0x76/0xe8 [scsi_debug]
   __x64_sys_delete_module+0x1c1/0x280
   do_syscall_64+0x77/0x230
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

other info that might help us debug this:

 Possible unsafe locking scenario:

   CPU0CPU1
   
  lock(>scan_mutex);
   lock(kn->count#202);
   lock(>scan_mutex);
  lock(kn->count#202);

 *** DEADLOCK ***

2 locks held by modprobe/6539:
 #0: efaf9298 (>mutex){}, at: 
device_release_driver_internal+0x68/0x360
 #1: a6ec2c69 (>scan_mutex){+.+.}, at: 
scsi_remove_host+0x21/0x150 [scsi_mod]

stack backtrace:
CPU: 10 PID: 6539 Comm: modprobe Not tainted 4.18.0-rc2-dbg+ #5
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 
1.0.0-prebuilt.qemu-project.org 04/01/2014
Call Trace:
 dump_stack+0xa4/0xf5
 print_circular_bug.isra.34+0x213/0x221
 __lock_acquire+0x1a7e/0x1b50
 lock_acquire+0xd2/0x260
 __kernfs_remove+0x424/0x4a0
 kernfs_remove_by_name_ns+0x45/0x90
 remove_files.isra.1+0x3a/0x90
 sysfs_remove_group+0x5c/0xc0
 sysfs_remove_groups+0x39/0x60
 device_remove_attrs+0x82/0xb0
 device_del+0x251/0x580
 __scsi_remove_device+0x19f/0x1d0 [scsi_mod]
 scsi_forget_host+0x37/0xb0 [scsi_mod]
 scsi_remove_host+0x9b/0x150 [scsi_mod]
 sdebug_driver_remove+0x4b/0x150 [scsi_debug]
 device_release_driver_internal+0x241/0x360
 device_release_driver+0x12/0x20
 bus_remove_device+0x1bc/0x290
 device_del+0x259/0x580
 device_unregister+0x1a/0x70
 sdebug_remove_adapter+0x8b/0xf0 [scsi_debug]
 scsi_debug_exit+0x76/0xe8 [scsi_debug]
 __x64_sys_delete_module+0x1c1/0x280
 do_syscall_64+0x77/0x230
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

See also https://www.mail-archive.com/linux-scsi@vger.kernel.org/msg54525.html.

Suggested-by: Eric W. Biederman 
Fixes: ac0ece9174ac ("scsi: use device_remove_file_self() instead of 
device_schedule_callback()")
Signed-off-by: Bart Van Assche 
Cc: Eric W. Biederman 
Cc: Tejun Heo 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Ingo Molnar 
Cc: Oleg Nesterov 
Cc: 
---
 drivers/scsi/scsi_sysfs.c | 48 +++
 kernel/task_work.c|  1 +
 2 files changed, 45 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 7943b762c12d..f14e92f1d292 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -718,14 +719,53 @@ store_rescan_field (struct device *dev, struct 
device_attribute *attr,
 }
 static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field);
 
+struct remove_dev_work {
+   struct callback_headhead;
+   struct scsi_device  *sdev;
+};
+
+static void delete_sdev(struct callback_head *head)
+{
+   struct remove_dev_work *work = container_of(head, typeof(*work), head);
+   struct scsi_device *sdev = work->sdev;
+
+   scsi_remove_device(sdev);
+   kfree(work);
+   scsi_device_put(sdev);
+}
+
 static ssize_t
 sdev_store_delete(struct device *dev, struct device_attribute 

Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-10 Thread Eric W. Biederman
James Bottomley  writes:

> On Tue, 2016-11-08 at 20:10 -0600, Eric W. Biederman wrote:
>> James Bottomley  writes:
>> 
>> > On Tue, 2016-11-08 at 18:57 -0600, Eric W. Biederman wrote:
> [...]
>> > > I am pretty certain that if you are going to make 
>> > > kernfs_remove_self and kernfs_remove_by_name_ns to be safe to 
>> > > race against each other, not just the KERNFS_SUICIDAL check, but 
>> > > the wait when KERNFS_SUICIDAL is set needs to be added ot
>> > > kernfs_remove_by_name_ns.
>> > 
>> > I don't think you can do that: waiting for SUICIDED would introduce
>> > another potential lock entanglement.  I'm reasonably happy that the
>> > deactivation offset coupled with kernfs_drain in the non self 
>> > remove path means that the necessary cleanup is done when the 
>> > directory itself is removed.  That seems to be a common pattern in 
>> > all non-self removes.
>> 
>> But if we don't I am pretty certain there will be asynchrounous 
>> behavior in some cases that could potentially confuse userspace.
>
> But the original behaviour kernfs_remove_self() eliminated was the
> asynchronous callback.  If we go back to that, we're definitely going
> to introduce far more asynchronous behaviour.

Not from a userspace perspective if we use task_work_add(current,...).

In that case we simply get to do an asynchrnous looking thing before we
return to userspace.  So an asynchronous form, but not actually
asynchronous actions.

>> Which is partly why I would like to kill kernfs_remove_self.
>
> I took a look at it.  It's definitely not cleanly revertible given
> what's gone on since.  Even just trying to excise it is going to be
> hard given all the tentacles it has.

Well changing the users and removing the code from kernfs and sysfs
doesn't look hard.  There are only 5-7 users of this insane and broken
remove self thing.

I think even if we do the naive thing and don't use any helpers in
kernfs, sysfs, or the device subsystem we could easily wind up with less
code in the kernel.  Certainly it will be code that is simpler and
easier to get right.

>> Using task_work_add(current, ...) as I posted earlier let's us retain
>> the synchronous property of the current API.
>> 
>> While we debate the details I am happy to look at scsi as a special 
>> case and solve for scsi.  Then when we have the details worked out we 
>> can go fix the other cases.  Given my preliminary patch in my last 
>> reply it looks very straight forward to fix this sanely.
>
> I don't think there's any urgency to fix SCSI.  You can only really
> trigger this by hammering the device and host remove paths, which isn't
> what users normally do ... as the fact it's been in the field for 2.5
> years with no apparent problems shows.

Sure little urgency.  But scsi makes a good concrete example for which
a reproducer is known.  So it is a good test ground.

> I'd like Greg and Tejun to weigh in on this before we start doing
> something, since they created the initial problem.

Fair.  Although I wouldn't be surprised if we don't hear anything short
of a concrete patch that changes everything.

Eric

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-10 Thread James Bottomley
On Tue, 2016-11-08 at 20:10 -0600, Eric W. Biederman wrote:
> James Bottomley  writes:
> 
> > On Tue, 2016-11-08 at 18:57 -0600, Eric W. Biederman wrote:
[...]
> > > I am pretty certain that if you are going to make 
> > > kernfs_remove_self and kernfs_remove_by_name_ns to be safe to 
> > > race against each other, not just the KERNFS_SUICIDAL check, but 
> > > the wait when KERNFS_SUICIDAL is set needs to be added ot
> > > kernfs_remove_by_name_ns.
> > 
> > I don't think you can do that: waiting for SUICIDED would introduce
> > another potential lock entanglement.  I'm reasonably happy that the
> > deactivation offset coupled with kernfs_drain in the non self 
> > remove path means that the necessary cleanup is done when the 
> > directory itself is removed.  That seems to be a common pattern in 
> > all non-self removes.
> 
> But if we don't I am pretty certain there will be asynchrounous 
> behavior in some cases that could potentially confuse userspace.

But the original behaviour kernfs_remove_self() eliminated was the
asynchronous callback.  If we go back to that, we're definitely going
to introduce far more asynchronous behaviour.

> Which is partly why I would like to kill kernfs_remove_self.

I took a look at it.  It's definitely not cleanly revertible given
what's gone on since.  Even just trying to excise it is going to be
hard given all the tentacles it has.

> > > And I suspect if you add the appropriate lockdep annotations to 
> > > that mess you will find yourself in a similar but related ABBA
> > > deadlock.
> > 
> > I can't prove the negative, but as long as there's no waiting on 
> > the SUICIDED/AL flags in the non-self remove path, I believe we're 
> > safe with the current patch.
> > 
> > > Which is why I would like a simpler easier to understand 
> > > mechanism if we can.
> > 
> > I don't disagree: If you want to clean out the Augean Stables, I 
> > can lend you the thigh length rubber boots and the gas mask. 
> >  However, I think that what we're currently proposing: a simple 
> > patch to make device_remove_file_self() actually work for everyone, 
> > along with stringent testing is the better approach.
> > 
> > After all, if you look at
> > 
> > commit ac0ece9174aca9aa895ce0accc54f1f8ff12d117
> > Author: Tejun Heo 
> > Date:   Mon Feb 3 14:03:03 2014 -0500
> > 
> > scsi: use device_remove_file_self() instead of
> > device_schedule_callback()
> > 
> > You'll see Tejun added all this stuff just to remove the async 
> > callback we originally had.  Simply restoring the async callback 
> > back makes us quite considerably worse off because the 
> > device_remove_file_self() mechanism is in use elsewhere.  We need 
> > either to fix it and move on or junk it and go back to the
> > original.
> 
> I vote we junk it and go back to something that resembles the 
> original.  there are only about 5 other callers so this isn't that
> bad to do.
> 
> Tejun's work clearly added this deadlock 2 and a half years ago, and
> it was nasty enough no one noticed until recently.
> 
> Using task_work_add(current, ...) as I posted earlier let's us retain
> the synchronous property of the current API.
> 
> While we debate the details I am happy to look at scsi as a special 
> case and solve for scsi.  Then when we have the details worked out we 
> can go fix the other cases.  Given my preliminary patch in my last 
> reply it looks very straight forward to fix this sanely.

I don't think there's any urgency to fix SCSI.  You can only really
trigger this by hammering the device and host remove paths, which isn't
what users normally do ... as the fact it's been in the field for 2.5
years with no apparent problems shows.

I'd like Greg and Tejun to weigh in on this before we start doing
something, since they created the initial problem.

James

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-08 Thread Eric W. Biederman
James Bottomley  writes:

> On Tue, 2016-11-08 at 18:57 -0600, Eric W. Biederman wrote:
>> James Bottomley  writes:
>> 
>> > On Tue, 2016-11-08 at 13:13 -0600, Eric W. Biederman wrote:
> [...]
>> > > Is it really the dropping of the lock that is causing this?
>> > > I don't see that when I read those traces.
>> > 
>> > No, it's an ABBA lock inversion that causes this.  The traces are
>> > somewhat dense, but they say it here:
>> > 
>> >  Possible unsafe locking scenario:
>> >CPU0CPU1
>> >
>> >   lock(s_active#336);
>> >lock(>scan_mutex);
>> >lock(s_active#336);
>> >   lock(>scan_mutex);
>> > 
>> >  *** DEADLOCK ***
>> > 
>> > The detailed explanation of this is here:
>> > 
>> > http://marc.info/?l=linux-scsi=147855187425596
>> > 
>> > The fix is ensuring that the CPU1 thread doesn't get into taking
>> > s_active if CPU0 already has it using the KERNFS_SUICIDED/AL flag 
>> > as an indicator.
>> 
>> So.  The kernfs code does not look safe to have kernfs_remove_self
>> and kernfs_remove_by_name_ns racing against each other I agree.
>> 
>> The kernfs_remove_self path turns KERNFS_SUICIDAL into another 
>> blocking lock by another name, and without lockdep annotations so I 
>> don't know that it is safe.
>
> Yes ... the number of hand rolled locks in that code make it super hard
> to follow.
>
>> The relevant bit from kernfs_remove_self is:
>> 
>>  if (!(kn->flags & KERNFS_SUICIDAL)) {
>>  kn->flags |= KERNFS_SUICIDAL;
>>  __kernfs_remove(kn);
>>  kn->flags |= KERNFS_SUICIDED;
>>  ret = true;
>>  } else {
>>  wait_queue_head_t *waitq = _root(kn)
>> ->deactivate_waitq;
>>  DEFINE_WAIT(wait);
>> 
>>  while (true) {
>>  prepare_to_wait(waitq, ,
>> TASK_UNINTERRUPTIBLE);
>> 
>>  if ((kn->flags & KERNFS_SUICIDED) &&
>>  atomic_read(>active) ==
>> KN_DEACTIVATED_BIAS)
>>  break;
>> 
>>  mutex_unlock(_mutex);
>>  schedule();
>>  mutex_lock(_mutex);
>>  }
>>  finish_wait(waitq, );
>>  WARN_ON_ONCE(!RB_EMPTY_NODE(>rb));
>>  ret = false;
>>  }
>> 
>> I am pretty certain that if you are going to make kernfs_remove_self 
>> and kernfs_remove_by_name_ns to be safe to race against each other, 
>> not just the KERNFS_SUICIDAL check, but the wait when KERNFS_SUICIDAL 
>> is set needs to be added ot kernfs_remove_by_name_ns.
>
> I don't think you can do that: waiting for SUICIDED would introduce
> another potential lock entanglement.  I'm reasonably happy that the
> deactivation offset coupled with kernfs_drain in the non self remove
> path means that the necessary cleanup is done when the directory itself
> is removed.  That seems to be a common pattern in all non-self
> removes.

But if we don't I am pretty certain there will be asynchrounous behavior
in some cases that could potentially confuse userspace.

Which is partly why I would like to kill kernfs_remove_self.

>> And I suspect if you add the appropriate lockdep annotations to that 
>> mess you will find yourself in a similar but related ABBA deadlock.
>
> I can't prove the negative, but as long as there's no waiting on the
> SUICIDED/AL flags in the non-self remove path, I believe we're safe
> with the current patch.
>
>> Which is why I would like a simpler easier to understand mechanism if
>> we can.
>
> I don't disagree: If you want to clean out the Augean Stables, I can
> lend you the thigh length rubber boots and the gas mask.  However, I
> think that what we're currently proposing: a simple patch to make
> device_remove_file_self() actually work for everyone, along with
> stringent testing is the better approach.
>
> After all, if you look at
>
> commit ac0ece9174aca9aa895ce0accc54f1f8ff12d117
> Author: Tejun Heo 
> Date:   Mon Feb 3 14:03:03 2014 -0500
>
> scsi: use device_remove_file_self() instead of device_schedule_callback()
>
> You'll see Tejun added all this stuff just to remove the async callback
> we originally had.  Simply restoring the async callback back makes us
> quite considerably worse off because the device_remove_file_self()
> mechanism is in use elsewhere.  We need either to fix it and move on or
> junk it and go back to the original.

I vote we junk it and go back to something that resembles the original.
there are only about 5 other callers so this isn't that bad to do.

Tejun's work clearly added this deadlock 2 and a half years ago, and
it was nasty enough no one noticed until recently.

Using task_work_add(current, ...) as I posted earlier let's us retain
the synchronous property of the current API.

While we debate the details I am happy to look at 

Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-08 Thread James Bottomley
On Tue, 2016-11-08 at 18:57 -0600, Eric W. Biederman wrote:
> James Bottomley  writes:
> 
> > On Tue, 2016-11-08 at 13:13 -0600, Eric W. Biederman wrote:
[...]
> > > Is it really the dropping of the lock that is causing this?
> > > I don't see that when I read those traces.
> > 
> > No, it's an ABBA lock inversion that causes this.  The traces are
> > somewhat dense, but they say it here:
> > 
> >  Possible unsafe locking scenario:
> >CPU0CPU1
> >
> >   lock(s_active#336);
> >lock(>scan_mutex);
> >lock(s_active#336);
> >   lock(>scan_mutex);
> > 
> >  *** DEADLOCK ***
> > 
> > The detailed explanation of this is here:
> > 
> > http://marc.info/?l=linux-scsi=147855187425596
> > 
> > The fix is ensuring that the CPU1 thread doesn't get into taking
> > s_active if CPU0 already has it using the KERNFS_SUICIDED/AL flag 
> > as an indicator.
> 
> So.  The kernfs code does not look safe to have kernfs_remove_self
> and kernfs_remove_by_name_ns racing against each other I agree.
> 
> The kernfs_remove_self path turns KERNFS_SUICIDAL into another 
> blocking lock by another name, and without lockdep annotations so I 
> don't know that it is safe.

Yes ... the number of hand rolled locks in that code make it super hard
to follow.

> The relevant bit from kernfs_remove_self is:
> 
>   if (!(kn->flags & KERNFS_SUICIDAL)) {
>   kn->flags |= KERNFS_SUICIDAL;
>   __kernfs_remove(kn);
>   kn->flags |= KERNFS_SUICIDED;
>   ret = true;
>   } else {
>   wait_queue_head_t *waitq = _root(kn)
> ->deactivate_waitq;
>   DEFINE_WAIT(wait);
> 
>   while (true) {
>   prepare_to_wait(waitq, ,
> TASK_UNINTERRUPTIBLE);
> 
>   if ((kn->flags & KERNFS_SUICIDED) &&
>   atomic_read(>active) ==
> KN_DEACTIVATED_BIAS)
>   break;
> 
>   mutex_unlock(_mutex);
>   schedule();
>   mutex_lock(_mutex);
>   }
>   finish_wait(waitq, );
>   WARN_ON_ONCE(!RB_EMPTY_NODE(>rb));
>   ret = false;
>   }
> 
> I am pretty certain that if you are going to make kernfs_remove_self 
> and kernfs_remove_by_name_ns to be safe to race against each other, 
> not just the KERNFS_SUICIDAL check, but the wait when KERNFS_SUICIDAL 
> is set needs to be added ot kernfs_remove_by_name_ns.

I don't think you can do that: waiting for SUICIDED would introduce
another potential lock entanglement.  I'm reasonably happy that the
deactivation offset coupled with kernfs_drain in the non self remove
path means that the necessary cleanup is done when the directory itself
is removed.  That seems to be a common pattern in all non-self removes.

> And I suspect if you add the appropriate lockdep annotations to that 
> mess you will find yourself in a similar but related ABBA deadlock.

I can't prove the negative, but as long as there's no waiting on the
SUICIDED/AL flags in the non-self remove path, I believe we're safe
with the current patch.

> Which is why I would like a simpler easier to understand mechanism if
> we can.

I don't disagree: If you want to clean out the Augean Stables, I can
lend you the thigh length rubber boots and the gas mask.  However, I
think that what we're currently proposing: a simple patch to make
device_remove_file_self() actually work for everyone, along with
stringent testing is the better approach.

After all, if you look at

commit ac0ece9174aca9aa895ce0accc54f1f8ff12d117
Author: Tejun Heo 
Date:   Mon Feb 3 14:03:03 2014 -0500

scsi: use device_remove_file_self() instead of device_schedule_callback()

You'll see Tejun added all this stuff just to remove the async callback
we originally had.  Simply restoring the async callback back makes us
quite considerably worse off because the device_remove_file_self()
mechanism is in use elsewhere.  We need either to fix it and move on or
junk it and go back to the original.

James

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-08 Thread Eric W. Biederman
Bart Van Assche  writes:

> On 11/08/2016 11:15 AM, Eric W. Biederman wrote:
>> James Bottomley  writes:
>>
>>> On Tue, 2016-11-08 at 08:52 -0800, Bart Van Assche wrote:
 On 11/08/2016 07:28 AM, James Bottomley wrote:
> On Mon, 2016-11-07 at 16:32 -0800, Bart Van Assche wrote:
>> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
>> index cf4c636..44ec536 100644
>> --- a/fs/kernfs/dir.c
>> +++ b/fs/kernfs/dir.c
>> @@ -1410,7 +1410,7 @@ int kernfs_remove_by_name_ns(struct
>> kernfs_node
>> *parent, const char *name,
>>  mutex_lock(_mutex);
>>
>>  kn = kernfs_find_ns(parent, name, ns);
>> -if (kn)
>> +if (kn && !(kn->flags & KERNFS_SUICIDED))
>
> Actually, wrong flag, you need KERNFS_SUICIDAL.  The reason is that
> kernfs_mutex is actually dropped half way through __kernfs_remove,
> so KERNFS_SUICIDED is not set atomically with this mutex.

 Hello James,

 Sorry but what you wrote is not correct.
>>>
>>> I think you agree it is dropped.  I don't need to add the bit about the
>>> reacquisition because the race is mediated by the first acquisition not
>>> the second one, if you mediate on KERNFS_SUICIDAL, you only need to
>>> worry about this because the mediation is in the first acquisition.  If
>>> you mediate on KERNFS_SUICIDED, you need to explain that the final
>>> thing that means the race can't happen is the unbreak in the sysfs
>>> delete path re-acquiring s_active ... the explanation of what's going
>>> on and why gets about 2x more complex.
>>
>> Is it really the dropping of the lock that is causing this?
>> I don't see that when I read those traces.
>>
>> I am going to put my vote in for doing all of the self removal
>> sem-asynchronously by using task_work_add, and killing
>> device_remove_self, sysfs_remove_self, kernfs_remove_self.  Using
>> task_work_add remains synchronous with userspace so userspace should not
>> care, and we get the benefit of not having two different variants of the
>> same code racing with each other.
>>
>> It might take a little more work but will leave code that is much more
>> maintainable in the long run.
>
> Hello Eric,
>
> I'm completely in favor of keeping code maintainable. But what's not clear to 
> me
> is whether asynchronous I/O can be submitted to a sysfs attribute? If so, on
> what context will task work queued through task_work_add() be executed if an 
> aio
> write is used to write into a sysfs attribute?

I don't believe so.  I believe the filesystem has to opt into aio.  In
either case I don't believe we have to worry about aio because either it
won't be supported or it will be a synchronous write to sysfs.

> Additionally, can a process enter the exiting state after writing into the 
> sysfs
> delete attribute started and before task_work_add() has been called? I'm 
> asking
> this because in that case task_work_add() will fail.

Not before task_work_add has been called because task_work_add is what
will be called synchronously from the write function of the delete
attribute.

Furthermore task_work_run is guaranteed to be called before you return
to userspace.  It is remotely possible that someone sends the process a
SIGKILL while the delete attribute is being written.  In that case
task_work_run should run before the task gets too deep into the exiting
state.

But the SIGKILL will not be processed until the write syscall finishes
and the kernel returns to the edge of userspace.

So I can't imagine anything that would make task_work_add be a poor
choice in these circumstances.

And basically that fix for just the scsi bits I think is as simple as:

 drivers/scsi/scsi_sysfs.c  | 20 +---
 include/scsi/scsi_device.h |  1 +
 2 files changed, 18 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 07349270535d..fee88cde7c12 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -705,13 +706,26 @@ store_rescan_field (struct device *dev, struct 
device_attribute *attr,
 }
 static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field);
 
+static void sdev_remove_self(struct callback_head *cb)
+{
+   struct scsi_device *sdev =
+   container_of(cb, struct scsi_device, delete_work);
+
+   scsi_remove_device(sdev);
+}
+
 static ssize_t
 sdev_store_delete(struct device *dev, struct device_attribute *attr,
  const char *buf, size_t count)
 {
-   if (device_remove_file_self(dev, attr))
-   scsi_remove_device(to_scsi_device(dev));
-   return count;
+   struct scsi_device *sdev = to_scsi_device(dev);
+   ssize_t err;
+
+   init_task_work(>delete_work, sdev_remove_self);
+   err = task_work_add(current, >delete_work, false);
+   if (!err)
+   err = 

Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-08 Thread Eric W. Biederman
James Bottomley  writes:

> On Tue, 2016-11-08 at 13:13 -0600, Eric W. Biederman wrote:
>> James Bottomley  writes:
>> 
>> > On Tue, 2016-11-08 at 08:52 -0800, Bart Van Assche wrote:
>> > > On 11/08/2016 07:28 AM, James Bottomley wrote:
>> > > > On Mon, 2016-11-07 at 16:32 -0800, Bart Van Assche wrote:
>> > > > > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
>> > > > > index cf4c636..44ec536 100644
>> > > > > --- a/fs/kernfs/dir.c
>> > > > > +++ b/fs/kernfs/dir.c
>> > > > > @@ -1410,7 +1410,7 @@ int kernfs_remove_by_name_ns(struct
>> > > > > kernfs_node
>> > > > > *parent, const char *name,
>> > > > >  mutex_lock(_mutex);
>> > > > > 
>> > > > >  kn = kernfs_find_ns(parent, name, ns);
>> > > > > -if (kn)
>> > > > > +if (kn && !(kn->flags & KERNFS_SUICIDED))
>> > > > 
>> > > > Actually, wrong flag, you need KERNFS_SUICIDAL.  The reason is
>> > > > that
>> > > > kernfs_mutex is actually dropped half way through
>> > > > __kernfs_remove, 
>> > > > so KERNFS_SUICIDED is not set atomically with this mutex.
>> > > 
>> > > Hello James,
>> > > 
>> > > Sorry but what you wrote is not correct.
>> > 
>> > I think you agree it is dropped.  I don't need to add the bit about 
>> > the reacquisition because the race is mediated by the first 
>> > acquisition not the second one, if you mediate on KERNFS_SUICIDAL, 
>> > you only need to worry about this because the mediation is in the 
>> > first acquisition.   If you mediate on KERNFS_SUICIDED, you need to 
>> > explain that the final thing that means the race can't happen is 
>> > the unbreak in the sysfs delete path re-acquiring s_active ... the 
>> > explanation of what's going on and why gets about 2x more complex.
>> 
>> Is it really the dropping of the lock that is causing this?
>> I don't see that when I read those traces.
>
> No, it's an ABBA lock inversion that causes this.  The traces are
> somewhat dense, but they say it here:
>
>  Possible unsafe locking scenario:
>CPU0CPU1
>
>   lock(s_active#336);
>lock(>scan_mutex);
>lock(s_active#336);
>   lock(>scan_mutex);
>
>  *** DEADLOCK ***
>
> The detailed explanation of this is here:
>
> http://marc.info/?l=linux-scsi=147855187425596
>
> The fix is ensuring that the CPU1 thread doesn't get into taking
> s_active if CPU0 already has it using the KERNFS_SUICIDED/AL flag as an
> indicator.

So.  The kernfs code does not look safe to have kernfs_remove_self
and kernfs_remove_by_name_ns racing against each other I agree.

The kernfs_remove_self path turns KERNFS_SUICIDAL into another blocking
lock by another name, and without lockdep annotations so I don't know
that it is safe.

The relevant bit from kernfs_remove_self is:

if (!(kn->flags & KERNFS_SUICIDAL)) {
kn->flags |= KERNFS_SUICIDAL;
__kernfs_remove(kn);
kn->flags |= KERNFS_SUICIDED;
ret = true;
} else {
wait_queue_head_t *waitq = _root(kn)->deactivate_waitq;
DEFINE_WAIT(wait);

while (true) {
prepare_to_wait(waitq, , TASK_UNINTERRUPTIBLE);

if ((kn->flags & KERNFS_SUICIDED) &&
atomic_read(>active) == KN_DEACTIVATED_BIAS)
break;

mutex_unlock(_mutex);
schedule();
mutex_lock(_mutex);
}
finish_wait(waitq, );
WARN_ON_ONCE(!RB_EMPTY_NODE(>rb));
ret = false;
}

I am pretty certain that if you are going to make kernfs_remove_self and
kernfs_remove_by_name_ns to be safe to race against each other, not just
the KERNFS_SUICIDAL check, but the wait when KERNFS_SUICIDAL is set
needs to be added ot kernfs_remove_by_name_ns.

And I suspect if you add the appropriate lockdep annotations to that mess
you will find yourself in a similar but related ABBA deadlock.

Which is why I would like a simpler easier to understand mechanism if we can.

Eric

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-08 Thread James Bottomley
On Tue, 2016-11-08 at 13:13 -0600, Eric W. Biederman wrote:
> James Bottomley  writes:
> 
> > On Tue, 2016-11-08 at 08:52 -0800, Bart Van Assche wrote:
> > > On 11/08/2016 07:28 AM, James Bottomley wrote:
> > > > On Mon, 2016-11-07 at 16:32 -0800, Bart Van Assche wrote:
> > > > > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> > > > > index cf4c636..44ec536 100644
> > > > > --- a/fs/kernfs/dir.c
> > > > > +++ b/fs/kernfs/dir.c
> > > > > @@ -1410,7 +1410,7 @@ int kernfs_remove_by_name_ns(struct
> > > > > kernfs_node
> > > > > *parent, const char *name,
> > > > >   mutex_lock(_mutex);
> > > > > 
> > > > >   kn = kernfs_find_ns(parent, name, ns);
> > > > > - if (kn)
> > > > > + if (kn && !(kn->flags & KERNFS_SUICIDED))
> > > > 
> > > > Actually, wrong flag, you need KERNFS_SUICIDAL.  The reason is
> > > > that
> > > > kernfs_mutex is actually dropped half way through
> > > > __kernfs_remove, 
> > > > so KERNFS_SUICIDED is not set atomically with this mutex.
> > > 
> > > Hello James,
> > > 
> > > Sorry but what you wrote is not correct.
> > 
> > I think you agree it is dropped.  I don't need to add the bit about 
> > the reacquisition because the race is mediated by the first 
> > acquisition not the second one, if you mediate on KERNFS_SUICIDAL, 
> > you only need to worry about this because the mediation is in the 
> > first acquisition.   If you mediate on KERNFS_SUICIDED, you need to 
> > explain that the final thing that means the race can't happen is 
> > the unbreak in the sysfs delete path re-acquiring s_active ... the 
> > explanation of what's going on and why gets about 2x more complex.
> 
> Is it really the dropping of the lock that is causing this?
> I don't see that when I read those traces.

No, it's an ABBA lock inversion that causes this.  The traces are
somewhat dense, but they say it here:

 Possible unsafe locking scenario:
   CPU0CPU1
   
  lock(s_active#336);
   lock(>scan_mutex);
   lock(s_active#336);
  lock(>scan_mutex);

 *** DEADLOCK ***

The detailed explanation of this is here:

http://marc.info/?l=linux-scsi=147855187425596

The fix is ensuring that the CPU1 thread doesn't get into taking
s_active if CPU0 already has it using the KERNFS_SUICIDED/AL flag as an
indicator.

James

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-08 Thread Bart Van Assche

On 11/08/2016 11:15 AM, Eric W. Biederman wrote:

James Bottomley  writes:


On Tue, 2016-11-08 at 08:52 -0800, Bart Van Assche wrote:

On 11/08/2016 07:28 AM, James Bottomley wrote:

On Mon, 2016-11-07 at 16:32 -0800, Bart Van Assche wrote:

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index cf4c636..44ec536 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1410,7 +1410,7 @@ int kernfs_remove_by_name_ns(struct
kernfs_node
*parent, const char *name,
mutex_lock(_mutex);

kn = kernfs_find_ns(parent, name, ns);
-   if (kn)
+   if (kn && !(kn->flags & KERNFS_SUICIDED))


Actually, wrong flag, you need KERNFS_SUICIDAL.  The reason is that
kernfs_mutex is actually dropped half way through __kernfs_remove,
so KERNFS_SUICIDED is not set atomically with this mutex.


Hello James,

Sorry but what you wrote is not correct.


I think you agree it is dropped.  I don't need to add the bit about the
reacquisition because the race is mediated by the first acquisition not
the second one, if you mediate on KERNFS_SUICIDAL, you only need to
worry about this because the mediation is in the first acquisition.  If
you mediate on KERNFS_SUICIDED, you need to explain that the final
thing that means the race can't happen is the unbreak in the sysfs
delete path re-acquiring s_active ... the explanation of what's going
on and why gets about 2x more complex.


Is it really the dropping of the lock that is causing this?
I don't see that when I read those traces.

I am going to put my vote in for doing all of the self removal
sem-asynchronously by using task_work_add, and killing
device_remove_self, sysfs_remove_self, kernfs_remove_self.  Using
task_work_add remains synchronous with userspace so userspace should not
care, and we get the benefit of not having two different variants of the
same code racing with each other.

It might take a little more work but will leave code that is much more
maintainable in the long run.


Hello Eric,

I'm completely in favor of keeping code maintainable. But what's not 
clear to me is whether asynchronous I/O can be submitted to a sysfs 
attribute? If so, on what context will task work queued through 
task_work_add() be executed if an aio write is used to write into a 
sysfs attribute?


Additionally, can a process enter the exiting state after writing into 
the sysfs delete attribute started and before task_work_add() has been 
called? I'm asking this because in that case task_work_add() will fail.


Thanks,

Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-08 Thread Eric W. Biederman
James Bottomley  writes:

> On Tue, 2016-11-08 at 08:52 -0800, Bart Van Assche wrote:
>> On 11/08/2016 07:28 AM, James Bottomley wrote:
>> > On Mon, 2016-11-07 at 16:32 -0800, Bart Van Assche wrote:
>> > > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
>> > > index cf4c636..44ec536 100644
>> > > --- a/fs/kernfs/dir.c
>> > > +++ b/fs/kernfs/dir.c
>> > > @@ -1410,7 +1410,7 @@ int kernfs_remove_by_name_ns(struct
>> > > kernfs_node
>> > > *parent, const char *name,
>> > >  mutex_lock(_mutex);
>> > > 
>> > >  kn = kernfs_find_ns(parent, name, ns);
>> > > -if (kn)
>> > > +if (kn && !(kn->flags & KERNFS_SUICIDED))
>> > 
>> > Actually, wrong flag, you need KERNFS_SUICIDAL.  The reason is that
>> > kernfs_mutex is actually dropped half way through __kernfs_remove, 
>> > so KERNFS_SUICIDED is not set atomically with this mutex.
>> 
>> Hello James,
>> 
>> Sorry but what you wrote is not correct.
>
> I think you agree it is dropped.  I don't need to add the bit about the
> reacquisition because the race is mediated by the first acquisition not
> the second one, if you mediate on KERNFS_SUICIDAL, you only need to
> worry about this because the mediation is in the first acquisition.  If
> you mediate on KERNFS_SUICIDED, you need to explain that the final
> thing that means the race can't happen is the unbreak in the sysfs
> delete path re-acquiring s_active ... the explanation of what's going
> on and why gets about 2x more complex.

Is it really the dropping of the lock that is causing this?
I don't see that when I read those traces.

I am going to put my vote in for doing all of the self removal
sem-asynchronously by using task_work_add, and killing
device_remove_self, sysfs_remove_self, kernfs_remove_self.  Using
task_work_add remains synchronous with userspace so userspace should not
care, and we get the benefit of not having two different variants of the
same code racing with each other.

It might take a little more work but will leave code that is much more
maintainable in the long run.

Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-08 Thread James Bottomley
On Tue, 2016-11-08 at 08:52 -0800, Bart Van Assche wrote:
> On 11/08/2016 07:28 AM, James Bottomley wrote:
> > On Mon, 2016-11-07 at 16:32 -0800, Bart Van Assche wrote:
> > > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> > > index cf4c636..44ec536 100644
> > > --- a/fs/kernfs/dir.c
> > > +++ b/fs/kernfs/dir.c
> > > @@ -1410,7 +1410,7 @@ int kernfs_remove_by_name_ns(struct
> > > kernfs_node
> > > *parent, const char *name,
> > >   mutex_lock(_mutex);
> > > 
> > >   kn = kernfs_find_ns(parent, name, ns);
> > > - if (kn)
> > > + if (kn && !(kn->flags & KERNFS_SUICIDED))
> > 
> > Actually, wrong flag, you need KERNFS_SUICIDAL.  The reason is that
> > kernfs_mutex is actually dropped half way through __kernfs_remove, 
> > so KERNFS_SUICIDED is not set atomically with this mutex.
> 
> Hello James,
> 
> Sorry but what you wrote is not correct.

I think you agree it is dropped.  I don't need to add the bit about the
reacquisition because the race is mediated by the first acquisition not
the second one, if you mediate on KERNFS_SUICIDAL, you only need to
worry about this because the mediation is in the first acquisition.  If
you mediate on KERNFS_SUICIDED, you need to explain that the final
thing that means the race can't happen is the unbreak in the sysfs
delete path re-acquiring s_active ... the explanation of what's going
on and why gets about 2x more complex.

James

>  __kernfs_remove() calls kernfs_drain(). That last function not only 
> drops but also reacquires kernfs_mutex. So both KERNFS_SUICIDAL and 
> KERNFS_SUICIDED are set while holding kernfs_mutex.


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-08 Thread Bart Van Assche

On 11/08/2016 07:28 AM, James Bottomley wrote:

On Mon, 2016-11-07 at 16:32 -0800, Bart Van Assche wrote:

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index cf4c636..44ec536 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1410,7 +1410,7 @@ int kernfs_remove_by_name_ns(struct kernfs_node
*parent, const char *name,
mutex_lock(_mutex);

kn = kernfs_find_ns(parent, name, ns);
-   if (kn)
+   if (kn && !(kn->flags & KERNFS_SUICIDED))


Actually, wrong flag, you need KERNFS_SUICIDAL.  The reason is that
kernfs_mutex is actually dropped half way through __kernfs_remove, so
KERNFS_SUICIDED is not set atomically with this mutex.


Hello James,

Sorry but what you wrote is not correct. __kernfs_remove() calls 
kernfs_drain(). That last function not only drops but also reacquires 
kernfs_mutex. So both KERNFS_SUICIDAL and KERNFS_SUICIDED are set while 
holding kernfs_mutex.


Bart.

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-08 Thread James Bottomley
On Tue, 2016-11-08 at 08:01 +0100, Greg KH wrote:
> On Mon, Nov 07, 2016 at 04:32:18PM -0800, Bart Van Assche wrote:
> > The SCSI core holds scan_mutex around SCSI device addition and
> > removal operations. sysfs serializes attribute read and write
> > operations against attribute removal through s_active. Avoid that
> > grabbing scan_mutex during self-removal of a SCSI device triggers
> > a deadlock by not calling __kernfs_remove() from
> > kernfs_remove_by_name_ns() in case of self-removal. This patch
> > avoids that self-removal triggers the following deadlock:
> > 
> > ===
> > [ INFO: possible circular locking dependency detected ]
> > 4.9.0-rc1-dbg+ #4 Not tainted
> > ---
> > test_02_sdev_de/12586 is trying to acquire lock:
> >  (>scan_mutex){+.+.+.}, at: []
> > scsi_remove_device+0x1e/0x40
> > but task is already holding lock:
> >  (s_active#336){.+}, at: []
> > kernfs_remove_self+0xde/0x140
> > which lock already depends on the new lock.
> > 
> > the existing dependency chain (in reverse order) is:
> > -> #1 (s_active#336){.+}:
> > [] lock_acquire+0xe9/0x1d0
> > [] __kernfs_remove+0x24a/0x310
> > [] kernfs_remove_by_name_ns+0x40/0x90
> > [] remove_files.isra.1+0x30/0x70
> > [] sysfs_remove_group+0x3f/0x90
> > [] sysfs_remove_groups+0x29/0x40
> > [] device_remove_attrs+0x59/0x80
> > [] device_del+0x125/0x240
> > [] __scsi_remove_device+0x143/0x180
> > [] scsi_forget_host+0x64/0x70
> > [] scsi_remove_host+0x75/0x120
> > [] 0xa035dbbb
> > [] process_one_work+0x1f5/0x690
> > [] worker_thread+0x49/0x490
> > [] kthread+0xeb/0x110
> > [] ret_from_fork+0x27/0x40
> > 
> > -> #0 (>scan_mutex){+.+.+.}:
> > [] __lock_acquire+0x10fc/0x1270
> > [] lock_acquire+0xe9/0x1d0
> > [] mutex_lock_nested+0x5f/0x360
> > [] scsi_remove_device+0x1e/0x40
> > [] sdev_store_delete+0x22/0x30
> > [] dev_attr_store+0x13/0x20
> > [] sysfs_kf_write+0x40/0x50
> > [] kernfs_fop_write+0x137/0x1c0
> > [] __vfs_write+0x23/0x140
> > [] vfs_write+0xb0/0x190
> > [] SyS_write+0x44/0xa0
> > [] entry_SYSCALL_64_fastpath+0x18/0xad
> > 
> > other info that might help us debug this:
> > 
> >  Possible unsafe locking scenario:
> >CPU0CPU1
> >
> >   lock(s_active#336);
> >lock(>scan_mutex);
> >lock(s_active#336);
> >   lock(>scan_mutex);
> > 
> >  *** DEADLOCK ***
> > 3 locks held by test_02_sdev_de/12586:
> >  #0:  (sb_writers#4){.+.+.+}, at: []
> > vfs_write+0x178/0x190
> >  #1:  (>mutex){+.+.+.}, at: []
> > kernfs_fop_write+0x101/0x1c0
> >  #2:  (s_active#336){.+}, at: []
> > kernfs_remove_self+0xde/0x140
> > 
> > stack backtrace:
> > CPU: 4 PID: 12586 Comm: test_02_sdev_de Not tainted 4.9.0-rc1-dbg+
> > #4
> > Call Trace:
> >  [] dump_stack+0x68/0x93
> >  [] print_circular_bug+0x1be/0x210
> >  [] __lock_acquire+0x10fc/0x1270
> >  [] lock_acquire+0xe9/0x1d0
> >  [] mutex_lock_nested+0x5f/0x360
> >  [] scsi_remove_device+0x1e/0x40
> >  [] sdev_store_delete+0x22/0x30
> >  [] dev_attr_store+0x13/0x20
> >  [] sysfs_kf_write+0x40/0x50
> >  [] kernfs_fop_write+0x137/0x1c0
> >  [] __vfs_write+0x23/0x140
> >  [] vfs_write+0xb0/0x190
> >  [] SyS_write+0x44/0xa0
> >  [] entry_SYSCALL_64_fastpath+0x18/0xad
> > 
> > References: http://www.spinics.net/lists/linux-scsi/msg86551.html
> > Signed-off-by: Bart Van Assche 
> > Cc: Greg Kroah-Hartman 
> > Cc: Eric Biederman 
> > Cc: Hannes Reinecke 
> > Cc: Johannes Thumshirn 
> > Cc: Sagi Grimberg 
> > Cc: 
> > ---
> >  fs/kernfs/dir.c | 2 +-
> >  1 file changed, 1 insertion(+), 1 deletion(-)
> > 
> > diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> > index cf4c636..44ec536 100644
> > --- a/fs/kernfs/dir.c
> > +++ b/fs/kernfs/dir.c
> > @@ -1410,7 +1410,7 @@ int kernfs_remove_by_name_ns(struct
> > kernfs_node *parent, const char *name,
> > mutex_lock(_mutex);
> >  
> > kn = kernfs_find_ns(parent, name, ns);
> > -   if (kn)
> > +   if (kn && !(kn->flags & KERNFS_SUICIDED))
> > __kernfs_remove(kn);
> 
> Really?  What changed recently to require this?  I thought we fixed
> these issues a long time ago in kernfs :(

It's a rare but obvious race between the self delete and non self
delete paths.  You can trigger it in SCSI by racing device delete with
HBA delete.  This is the detailed explanation:

http://marc.info/?l=linux-scsi=147855187425596

Which we should probably have in a condensed form in the changelog

James


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-08 Thread James Bottomley
On Mon, 2016-11-07 at 16:32 -0800, Bart Van Assche wrote:
> The SCSI core holds scan_mutex around SCSI device addition and
> removal operations. sysfs serializes attribute read and write
> operations against attribute removal through s_active. Avoid that
> grabbing scan_mutex during self-removal of a SCSI device triggers
> a deadlock by not calling __kernfs_remove() from
> kernfs_remove_by_name_ns() in case of self-removal. This patch
> avoids that self-removal triggers the following deadlock:
> 
> ===
> [ INFO: possible circular locking dependency detected ]
> 4.9.0-rc1-dbg+ #4 Not tainted
> ---
> test_02_sdev_de/12586 is trying to acquire lock:
>  (>scan_mutex){+.+.+.}, at: []
> scsi_remove_device+0x1e/0x40
> but task is already holding lock:
>  (s_active#336){.+}, at: []
> kernfs_remove_self+0xde/0x140
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> -> #1 (s_active#336){.+}:
> [] lock_acquire+0xe9/0x1d0
> [] __kernfs_remove+0x24a/0x310
> [] kernfs_remove_by_name_ns+0x40/0x90
> [] remove_files.isra.1+0x30/0x70
> [] sysfs_remove_group+0x3f/0x90
> [] sysfs_remove_groups+0x29/0x40
> [] device_remove_attrs+0x59/0x80
> [] device_del+0x125/0x240
> [] __scsi_remove_device+0x143/0x180
> [] scsi_forget_host+0x64/0x70
> [] scsi_remove_host+0x75/0x120
> [] 0xa035dbbb
> [] process_one_work+0x1f5/0x690
> [] worker_thread+0x49/0x490
> [] kthread+0xeb/0x110
> [] ret_from_fork+0x27/0x40
> 
> -> #0 (>scan_mutex){+.+.+.}:
> [] __lock_acquire+0x10fc/0x1270
> [] lock_acquire+0xe9/0x1d0
> [] mutex_lock_nested+0x5f/0x360
> [] scsi_remove_device+0x1e/0x40
> [] sdev_store_delete+0x22/0x30
> [] dev_attr_store+0x13/0x20
> [] sysfs_kf_write+0x40/0x50
> [] kernfs_fop_write+0x137/0x1c0
> [] __vfs_write+0x23/0x140
> [] vfs_write+0xb0/0x190
> [] SyS_write+0x44/0xa0
> [] entry_SYSCALL_64_fastpath+0x18/0xad
> 
> other info that might help us debug this:
> 
>  Possible unsafe locking scenario:
>CPU0CPU1
>
>   lock(s_active#336);
>lock(>scan_mutex);
>lock(s_active#336);
>   lock(>scan_mutex);
> 
>  *** DEADLOCK ***
> 3 locks held by test_02_sdev_de/12586:
>  #0:  (sb_writers#4){.+.+.+}, at: []
> vfs_write+0x178/0x190
>  #1:  (>mutex){+.+.+.}, at: []
> kernfs_fop_write+0x101/0x1c0
>  #2:  (s_active#336){.+}, at: []
> kernfs_remove_self+0xde/0x140
> 
> stack backtrace:
> CPU: 4 PID: 12586 Comm: test_02_sdev_de Not tainted 4.9.0-rc1-dbg+ #4
> Call Trace:
>  [] dump_stack+0x68/0x93
>  [] print_circular_bug+0x1be/0x210
>  [] __lock_acquire+0x10fc/0x1270
>  [] lock_acquire+0xe9/0x1d0
>  [] mutex_lock_nested+0x5f/0x360
>  [] scsi_remove_device+0x1e/0x40
>  [] sdev_store_delete+0x22/0x30
>  [] dev_attr_store+0x13/0x20
>  [] sysfs_kf_write+0x40/0x50
>  [] kernfs_fop_write+0x137/0x1c0
>  [] __vfs_write+0x23/0x140
>  [] vfs_write+0xb0/0x190
>  [] SyS_write+0x44/0xa0
>  [] entry_SYSCALL_64_fastpath+0x18/0xad
> 
> References: http://www.spinics.net/lists/linux-scsi/msg86551.html
> Signed-off-by: Bart Van Assche 
> Cc: Greg Kroah-Hartman 
> Cc: Eric Biederman 
> Cc: Hannes Reinecke 
> Cc: Johannes Thumshirn 
> Cc: Sagi Grimberg 
> Cc: 
> ---
>  fs/kernfs/dir.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index cf4c636..44ec536 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -1410,7 +1410,7 @@ int kernfs_remove_by_name_ns(struct kernfs_node
> *parent, const char *name,
>   mutex_lock(_mutex);
>  
>   kn = kernfs_find_ns(parent, name, ns);
> - if (kn)
> + if (kn && !(kn->flags & KERNFS_SUICIDED))

Actually, wrong flag, you need KERNFS_SUICIDAL.  The reason is that
kernfs_mutex is actually dropped half way through __kernfs_remove, so
KERNFS_SUICIDED is not set atomically with this mutex.

James


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-07 Thread Greg KH
On Mon, Nov 07, 2016 at 04:32:18PM -0800, Bart Van Assche wrote:
> The SCSI core holds scan_mutex around SCSI device addition and
> removal operations. sysfs serializes attribute read and write
> operations against attribute removal through s_active. Avoid that
> grabbing scan_mutex during self-removal of a SCSI device triggers
> a deadlock by not calling __kernfs_remove() from
> kernfs_remove_by_name_ns() in case of self-removal. This patch
> avoids that self-removal triggers the following deadlock:
> 
> ===
> [ INFO: possible circular locking dependency detected ]
> 4.9.0-rc1-dbg+ #4 Not tainted
> ---
> test_02_sdev_de/12586 is trying to acquire lock:
>  (>scan_mutex){+.+.+.}, at: [] 
> scsi_remove_device+0x1e/0x40
> but task is already holding lock:
>  (s_active#336){.+}, at: [] 
> kernfs_remove_self+0xde/0x140
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> -> #1 (s_active#336){.+}:
> [] lock_acquire+0xe9/0x1d0
> [] __kernfs_remove+0x24a/0x310
> [] kernfs_remove_by_name_ns+0x40/0x90
> [] remove_files.isra.1+0x30/0x70
> [] sysfs_remove_group+0x3f/0x90
> [] sysfs_remove_groups+0x29/0x40
> [] device_remove_attrs+0x59/0x80
> [] device_del+0x125/0x240
> [] __scsi_remove_device+0x143/0x180
> [] scsi_forget_host+0x64/0x70
> [] scsi_remove_host+0x75/0x120
> [] 0xa035dbbb
> [] process_one_work+0x1f5/0x690
> [] worker_thread+0x49/0x490
> [] kthread+0xeb/0x110
> [] ret_from_fork+0x27/0x40
> 
> -> #0 (>scan_mutex){+.+.+.}:
> [] __lock_acquire+0x10fc/0x1270
> [] lock_acquire+0xe9/0x1d0
> [] mutex_lock_nested+0x5f/0x360
> [] scsi_remove_device+0x1e/0x40
> [] sdev_store_delete+0x22/0x30
> [] dev_attr_store+0x13/0x20
> [] sysfs_kf_write+0x40/0x50
> [] kernfs_fop_write+0x137/0x1c0
> [] __vfs_write+0x23/0x140
> [] vfs_write+0xb0/0x190
> [] SyS_write+0x44/0xa0
> [] entry_SYSCALL_64_fastpath+0x18/0xad
> 
> other info that might help us debug this:
> 
>  Possible unsafe locking scenario:
>CPU0CPU1
>
>   lock(s_active#336);
>lock(>scan_mutex);
>lock(s_active#336);
>   lock(>scan_mutex);
> 
>  *** DEADLOCK ***
> 3 locks held by test_02_sdev_de/12586:
>  #0:  (sb_writers#4){.+.+.+}, at: [] vfs_write+0x178/0x190
>  #1:  (>mutex){+.+.+.}, at: [] 
> kernfs_fop_write+0x101/0x1c0
>  #2:  (s_active#336){.+}, at: [] 
> kernfs_remove_self+0xde/0x140
> 
> stack backtrace:
> CPU: 4 PID: 12586 Comm: test_02_sdev_de Not tainted 4.9.0-rc1-dbg+ #4
> Call Trace:
>  [] dump_stack+0x68/0x93
>  [] print_circular_bug+0x1be/0x210
>  [] __lock_acquire+0x10fc/0x1270
>  [] lock_acquire+0xe9/0x1d0
>  [] mutex_lock_nested+0x5f/0x360
>  [] scsi_remove_device+0x1e/0x40
>  [] sdev_store_delete+0x22/0x30
>  [] dev_attr_store+0x13/0x20
>  [] sysfs_kf_write+0x40/0x50
>  [] kernfs_fop_write+0x137/0x1c0
>  [] __vfs_write+0x23/0x140
>  [] vfs_write+0xb0/0x190
>  [] SyS_write+0x44/0xa0
>  [] entry_SYSCALL_64_fastpath+0x18/0xad
> 
> References: http://www.spinics.net/lists/linux-scsi/msg86551.html
> Signed-off-by: Bart Van Assche 
> Cc: Greg Kroah-Hartman 
> Cc: Eric Biederman 
> Cc: Hannes Reinecke 
> Cc: Johannes Thumshirn 
> Cc: Sagi Grimberg 
> Cc: 
> ---
>  fs/kernfs/dir.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
> index cf4c636..44ec536 100644
> --- a/fs/kernfs/dir.c
> +++ b/fs/kernfs/dir.c
> @@ -1410,7 +1410,7 @@ int kernfs_remove_by_name_ns(struct kernfs_node 
> *parent, const char *name,
>   mutex_lock(_mutex);
>  
>   kn = kernfs_find_ns(parent, name, ns);
> - if (kn)
> + if (kn && !(kn->flags & KERNFS_SUICIDED))
>   __kernfs_remove(kn);

Really?  What changed recently to require this?  I thought we fixed
these issues a long time ago in kernfs :(

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-07 Thread Bart Van Assche
The SCSI core holds scan_mutex around SCSI device addition and
removal operations. sysfs serializes attribute read and write
operations against attribute removal through s_active. Avoid that
grabbing scan_mutex during self-removal of a SCSI device triggers
a deadlock by not calling __kernfs_remove() from
kernfs_remove_by_name_ns() in case of self-removal. This patch
avoids that self-removal triggers the following deadlock:

===
[ INFO: possible circular locking dependency detected ]
4.9.0-rc1-dbg+ #4 Not tainted
---
test_02_sdev_de/12586 is trying to acquire lock:
 (>scan_mutex){+.+.+.}, at: [] 
scsi_remove_device+0x1e/0x40
but task is already holding lock:
 (s_active#336){.+}, at: [] kernfs_remove_self+0xde/0x140
which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:
-> #1 (s_active#336){.+}:
[] lock_acquire+0xe9/0x1d0
[] __kernfs_remove+0x24a/0x310
[] kernfs_remove_by_name_ns+0x40/0x90
[] remove_files.isra.1+0x30/0x70
[] sysfs_remove_group+0x3f/0x90
[] sysfs_remove_groups+0x29/0x40
[] device_remove_attrs+0x59/0x80
[] device_del+0x125/0x240
[] __scsi_remove_device+0x143/0x180
[] scsi_forget_host+0x64/0x70
[] scsi_remove_host+0x75/0x120
[] 0xa035dbbb
[] process_one_work+0x1f5/0x690
[] worker_thread+0x49/0x490
[] kthread+0xeb/0x110
[] ret_from_fork+0x27/0x40

-> #0 (>scan_mutex){+.+.+.}:
[] __lock_acquire+0x10fc/0x1270
[] lock_acquire+0xe9/0x1d0
[] mutex_lock_nested+0x5f/0x360
[] scsi_remove_device+0x1e/0x40
[] sdev_store_delete+0x22/0x30
[] dev_attr_store+0x13/0x20
[] sysfs_kf_write+0x40/0x50
[] kernfs_fop_write+0x137/0x1c0
[] __vfs_write+0x23/0x140
[] vfs_write+0xb0/0x190
[] SyS_write+0x44/0xa0
[] entry_SYSCALL_64_fastpath+0x18/0xad

other info that might help us debug this:

 Possible unsafe locking scenario:
   CPU0CPU1
   
  lock(s_active#336);
   lock(>scan_mutex);
   lock(s_active#336);
  lock(>scan_mutex);

 *** DEADLOCK ***
3 locks held by test_02_sdev_de/12586:
 #0:  (sb_writers#4){.+.+.+}, at: [] vfs_write+0x178/0x190
 #1:  (>mutex){+.+.+.}, at: [] 
kernfs_fop_write+0x101/0x1c0
 #2:  (s_active#336){.+}, at: [] 
kernfs_remove_self+0xde/0x140

stack backtrace:
CPU: 4 PID: 12586 Comm: test_02_sdev_de Not tainted 4.9.0-rc1-dbg+ #4
Call Trace:
 [] dump_stack+0x68/0x93
 [] print_circular_bug+0x1be/0x210
 [] __lock_acquire+0x10fc/0x1270
 [] lock_acquire+0xe9/0x1d0
 [] mutex_lock_nested+0x5f/0x360
 [] scsi_remove_device+0x1e/0x40
 [] sdev_store_delete+0x22/0x30
 [] dev_attr_store+0x13/0x20
 [] sysfs_kf_write+0x40/0x50
 [] kernfs_fop_write+0x137/0x1c0
 [] __vfs_write+0x23/0x140
 [] vfs_write+0xb0/0x190
 [] SyS_write+0x44/0xa0
 [] entry_SYSCALL_64_fastpath+0x18/0xad

References: http://www.spinics.net/lists/linux-scsi/msg86551.html
Signed-off-by: Bart Van Assche 
Cc: Greg Kroah-Hartman 
Cc: Eric Biederman 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Sagi Grimberg 
Cc: 
---
 fs/kernfs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index cf4c636..44ec536 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1410,7 +1410,7 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, 
const char *name,
mutex_lock(_mutex);
 
kn = kernfs_find_ns(parent, name, ns);
-   if (kn)
+   if (kn && !(kn->flags & KERNFS_SUICIDED))
__kernfs_remove(kn);
 
mutex_unlock(_mutex);
-- 
2.10.1

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-07 Thread James Bottomley
On Fri, 2016-11-04 at 12:17 -0600, Bart Van Assche wrote:
> On 11/04/2016 07:47 AM, James Bottomley wrote:
> > You know after
> > 
> > if (device_remove_file_self(dev, attr))
> > 
> > returns true that s_active is held and also that KERNFS_SUICIDAL is 
> > set on the node, so the non-self remove paths can simply check for 
> > this flag and return without descending into __kernfs_remove(), 
> > which would mean they never take s_active.  That means we never get 
> > the inversion.
> 
> Hello James,
> 
> The lock inversion is not triggered by the non-self remove paths but 
> by the self-remove path.

I think we should agree first what the problem is.  The inversion
occurs between the sysfs delete path and the device node delete caused
by a remove host.  When both are happening the inversion is that when

if (device_remove_file_self(dev, attr))
scsi_remove_device(to_scsi_device(dev));

Happens, after the if, the s_active lock is held then
scsi_remove_device goes on to take the scan_mutex.

Conversely in scsi_remove_host, the mutex is taken first then
scsi_forget_host iterates removing the devices, but sysfs file removal
eventually takes s_active in kernfs_drain, which is called from
kernfs_remove via kernfs_remove_by_name_ns, hence the inversion.

This is therefore a conflict between the self and non-self remove
paths.

> Anyway, can you have a look at the two attached 
> patches?

Well, they're still overly complex, but perhaps due to the
misunderstanding above?  If you look through that trigger case, you'll
see that the fix is simply to check KERNFS_SUICIDAL in
kernfs_remove_by_name_ns and not descend into __kernfs_remove() if it's
set.  I think kernfs_mutex mediates this, but probably only if it's
moved lower down in kernfs_drain.

James

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-04 Thread Bart Van Assche

On 11/04/2016 07:47 AM, James Bottomley wrote:

You know after

if (device_remove_file_self(dev, attr))

returns true that s_active is held and also that KERNFS_SUICIDAL is set
on the node, so the non-self remove paths can simply check for this
flag and return without descending into __kernfs_remove(), which would
mean they never take s_active.  That means we never get the inversion.


Hello James,

The lock inversion is not triggered by the non-self remove paths but by 
the self-remove path. Anyway, can you have a look at the two attached 
patches?


Thanks,

Bart.

>From dfd0bfa8b15e8d56a4f1ca3dc781439d10d0b098 Mon Sep 17 00:00:00 2001
From: Bart Van Assche 
Date: Fri, 4 Nov 2016 10:18:52 -0600
Subject: [PATCH 1/2] kernfs: Introduce a local variable

This patch does not change any functionality.

Signed-off-by: Bart Van Assche 
---
 fs/kernfs/file.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 78219d5..da58ea4 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -186,6 +186,7 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
    loff_t *ppos)
 {
 	ssize_t len = min_t(size_t, count, PAGE_SIZE);
+	struct kernfs_node *kn = of->kn;
 	const struct kernfs_ops *ops;
 	char *buf;
 
@@ -202,20 +203,20 @@ static ssize_t kernfs_file_direct_read(struct kernfs_open_file *of,
 	 * the ops aren't called concurrently for the same open file.
 	 */
 	mutex_lock(>mutex);
-	if (!kernfs_get_active(of->kn)) {
+	if (!kernfs_get_active(kn)) {
 		len = -ENODEV;
 		mutex_unlock(>mutex);
 		goto out_free;
 	}
 
-	of->event = atomic_read(>kn->attr.open->event);
-	ops = kernfs_ops(of->kn);
+	of->event = atomic_read(>attr.open->event);
+	ops = kernfs_ops(kn);
 	if (ops->read)
 		len = ops->read(of, buf, len, *ppos);
 	else
 		len = -EINVAL;
 
-	kernfs_put_active(of->kn);
+	kernfs_put_active(kn);
 	mutex_unlock(>mutex);
 
 	if (len < 0)
@@ -274,6 +275,7 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
 size_t count, loff_t *ppos)
 {
 	struct kernfs_open_file *of = kernfs_of(file);
+	struct kernfs_node *kn = of->kn;
 	const struct kernfs_ops *ops;
 	size_t len;
 	char *buf;
@@ -305,19 +307,19 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf,
 	 * the ops aren't called concurrently for the same open file.
 	 */
 	mutex_lock(>mutex);
-	if (!kernfs_get_active(of->kn)) {
+	if (!kernfs_get_active(kn)) {
 		mutex_unlock(>mutex);
 		len = -ENODEV;
 		goto out_free;
 	}
 
-	ops = kernfs_ops(of->kn);
+	ops = kernfs_ops(kn);
 	if (ops->write)
 		len = ops->write(of, buf, len, *ppos);
 	else
 		len = -EINVAL;
 
-	kernfs_put_active(of->kn);
+	kernfs_put_active(kn);
 	mutex_unlock(>mutex);
 
 	if (len > 0)
-- 
2.10.1

>From f092fc1d40f3746c417e979985346ba169af7a6d Mon Sep 17 00:00:00 2001
From: Bart Van Assche 
Date: Fri, 4 Nov 2016 10:07:23 -0600
Subject: [PATCH 2/2] Avoid that SCSI device removal through sysfs triggers a
 deadlock

The SCSI core holds scan_mutex around SCSI device addition and
removal operations. sysfs serializes attribute read and write
operations against attribute removal through s_active. Avoid that
grabbing scan_mutex during self-removal of a SCSI device triggers
a deadlock by re-grabbing s_active after self-removal has finished
instead of after kernfs_remove_self() has finished.

Signed-off-by: Bart Van Assche 
---
 fs/kernfs/dir.c| 19 +--
 fs/kernfs/file.c   |  7 +++
 include/linux/kernfs.h |  1 +
 3 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index cf4c636..abd3481 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -426,6 +426,8 @@ void kernfs_put_active(struct kernfs_node *kn)
 	if (unlikely(!kn))
 		return;
 
+	WARN_ON_ONCE(kn->flags & KERNFS_BROKE_A_P);
+
 	if (kernfs_lockdep(kn))
 		rwsem_release(>dep_map, 1, _RET_IP_);
 	v = atomic_dec_return(>active);
@@ -1314,6 +1316,9 @@ void kernfs_unbreak_active_protection(struct kernfs_node *kn)
  * kernfs_remove_self - remove a kernfs_node from its own method
  * @kn: the self kernfs_node to remove
  *
+ * If kernfs_remove_self() sets KERNFS_BROKE_A_P the caller must invoke
+ * kernfs_unbreak_active_protection().
+ *
  * The caller must be running off of a kernfs operation which is invoked
  * with an active reference - e.g. one of kernfs_ops.  This can be used to
  * implement a file operation which deletes itself.
@@ -1354,6 +1359,7 @@ bool kernfs_remove_self(struct kernfs_node *kn)
 	 */
 	if (!(kn->flags & KERNFS_SUICIDAL)) {
 		kn->flags |= KERNFS_SUICIDAL;
+		kn->flags |= KERNFS_BROKE_A_P;
 		__kernfs_remove(kn);
 		kn->flags |= KERNFS_SUICIDED;
 		ret = true;
@@ -1375,13 +1381,14 @@ bool kernfs_remove_self(struct kernfs_node *kn)
 		finish_wait(waitq, );
 		WARN_ON_ONCE(!RB_EMPTY_NODE(>rb));
 

Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-04 Thread James Bottomley
On Thu, 2016-11-03 at 16:27 -0600, Bart Van Assche wrote:
> On 10/28/2016 08:08 PM, James Bottomley wrote:
> > This is a deadlock caused by an inversion issue in kernfs (suicide
> > vs
> > non-suicide removes); so fixing it in SCSI alone really isn't
> > appropriate.  I count at least five other subsystems all using this
> > mechanism, so they'll all be similarly affected.  It looks to be
> > fairly
> > simply fixable inside kernfs, so please fix it that way.
> 
> Hello James,
> 
> How about fixing this deadlock with the below patch?

Without a description file, it's a bit hard to follow, but reading the
patch it's somewhat cumbersome and also incomplete: you change the API
for self removal, so every driver that uses the
device_remove_file_self() scheme (about five of them) has to be
changed.  I was actually thinking of something much simpler:

You know after

if (device_remove_file_self(dev, attr))

returns true that s_active is held and also that KERNFS_SUICIDAL is set
on the node, so the non-self remove paths can simply check for this
flag and return without descending into __kernfs_remove(), which would
mean they never take s_active.  That means we never get the inversion.

This should be about a 10 line patch because there are only two places
this check needs to be done: kernfs_remove() and
 kernfs_remove_by_name_ns().

It has the advantage that the api isn't altered so all the other driver
callers simply pick up the benefit.

There may be a bit of back end cleanup to do on the eventual parent
directory removal because I think it needs to wait for all the suicidal
nodes to signal they've finally suicided.

James

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-11-03 Thread Bart Van Assche
On 10/28/2016 08:08 PM, James Bottomley wrote:
> This is a deadlock caused by an inversion issue in kernfs (suicide vs
> non-suicide removes); so fixing it in SCSI alone really isn't
> appropriate.  I count at least five other subsystems all using this
> mechanism, so they'll all be similarly affected.  It looks to be fairly
> simply fixable inside kernfs, so please fix it that way.

Hello James,

How about fixing this deadlock with the below patch?

Thanks,

Bart.

---
 drivers/base/core.c   | 45 +
 drivers/scsi/scsi_sysfs.c | 10 +++---
 fs/kernfs/dir.c   | 11 ++-
 fs/sysfs/file.c   | 42 +++---
 include/linux/device.h|  6 ++
 include/linux/kernfs.h|  6 --
 include/linux/sysfs.h | 16 
 7 files changed, 123 insertions(+), 13 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index ce057a5..8f74abe 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -616,6 +616,46 @@ void device_remove_file(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(device_remove_file);
 
+struct device_self_removal_data {
+   void (*cb)(struct device *, const struct device_attribute *, void *);
+   struct device   *dev;
+   const struct device_attribute   *attr;
+   void*data;
+};
+
+static void device_call_cb(struct kobject *kobj, const struct attribute *attr,
+  void *data)
+{
+   const struct device_self_removal_data *srd = data;
+
+   srd->cb(srd->dev, srd->attr, srd->data);
+}
+
+/**
+ * device_remove_file_self_cb - remove sysfs attribute file from its own method
+ * @dev: device
+ * @attr: device attribute descriptor
+ * @cb: callback function called without holding the kernfs s_active lock
+ * @data: third argument passed to @cb
+ *
+ * See kernfs_remove_self() for details.
+ */
+bool device_remove_file_self_cb(struct device *dev,
+   const struct device_attribute *attr,
+   void (*cb)(struct device *,
+  const struct device_attribute *,
+  void *),
+   void *data)
+{
+   struct device_self_removal_data srd = { cb, dev, attr, data };
+
+   if (!dev)
+   return false;
+   return sysfs_remove_file_self_cb(>kobj, >attr, cb ?
+device_call_cb : NULL, );
+}
+EXPORT_SYMBOL_GPL(device_remove_file_self_cb);
+
 /**
  * device_remove_file_self - remove sysfs attribute file from its own method.
  * @dev: device.
@@ -626,10 +666,7 @@ EXPORT_SYMBOL_GPL(device_remove_file);
 bool device_remove_file_self(struct device *dev,
 const struct device_attribute *attr)
 {
-   if (dev)
-   return sysfs_remove_file_self(>kobj, >attr);
-   else
-   return false;
+   return device_remove_file_self_cb(dev, attr, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(device_remove_file_self);
 
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 0af9c91..2a30c9b 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -705,14 +705,18 @@ store_rescan_field (struct device *dev, struct 
device_attribute *attr,
 }
 static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field);
 
+static void sdev_delete_cb(struct device *dev,
+  const struct device_attribute *attr, void *data)
+{
+   scsi_remove_device(to_scsi_device(dev));
+}
 static ssize_t
 sdev_store_delete(struct device *dev, struct device_attribute *attr,
  const char *buf, size_t count)
 {
-   if (device_remove_file_self(dev, attr))
-   scsi_remove_device(to_scsi_device(dev));
+   device_remove_file_self_cb(dev, attr, sdev_delete_cb, NULL);
return count;
-};
+}
 static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete);
 
 static ssize_t
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index cf4c636..2831c8fa 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1313,6 +1313,8 @@ void kernfs_unbreak_active_protection(struct kernfs_node 
*kn)
 /**
  * kernfs_remove_self - remove a kernfs_node from its own method
  * @kn: the self kernfs_node to remove
+ * @cb: callback function called without holding the kernfs s_active lock
+ * @data: second argument passed to @cb
  *
  * The caller must be running off of a kernfs operation which is invoked
  * with an active reference - e.g. one of kernfs_ops.  This can be used to
@@ -1336,7 +1338,8 @@ void kernfs_unbreak_active_protection(struct kernfs_node 
*kn)
  * guarantee, for example, all concurrent writes to a "delete" file to
  * finish only after the whole operation is complete.
  */
-bool kernfs_remove_self(struct kernfs_node *kn)
+bool kernfs_remove_self(struct kernfs_node *kn,
+   void (*cb)(struct 

Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-10-30 Thread James Bottomley
On Sun, 2016-10-30 at 19:22 +, Bart Van Assche wrote:
> On 10/28/16 19:08, James Bottomley wrote:
> > This is a deadlock caused by an inversion issue in kernfs (suicide
> > vs
> > non-suicide removes); so fixing it in SCSI alone really isn't
> > appropriate.  I count at least five other subsystems all using this
> > mechanism, so they'll all be similarly affected.  It looks to be
> > fairly
> > simply fixable inside kernfs, so please fix it that way.
> 
> Hello James,
> 
> Can you clarify this further? To me this looks like the result of how
> the SCSI core works rather than an issue in the kernfs layer.

I'm at a bit of a loss, the problem looks clear from the original
trace, so I'm not really sure what's not clear to you.

The inversion is between the scan mutex and s_active which is the
rather fanciful name Tejun gave to the hand rolled mutex in
kernfs_node.

The reason for the inversion is that s_active is taken when you open a
sysfs file, including the delete one.  There's a special suidice path
to allow that file to be deleted while something else holds the lock. 
 However, if the delete path also takes any lock, and there's a way to
get into delete not via writing to sysfs (which is pretty much
universally true) then you get an inversion because kernfs_node mutex
is also taken when the file is removed, which is why it's not specific
to scsi.

Since you press the issue, I've got to say I'm not a huge fan of trying
to escape from a lock inversion by making some path asynchronous
because it usually leads to even more problems on down the road.  If
there's some problem with the generic fix, there is a way of fixing
this in SCSI without introducing asynchronicity.

James

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-10-30 Thread Bart Van Assche
On 10/28/16 19:08, James Bottomley wrote:
> This is a deadlock caused by an inversion issue in kernfs (suicide vs
> non-suicide removes); so fixing it in SCSI alone really isn't
> appropriate.  I count at least five other subsystems all using this
> mechanism, so they'll all be similarly affected.  It looks to be fairly
> simply fixable inside kernfs, so please fix it that way.

Hello James,

Can you clarify this further? To me this looks like the result of how 
the SCSI core works rather than an issue in the kernfs layer. My 
interpretation of the deadlock report produced by the lockdep code is as 
follows:
* The SCSI scanning code holds scan_mutex while creating sysfs
   attributes for a SCSI device. In this case scan_mutex is the outer
   mutex and s_active the inner locking object.
* scsi_remove_host() holds scan_mutex while removing sysfs attributes.
   Also in this case scan_mutex is the outer mutex and s_active the
   inner locking object.
* During self-removal (sysfs_remove_file_self() being called indirectly
   by kernfs_fop_write()), kernfs_fop_write() holds s_active while
   scsi_remove_device() is being called. In this case s_active is the
   outer locking object and scan_mutex the inner locking object.

I think that it is essential that kernfs_fop_write() holds s_active. So 
to me this looks like a lock inversion issue that cannot be fixed by 
modifying kernfs only. In other words, the SCSI core has to be modified 
to fix this. Do you agree with this?

Thanks,

Bart.

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-10-28 Thread James Bottomley
On Wed, 2016-10-26 at 11:44 -0700, Bart Van Assche wrote:
> The solution I prefer is to modify the SCSI scanning code such that
> the scan_mutex is only held while performing the actual LUN scanning
> and while ensuring that no SCSI device has been created yet for a
> certain LUN number but not while the Linux device and its sysfs
> attributes are created. Since that approach would require extensive
> changes in the SCSI scanning code, another approach has been chosen,
> namely to make self-removal asynchronous. This patch avoids that
> self-removal triggers the following deadlock:
> 
> ==
> [ INFO: possible circular locking dependency detected ]
> 4.9.0-rc1-dbg+ #4 Not tainted
> ---
> test_02_sdev_de/12586 is trying to acquire lock:
>  (>scan_mutex){+.+.+.}, at: []
> scsi_remove_device+0x1e/0x40
> but task is already holding lock:
>  (s_active#336){.+}, at: []
> kernfs_remove_self+0xde/0x140
> which lock already depends on the new lock.
> 
> the existing dependency chain (in reverse order) is:
> -> #1 (s_active#336){.+}:
> [] lock_acquire+0xe9/0x1d0
> [] __kernfs_remove+0x24a/0x310
> [] kernfs_remove_by_name_ns+0x40/0x90
> [] remove_files.isra.1+0x30/0x70
> [] sysfs_remove_group+0x3f/0x90
> [] sysfs_remove_groups+0x29/0x40
> [] device_remove_attrs+0x59/0x80
> [] device_del+0x125/0x240
> [] __scsi_remove_device+0x143/0x180
> [] scsi_forget_host+0x64/0x70
> [] scsi_remove_host+0x75/0x120
> [] 0xa035dbbb
> [] process_one_work+0x1f5/0x690
> [] worker_thread+0x49/0x490
> [] kthread+0xeb/0x110
> [] ret_from_fork+0x27/0x40
> 
> -> #0 (>scan_mutex){+.+.+.}:
> [] __lock_acquire+0x10fc/0x1270
> [] lock_acquire+0xe9/0x1d0
> [] mutex_lock_nested+0x5f/0x360
> [] scsi_remove_device+0x1e/0x40
> [] sdev_store_delete+0x22/0x30
> [] dev_attr_store+0x13/0x20
> [] sysfs_kf_write+0x40/0x50
> [] kernfs_fop_write+0x137/0x1c0
> [] __vfs_write+0x23/0x140
> [] vfs_write+0xb0/0x190
> [] SyS_write+0x44/0xa0
> [] entry_SYSCALL_64_fastpath+0x18/0xad
> 
> other info that might help us debug this:
> 
>  Possible unsafe locking scenario:
>CPU0CPU1
>
>   lock(s_active#336);
>lock(>scan_mutex);
>lock(s_active#336);
>   lock(>scan_mutex);
> 
>  *** DEADLOCK ***

This is a deadlock caused by an inversion issue in kernfs (suicide vs
non-suicide removes); so fixing it in SCSI alone really isn't
appropriate.  I count at least five other subsystems all using this
mechanism, so they'll all be similarly affected.  It looks to be fairly
simply fixable inside kernfs, so please fix it that way.

Thanks,

James


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-10-28 Thread Johannes Thumshirn
On Thu, Oct 27, 2016 at 08:38:03AM -0700, Bart Van Assche wrote:
> On 10/27/2016 02:46 AM, Johannes Thumshirn wrote:
> > On Wed, Oct 26, 2016 at 11:44:51AM -0700, Bart Van Assche wrote:
> > > +static void scsi_remove_device_async(struct scsi_device *sdev)
> > > +{
> > > + if (scsi_device_get(sdev) < 0)
> > 
> > Nit: the < 0 could be dropped, scsi_device_get returns either -ENXIO or
> > 0. But no reason to respin.
> > 
> > Anyways,
> > Reviewed-by: Johannes Thumshirn 
> 
> Hello Johannes,
> 
> Thanks for the review. But I'd like to clarify that I added the "< 0" on
> purpose. Some *_get*() functions in the Linux kernel return 0 upon failure
> (e.g. kref_get_unless_zero()) and others a negative value (e.g.
> scsi_device_get()). The "< 0" part avoids that someone who reads this code
> has to look up what return value convention scsi_device_get() uses.

OK, that makes sense to me.

Johannes

-- 
Johannes Thumshirn  Storage
jthumsh...@suse.de+49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-10-27 Thread Bart Van Assche

On 10/27/2016 02:36 AM, Sagi Grimberg wrote:

The solution I prefer is to modify the SCSI scanning code such that
the scan_mutex is only held while performing the actual LUN scanning
and while ensuring that no SCSI device has been created yet for a
certain LUN number but not while the Linux device and its sysfs
attributes are created. Since that approach would require extensive
changes in the SCSI scanning code, another approach has been chosen,
namely to make self-removal asynchronous. This patch avoids that
self-removal triggers the following deadlock:


Is this a real deadlock? or just a lockdep complaint?

Wouldn't making scsi_remove_device() taking single depth
mutex_lock_nested suffice here?


Hello Sagi,

It's a real deadlock, and I can trigger it relatively easy by deleting a 
SCSI device that is managed by the dm-multipath driver.


Bart.
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-10-27 Thread Bart Van Assche

On 10/27/2016 02:46 AM, Johannes Thumshirn wrote:

On Wed, Oct 26, 2016 at 11:44:51AM -0700, Bart Van Assche wrote:

+static void scsi_remove_device_async(struct scsi_device *sdev)
+{
+   if (scsi_device_get(sdev) < 0)


Nit: the < 0 could be dropped, scsi_device_get returns either -ENXIO or
0. But no reason to respin.

Anyways,
Reviewed-by: Johannes Thumshirn 


Hello Johannes,

Thanks for the review. But I'd like to clarify that I added the "< 0" on 
purpose. Some *_get*() functions in the Linux kernel return 0 upon 
failure (e.g. kref_get_unless_zero()) and others a negative value (e.g. 
scsi_device_get()). The "< 0" part avoids that someone who reads this 
code has to look up what return value convention scsi_device_get() uses.


Bart.

--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-10-27 Thread Sagi Grimberg

Hey Bart,


The solution I prefer is to modify the SCSI scanning code such that
the scan_mutex is only held while performing the actual LUN scanning
and while ensuring that no SCSI device has been created yet for a
certain LUN number but not while the Linux device and its sysfs
attributes are created. Since that approach would require extensive
changes in the SCSI scanning code, another approach has been chosen,
namely to make self-removal asynchronous. This patch avoids that
self-removal triggers the following deadlock:


Is this a real deadlock? or just a lockdep complaint?

Wouldn't making scsi_remove_device() taking single depth
mutex_lock_nested suffice here?
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-10-27 Thread Johannes Thumshirn
Hi Bart,

On Wed, Oct 26, 2016 at 11:44:51AM -0700, Bart Van Assche wrote:
> The solution I prefer is to modify the SCSI scanning code such that
> the scan_mutex is only held while performing the actual LUN scanning
> and while ensuring that no SCSI device has been created yet for a
> certain LUN number but not while the Linux device and its sysfs
> attributes are created. Since that approach would require extensive
> changes in the SCSI scanning code, another approach has been chosen,
> namely to make self-removal asynchronous. This patch avoids that
> self-removal triggers the following deadlock:

I'm not sure I like yet another asynchronity in the scsi code, but your
reason is very much understandable and I agree that this is the faster way.

[...]

> 
> References: http://www.spinics.net/lists/linux-scsi/msg86551.html
> Signed-off-by: Bart Van Assche 
> Cc: Hannes Reinecke 
> Cc: Johannes Thumshirn 
> Cc: Sagi Grimberg 
> Cc: 
> ---

[...]

>  
> +static void scsi_remove_device_async(struct scsi_device *sdev)
> +{
> + if (scsi_device_get(sdev) < 0)

Nit: the < 0 could be dropped, scsi_device_get returns either -ENXIO or
0. But no reason to respin.

Anyways,
Reviewed-by: Johannes Thumshirn 

-- 
Johannes Thumshirn  Storage
jthumsh...@suse.de+49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Avoid that SCSI device removal through sysfs triggers a deadlock

2016-10-26 Thread Bart Van Assche
The solution I prefer is to modify the SCSI scanning code such that
the scan_mutex is only held while performing the actual LUN scanning
and while ensuring that no SCSI device has been created yet for a
certain LUN number but not while the Linux device and its sysfs
attributes are created. Since that approach would require extensive
changes in the SCSI scanning code, another approach has been chosen,
namely to make self-removal asynchronous. This patch avoids that
self-removal triggers the following deadlock:

==
[ INFO: possible circular locking dependency detected ]
4.9.0-rc1-dbg+ #4 Not tainted
---
test_02_sdev_de/12586 is trying to acquire lock:
 (>scan_mutex){+.+.+.}, at: [] 
scsi_remove_device+0x1e/0x40
but task is already holding lock:
 (s_active#336){.+}, at: [] kernfs_remove_self+0xde/0x140
which lock already depends on the new lock.

the existing dependency chain (in reverse order) is:
-> #1 (s_active#336){.+}:
[] lock_acquire+0xe9/0x1d0
[] __kernfs_remove+0x24a/0x310
[] kernfs_remove_by_name_ns+0x40/0x90
[] remove_files.isra.1+0x30/0x70
[] sysfs_remove_group+0x3f/0x90
[] sysfs_remove_groups+0x29/0x40
[] device_remove_attrs+0x59/0x80
[] device_del+0x125/0x240
[] __scsi_remove_device+0x143/0x180
[] scsi_forget_host+0x64/0x70
[] scsi_remove_host+0x75/0x120
[] 0xa035dbbb
[] process_one_work+0x1f5/0x690
[] worker_thread+0x49/0x490
[] kthread+0xeb/0x110
[] ret_from_fork+0x27/0x40

-> #0 (>scan_mutex){+.+.+.}:
[] __lock_acquire+0x10fc/0x1270
[] lock_acquire+0xe9/0x1d0
[] mutex_lock_nested+0x5f/0x360
[] scsi_remove_device+0x1e/0x40
[] sdev_store_delete+0x22/0x30
[] dev_attr_store+0x13/0x20
[] sysfs_kf_write+0x40/0x50
[] kernfs_fop_write+0x137/0x1c0
[] __vfs_write+0x23/0x140
[] vfs_write+0xb0/0x190
[] SyS_write+0x44/0xa0
[] entry_SYSCALL_64_fastpath+0x18/0xad

other info that might help us debug this:

 Possible unsafe locking scenario:
   CPU0CPU1
   
  lock(s_active#336);
   lock(>scan_mutex);
   lock(s_active#336);
  lock(>scan_mutex);

 *** DEADLOCK ***
3 locks held by test_02_sdev_de/12586:
 #0:  (sb_writers#4){.+.+.+}, at: [] vfs_write+0x178/0x190
 #1:  (>mutex){+.+.+.}, at: [] 
kernfs_fop_write+0x101/0x1c0
 #2:  (s_active#336){.+}, at: [] 
kernfs_remove_self+0xde/0x140

stack backtrace:
CPU: 4 PID: 12586 Comm: test_02_sdev_de Not tainted 4.9.0-rc1-dbg+ #4
Call Trace:
 [] dump_stack+0x68/0x93
 [] print_circular_bug+0x1be/0x210
 [] __lock_acquire+0x10fc/0x1270
 [] lock_acquire+0xe9/0x1d0
 [] mutex_lock_nested+0x5f/0x360
 [] scsi_remove_device+0x1e/0x40
 [] sdev_store_delete+0x22/0x30
 [] dev_attr_store+0x13/0x20
 [] sysfs_kf_write+0x40/0x50
 [] kernfs_fop_write+0x137/0x1c0
 [] __vfs_write+0x23/0x140
 [] vfs_write+0xb0/0x190
 [] SyS_write+0x44/0xa0
 [] entry_SYSCALL_64_fastpath+0x18/0xad

References: http://www.spinics.net/lists/linux-scsi/msg86551.html
Signed-off-by: Bart Van Assche 
Cc: Hannes Reinecke 
Cc: Johannes Thumshirn 
Cc: Sagi Grimberg 
Cc: 
---
 drivers/scsi/scsi_priv.h   |  1 +
 drivers/scsi/scsi_scan.c   |  1 +
 drivers/scsi/scsi_sysfs.c  | 19 ++-
 include/scsi/scsi_device.h |  2 ++
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 77bf611..66646c7 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -142,6 +142,7 @@ extern void scsi_sysfs_device_initialize(struct scsi_device 
*);
 extern int scsi_sysfs_target_initialize(struct scsi_device *);
 extern struct scsi_transport_template blank_transport_template;
 extern void __scsi_remove_device(struct scsi_device *);
+extern void scsi_remove_device_work(struct work_struct *work);
 
 extern struct bus_type scsi_bus_type;
 extern const struct attribute_group *scsi_sysfs_shost_attr_groups[];
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 6f7128f..2b11061 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -241,6 +241,7 @@ static struct scsi_device *scsi_alloc_sdev(struct 
scsi_target *starget,
mutex_init(>inquiry_mutex);
INIT_WORK(>event_work, scsi_evt_thread);
INIT_WORK(>requeue_work, scsi_requeue_run_queue);
+   INIT_WORK(>remove_work, scsi_remove_device_work);
 
sdev->sdev_gendev.parent = get_device(>dev);
sdev->sdev_target = starget;
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 0af9c91..d5a7a92 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -705,12 +705,20 @@ store_rescan_field (struct device *dev, struct 
device_attribute *attr,
 }
 static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field);
 
+static void scsi_remove_device_async(struct scsi_device *sdev)