Re: [PATCH v2] driver core: Fix sleeping in invalid context during device link deletion

2020-07-17 Thread Saravana Kannan
On Fri, Jul 17, 2020 at 9:34 AM Guenter Roeck  wrote:
>
> On Fri, Jul 17, 2020 at 12:13:04AM +0200, Marek Szyprowski wrote:
> > Hi Saravana,
> >
> > On 16.07.2020 23:45, Saravana Kannan wrote:
> > > Marek and Guenter reported that commit 287905e68dd2 ("driver core:
> > > Expose device link details in sysfs") caused sleeping/scheduling while
> > > atomic warnings.
> > >
> > > BUG: sleeping function called from invalid context at 
> > > kernel/locking/mutex.c:935
> > > in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: 
> > > kworker/0:1
> > > 2 locks held by kworker/0:1/12:
> > >#0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: 
> > > process_one_work+0x174/0x7dc
> > >#1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: 
> > > process_one_work+0x174/0x7dc
> > > Preemption disabled at:
> > > [] srcu_invoke_callbacks+0xc0/0x154
> > > - 8< - SNIP
> > > [] (device_del) from [] (device_unregister+0x24/0x64)
> > > [] (device_unregister) from [] 
> > > (srcu_invoke_callbacks+0xcc/0x154)
> > > [] (srcu_invoke_callbacks) from [] 
> > > (process_one_work+0x234/0x7dc)
> > > [] (process_one_work) from [] 
> > > (worker_thread+0x44/0x51c)
> > > [] (worker_thread) from [] (kthread+0x158/0x1a0)
> > > [] (kthread) from [] (ret_from_fork+0x14/0x20)
> > > Exception stack(0xee921fb0 to 0xee921ff8)
> > >
> > > This was caused by the device link device being released in the context
> > > of srcu_invoke_callbacks().  There is no need to wait till the RCU
> > > callback to release the device link device.  So release the device
> > > earlier and move the call_srcu() into the device release code. That way,
> > > the memory will get freed only after the device is released AND the RCU
> > > callback is called.
> > >
> > > Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs")
> > > Reported-by: Marek Szyprowski 
> > > Reported-by: Guenter Roeck 
> > > Signed-off-by: Saravana Kannan 
> > > ---
> > >
> > > v1->v2:
> > > - Better fix
> > > - Changed subject
> > > - v1 is this patch 
> > > https://lore.kernel.org/lkml/20200716050846.2047110-1-sarava...@google.com/
> > >
> > > Marek and Guenter,
> > >
> > > I reproduced the original issue and tested this fix. Seems to work for
> > > me. Can you confirm?
> >
> > Confirmed, this one fixes the issue! :)
> >
> Same here.
>
> Tested-by: Guenter Roeck 
>
> Guenter
>
> > Tested-by: Marek Szyprowski 
> >

Thanks both.

Greg, can you review this and pull this into driver-core-next too?

-Saravana


Re: [PATCH v2] driver core: Fix sleeping in invalid context during device link deletion

2020-07-17 Thread Guenter Roeck
On Fri, Jul 17, 2020 at 12:13:04AM +0200, Marek Szyprowski wrote:
> Hi Saravana,
> 
> On 16.07.2020 23:45, Saravana Kannan wrote:
> > Marek and Guenter reported that commit 287905e68dd2 ("driver core:
> > Expose device link details in sysfs") caused sleeping/scheduling while
> > atomic warnings.
> >
> > BUG: sleeping function called from invalid context at 
> > kernel/locking/mutex.c:935
> > in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: kworker/0:1
> > 2 locks held by kworker/0:1/12:
> >#0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: 
> > process_one_work+0x174/0x7dc
> >#1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: 
> > process_one_work+0x174/0x7dc
> > Preemption disabled at:
> > [] srcu_invoke_callbacks+0xc0/0x154
> > - 8< - SNIP
> > [] (device_del) from [] (device_unregister+0x24/0x64)
> > [] (device_unregister) from [] 
> > (srcu_invoke_callbacks+0xcc/0x154)
> > [] (srcu_invoke_callbacks) from [] 
> > (process_one_work+0x234/0x7dc)
> > [] (process_one_work) from [] (worker_thread+0x44/0x51c)
> > [] (worker_thread) from [] (kthread+0x158/0x1a0)
> > [] (kthread) from [] (ret_from_fork+0x14/0x20)
> > Exception stack(0xee921fb0 to 0xee921ff8)
> >
> > This was caused by the device link device being released in the context
> > of srcu_invoke_callbacks().  There is no need to wait till the RCU
> > callback to release the device link device.  So release the device
> > earlier and move the call_srcu() into the device release code. That way,
> > the memory will get freed only after the device is released AND the RCU
> > callback is called.
> >
> > Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs")
> > Reported-by: Marek Szyprowski 
> > Reported-by: Guenter Roeck 
> > Signed-off-by: Saravana Kannan 
> > ---
> >
> > v1->v2:
> > - Better fix
> > - Changed subject
> > - v1 is this patch 
> > https://lore.kernel.org/lkml/20200716050846.2047110-1-sarava...@google.com/
> >
> > Marek and Guenter,
> >
> > I reproduced the original issue and tested this fix. Seems to work for
> > me. Can you confirm?
> 
> Confirmed, this one fixes the issue! :)
> 
Same here.

Tested-by: Guenter Roeck 

Guenter

> Tested-by: Marek Szyprowski 
> 
> > Thanks,
> > Saravana
> >
> >   drivers/base/core.c | 45 +++--
> >   1 file changed, 27 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/base/core.c b/drivers/base/core.c
> > index 5373ddd029f6..ec16b97d45ed 100644
> > --- a/drivers/base/core.c
> > +++ b/drivers/base/core.c
> > @@ -306,10 +306,34 @@ static struct attribute *devlink_attrs[] = {
> >   };
> >   ATTRIBUTE_GROUPS(devlink);
> >   
> > +static void device_link_free(struct device_link *link)
> > +{
> > +   while (refcount_dec_not_one(>rpm_active))
> > +   pm_runtime_put(link->supplier);
> > +
> > +   put_device(link->consumer);
> > +   put_device(link->supplier);
> > +   kfree(link);
> > +}
> > +
> > +#ifdef CONFIG_SRCU
> > +static void __device_link_free_srcu(struct rcu_head *rhead)
> > +{
> > +   device_link_free(container_of(rhead, struct device_link, rcu_head));
> > +}
> > +
> >   static void devlink_dev_release(struct device *dev)
> >   {
> > -   kfree(to_devlink(dev));
> > +   struct device_link *link = to_devlink(dev);
> > +
> > +   call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu);
> >   }
> > +#else
> > +static void devlink_dev_release(struct device *dev)
> > +{
> > +   device_link_free(to_devlink(dev));
> > +}
> > +#endif
> >   
> >   static struct class devlink_class = {
> > .name = "devlink",
> > @@ -730,22 +754,7 @@ static void 
> > device_link_add_missing_supplier_links(void)
> > mutex_unlock(_lock);
> >   }
> >   
> > -static void device_link_free(struct device_link *link)
> > -{
> > -   while (refcount_dec_not_one(>rpm_active))
> > -   pm_runtime_put(link->supplier);
> > -
> > -   put_device(link->consumer);
> > -   put_device(link->supplier);
> > -   device_unregister(>link_dev);
> > -}
> > -
> >   #ifdef CONFIG_SRCU
> > -static void __device_link_free_srcu(struct rcu_head *rhead)
> > -{
> > -   device_link_free(container_of(rhead, struct device_link, rcu_head));
> > -}
> > -
> >   static void __device_link_del(struct kref *kref)
> >   {
> > struct device_link *link = container_of(kref, struct device_link, kref);
> > @@ -758,7 +767,7 @@ static void __device_link_del(struct kref *kref)
> >   
> > list_del_rcu(>s_node);
> > list_del_rcu(>c_node);
> > -   call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu);
> > +   device_unregister(>link_dev);
> >   }
> >   #else /* !CONFIG_SRCU */
> >   static void __device_link_del(struct kref *kref)
> > @@ -773,7 +782,7 @@ static void __device_link_del(struct kref *kref)
> >   
> > list_del(>s_node);
> > list_del(>c_node);
> > -   device_link_free(link);
> > +   device_unregister(>link_dev);
> >   }
> >   #endif /* !CONFIG_SRCU */
> >   
> 
> Best regards
> -- 
> Marek Szyprowski, PhD
> Samsung R 

Re: [PATCH v2] driver core: Fix sleeping in invalid context during device link deletion

2020-07-16 Thread Saravana Kannan
On Thu, Jul 16, 2020 at 3:13 PM Marek Szyprowski
 wrote:
>
> Hi Saravana,
>
> On 16.07.2020 23:45, Saravana Kannan wrote:
> > Marek and Guenter reported that commit 287905e68dd2 ("driver core:
> > Expose device link details in sysfs") caused sleeping/scheduling while
> > atomic warnings.
> >
> > BUG: sleeping function called from invalid context at 
> > kernel/locking/mutex.c:935
> > in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: kworker/0:1
> > 2 locks held by kworker/0:1/12:
> >#0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: 
> > process_one_work+0x174/0x7dc
> >#1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: 
> > process_one_work+0x174/0x7dc
> > Preemption disabled at:
> > [] srcu_invoke_callbacks+0xc0/0x154
> > - 8< - SNIP
> > [] (device_del) from [] (device_unregister+0x24/0x64)
> > [] (device_unregister) from [] 
> > (srcu_invoke_callbacks+0xcc/0x154)
> > [] (srcu_invoke_callbacks) from [] 
> > (process_one_work+0x234/0x7dc)
> > [] (process_one_work) from [] (worker_thread+0x44/0x51c)
> > [] (worker_thread) from [] (kthread+0x158/0x1a0)
> > [] (kthread) from [] (ret_from_fork+0x14/0x20)
> > Exception stack(0xee921fb0 to 0xee921ff8)
> >
> > This was caused by the device link device being released in the context
> > of srcu_invoke_callbacks().  There is no need to wait till the RCU
> > callback to release the device link device.  So release the device
> > earlier and move the call_srcu() into the device release code. That way,
> > the memory will get freed only after the device is released AND the RCU
> > callback is called.
> >
> > Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs")
> > Reported-by: Marek Szyprowski 
> > Reported-by: Guenter Roeck 
> > Signed-off-by: Saravana Kannan 
> > ---
> >
> > v1->v2:
> > - Better fix
> > - Changed subject
> > - v1 is this patch 
> > https://lore.kernel.org/lkml/20200716050846.2047110-1-sarava...@google.com/
> >
> > Marek and Guenter,
> >
> > I reproduced the original issue and tested this fix. Seems to work for
> > me. Can you confirm?
>
> Confirmed, this one fixes the issue! :)
>
> Tested-by: Marek Szyprowski 

Thanks!

-Saravana


Re: [PATCH v2] driver core: Fix sleeping in invalid context during device link deletion

2020-07-16 Thread Marek Szyprowski
Hi Saravana,

On 16.07.2020 23:45, Saravana Kannan wrote:
> Marek and Guenter reported that commit 287905e68dd2 ("driver core:
> Expose device link details in sysfs") caused sleeping/scheduling while
> atomic warnings.
>
> BUG: sleeping function called from invalid context at 
> kernel/locking/mutex.c:935
> in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: kworker/0:1
> 2 locks held by kworker/0:1/12:
>#0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: 
> process_one_work+0x174/0x7dc
>#1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: 
> process_one_work+0x174/0x7dc
> Preemption disabled at:
> [] srcu_invoke_callbacks+0xc0/0x154
> - 8< - SNIP
> [] (device_del) from [] (device_unregister+0x24/0x64)
> [] (device_unregister) from [] 
> (srcu_invoke_callbacks+0xcc/0x154)
> [] (srcu_invoke_callbacks) from [] 
> (process_one_work+0x234/0x7dc)
> [] (process_one_work) from [] (worker_thread+0x44/0x51c)
> [] (worker_thread) from [] (kthread+0x158/0x1a0)
> [] (kthread) from [] (ret_from_fork+0x14/0x20)
> Exception stack(0xee921fb0 to 0xee921ff8)
>
> This was caused by the device link device being released in the context
> of srcu_invoke_callbacks().  There is no need to wait till the RCU
> callback to release the device link device.  So release the device
> earlier and move the call_srcu() into the device release code. That way,
> the memory will get freed only after the device is released AND the RCU
> callback is called.
>
> Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs")
> Reported-by: Marek Szyprowski 
> Reported-by: Guenter Roeck 
> Signed-off-by: Saravana Kannan 
> ---
>
> v1->v2:
> - Better fix
> - Changed subject
> - v1 is this patch 
> https://lore.kernel.org/lkml/20200716050846.2047110-1-sarava...@google.com/
>
> Marek and Guenter,
>
> I reproduced the original issue and tested this fix. Seems to work for
> me. Can you confirm?

Confirmed, this one fixes the issue! :)

Tested-by: Marek Szyprowski 

> Thanks,
> Saravana
>
>   drivers/base/core.c | 45 +++--
>   1 file changed, 27 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index 5373ddd029f6..ec16b97d45ed 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -306,10 +306,34 @@ static struct attribute *devlink_attrs[] = {
>   };
>   ATTRIBUTE_GROUPS(devlink);
>   
> +static void device_link_free(struct device_link *link)
> +{
> + while (refcount_dec_not_one(>rpm_active))
> + pm_runtime_put(link->supplier);
> +
> + put_device(link->consumer);
> + put_device(link->supplier);
> + kfree(link);
> +}
> +
> +#ifdef CONFIG_SRCU
> +static void __device_link_free_srcu(struct rcu_head *rhead)
> +{
> + device_link_free(container_of(rhead, struct device_link, rcu_head));
> +}
> +
>   static void devlink_dev_release(struct device *dev)
>   {
> - kfree(to_devlink(dev));
> + struct device_link *link = to_devlink(dev);
> +
> + call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu);
>   }
> +#else
> +static void devlink_dev_release(struct device *dev)
> +{
> + device_link_free(to_devlink(dev));
> +}
> +#endif
>   
>   static struct class devlink_class = {
>   .name = "devlink",
> @@ -730,22 +754,7 @@ static void device_link_add_missing_supplier_links(void)
>   mutex_unlock(_lock);
>   }
>   
> -static void device_link_free(struct device_link *link)
> -{
> - while (refcount_dec_not_one(>rpm_active))
> - pm_runtime_put(link->supplier);
> -
> - put_device(link->consumer);
> - put_device(link->supplier);
> - device_unregister(>link_dev);
> -}
> -
>   #ifdef CONFIG_SRCU
> -static void __device_link_free_srcu(struct rcu_head *rhead)
> -{
> - device_link_free(container_of(rhead, struct device_link, rcu_head));
> -}
> -
>   static void __device_link_del(struct kref *kref)
>   {
>   struct device_link *link = container_of(kref, struct device_link, kref);
> @@ -758,7 +767,7 @@ static void __device_link_del(struct kref *kref)
>   
>   list_del_rcu(>s_node);
>   list_del_rcu(>c_node);
> - call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu);
> + device_unregister(>link_dev);
>   }
>   #else /* !CONFIG_SRCU */
>   static void __device_link_del(struct kref *kref)
> @@ -773,7 +782,7 @@ static void __device_link_del(struct kref *kref)
>   
>   list_del(>s_node);
>   list_del(>c_node);
> - device_link_free(link);
> + device_unregister(>link_dev);
>   }
>   #endif /* !CONFIG_SRCU */
>   

Best regards
-- 
Marek Szyprowski, PhD
Samsung R Institute Poland



[PATCH v2] driver core: Fix sleeping in invalid context during device link deletion

2020-07-16 Thread Saravana Kannan
Marek and Guenter reported that commit 287905e68dd2 ("driver core:
Expose device link details in sysfs") caused sleeping/scheduling while
atomic warnings.

BUG: sleeping function called from invalid context at kernel/locking/mutex.c:935
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: kworker/0:1
2 locks held by kworker/0:1/12:
  #0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: 
process_one_work+0x174/0x7dc
  #1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: 
process_one_work+0x174/0x7dc
Preemption disabled at:
[] srcu_invoke_callbacks+0xc0/0x154
- 8< - SNIP
[] (device_del) from [] (device_unregister+0x24/0x64)
[] (device_unregister) from [] 
(srcu_invoke_callbacks+0xcc/0x154)
[] (srcu_invoke_callbacks) from [] 
(process_one_work+0x234/0x7dc)
[] (process_one_work) from [] (worker_thread+0x44/0x51c)
[] (worker_thread) from [] (kthread+0x158/0x1a0)
[] (kthread) from [] (ret_from_fork+0x14/0x20)
Exception stack(0xee921fb0 to 0xee921ff8)

This was caused by the device link device being released in the context
of srcu_invoke_callbacks().  There is no need to wait till the RCU
callback to release the device link device.  So release the device
earlier and move the call_srcu() into the device release code. That way,
the memory will get freed only after the device is released AND the RCU
callback is called.

Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs")
Reported-by: Marek Szyprowski 
Reported-by: Guenter Roeck 
Signed-off-by: Saravana Kannan 
---

v1->v2:
- Better fix
- Changed subject
- v1 is this patch 
https://lore.kernel.org/lkml/20200716050846.2047110-1-sarava...@google.com/

Marek and Guenter,

I reproduced the original issue and tested this fix. Seems to work for
me. Can you confirm?

Thanks,
Saravana

 drivers/base/core.c | 45 +++--
 1 file changed, 27 insertions(+), 18 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 5373ddd029f6..ec16b97d45ed 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -306,10 +306,34 @@ static struct attribute *devlink_attrs[] = {
 };
 ATTRIBUTE_GROUPS(devlink);
 
+static void device_link_free(struct device_link *link)
+{
+   while (refcount_dec_not_one(>rpm_active))
+   pm_runtime_put(link->supplier);
+
+   put_device(link->consumer);
+   put_device(link->supplier);
+   kfree(link);
+}
+
+#ifdef CONFIG_SRCU
+static void __device_link_free_srcu(struct rcu_head *rhead)
+{
+   device_link_free(container_of(rhead, struct device_link, rcu_head));
+}
+
 static void devlink_dev_release(struct device *dev)
 {
-   kfree(to_devlink(dev));
+   struct device_link *link = to_devlink(dev);
+
+   call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu);
 }
+#else
+static void devlink_dev_release(struct device *dev)
+{
+   device_link_free(to_devlink(dev));
+}
+#endif
 
 static struct class devlink_class = {
.name = "devlink",
@@ -730,22 +754,7 @@ static void device_link_add_missing_supplier_links(void)
mutex_unlock(_lock);
 }
 
-static void device_link_free(struct device_link *link)
-{
-   while (refcount_dec_not_one(>rpm_active))
-   pm_runtime_put(link->supplier);
-
-   put_device(link->consumer);
-   put_device(link->supplier);
-   device_unregister(>link_dev);
-}
-
 #ifdef CONFIG_SRCU
-static void __device_link_free_srcu(struct rcu_head *rhead)
-{
-   device_link_free(container_of(rhead, struct device_link, rcu_head));
-}
-
 static void __device_link_del(struct kref *kref)
 {
struct device_link *link = container_of(kref, struct device_link, kref);
@@ -758,7 +767,7 @@ static void __device_link_del(struct kref *kref)
 
list_del_rcu(>s_node);
list_del_rcu(>c_node);
-   call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu);
+   device_unregister(>link_dev);
 }
 #else /* !CONFIG_SRCU */
 static void __device_link_del(struct kref *kref)
@@ -773,7 +782,7 @@ static void __device_link_del(struct kref *kref)
 
list_del(>s_node);
list_del(>c_node);
-   device_link_free(link);
+   device_unregister(>link_dev);
 }
 #endif /* !CONFIG_SRCU */
 
-- 
2.28.0.rc0.105.gf9edc3c819-goog