Re: [PATCH v2] driver core: Fix sleeping in invalid context during device link deletion
On Fri, Jul 17, 2020 at 9:34 AM Guenter Roeck wrote: > > On Fri, Jul 17, 2020 at 12:13:04AM +0200, Marek Szyprowski wrote: > > Hi Saravana, > > > > On 16.07.2020 23:45, Saravana Kannan wrote: > > > Marek and Guenter reported that commit 287905e68dd2 ("driver core: > > > Expose device link details in sysfs") caused sleeping/scheduling while > > > atomic warnings. > > > > > > BUG: sleeping function called from invalid context at > > > kernel/locking/mutex.c:935 > > > in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: > > > kworker/0:1 > > > 2 locks held by kworker/0:1/12: > > >#0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: > > > process_one_work+0x174/0x7dc > > >#1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: > > > process_one_work+0x174/0x7dc > > > Preemption disabled at: > > > [] srcu_invoke_callbacks+0xc0/0x154 > > > - 8< - SNIP > > > [] (device_del) from [] (device_unregister+0x24/0x64) > > > [] (device_unregister) from [] > > > (srcu_invoke_callbacks+0xcc/0x154) > > > [] (srcu_invoke_callbacks) from [] > > > (process_one_work+0x234/0x7dc) > > > [] (process_one_work) from [] > > > (worker_thread+0x44/0x51c) > > > [] (worker_thread) from [] (kthread+0x158/0x1a0) > > > [] (kthread) from [] (ret_from_fork+0x14/0x20) > > > Exception stack(0xee921fb0 to 0xee921ff8) > > > > > > This was caused by the device link device being released in the context > > > of srcu_invoke_callbacks(). There is no need to wait till the RCU > > > callback to release the device link device. So release the device > > > earlier and move the call_srcu() into the device release code. That way, > > > the memory will get freed only after the device is released AND the RCU > > > callback is called. > > > > > > Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs") > > > Reported-by: Marek Szyprowski > > > Reported-by: Guenter Roeck > > > Signed-off-by: Saravana Kannan > > > --- > > > > > > v1->v2: > > > - Better fix > > > - Changed subject > > > - v1 is this patch > > > https://lore.kernel.org/lkml/20200716050846.2047110-1-sarava...@google.com/ > > > > > > Marek and Guenter, > > > > > > I reproduced the original issue and tested this fix. Seems to work for > > > me. Can you confirm? > > > > Confirmed, this one fixes the issue! :) > > > Same here. > > Tested-by: Guenter Roeck > > Guenter > > > Tested-by: Marek Szyprowski > > Thanks both. Greg, can you review this and pull this into driver-core-next too? -Saravana
Re: [PATCH v2] driver core: Fix sleeping in invalid context during device link deletion
On Fri, Jul 17, 2020 at 12:13:04AM +0200, Marek Szyprowski wrote: > Hi Saravana, > > On 16.07.2020 23:45, Saravana Kannan wrote: > > Marek and Guenter reported that commit 287905e68dd2 ("driver core: > > Expose device link details in sysfs") caused sleeping/scheduling while > > atomic warnings. > > > > BUG: sleeping function called from invalid context at > > kernel/locking/mutex.c:935 > > in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: kworker/0:1 > > 2 locks held by kworker/0:1/12: > >#0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: > > process_one_work+0x174/0x7dc > >#1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: > > process_one_work+0x174/0x7dc > > Preemption disabled at: > > [] srcu_invoke_callbacks+0xc0/0x154 > > - 8< - SNIP > > [] (device_del) from [] (device_unregister+0x24/0x64) > > [] (device_unregister) from [] > > (srcu_invoke_callbacks+0xcc/0x154) > > [] (srcu_invoke_callbacks) from [] > > (process_one_work+0x234/0x7dc) > > [] (process_one_work) from [] (worker_thread+0x44/0x51c) > > [] (worker_thread) from [] (kthread+0x158/0x1a0) > > [] (kthread) from [] (ret_from_fork+0x14/0x20) > > Exception stack(0xee921fb0 to 0xee921ff8) > > > > This was caused by the device link device being released in the context > > of srcu_invoke_callbacks(). There is no need to wait till the RCU > > callback to release the device link device. So release the device > > earlier and move the call_srcu() into the device release code. That way, > > the memory will get freed only after the device is released AND the RCU > > callback is called. > > > > Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs") > > Reported-by: Marek Szyprowski > > Reported-by: Guenter Roeck > > Signed-off-by: Saravana Kannan > > --- > > > > v1->v2: > > - Better fix > > - Changed subject > > - v1 is this patch > > https://lore.kernel.org/lkml/20200716050846.2047110-1-sarava...@google.com/ > > > > Marek and Guenter, > > > > I reproduced the original issue and tested this fix. Seems to work for > > me. Can you confirm? > > Confirmed, this one fixes the issue! :) > Same here. Tested-by: Guenter Roeck Guenter > Tested-by: Marek Szyprowski > > > Thanks, > > Saravana > > > > drivers/base/core.c | 45 +++-- > > 1 file changed, 27 insertions(+), 18 deletions(-) > > > > diff --git a/drivers/base/core.c b/drivers/base/core.c > > index 5373ddd029f6..ec16b97d45ed 100644 > > --- a/drivers/base/core.c > > +++ b/drivers/base/core.c > > @@ -306,10 +306,34 @@ static struct attribute *devlink_attrs[] = { > > }; > > ATTRIBUTE_GROUPS(devlink); > > > > +static void device_link_free(struct device_link *link) > > +{ > > + while (refcount_dec_not_one(>rpm_active)) > > + pm_runtime_put(link->supplier); > > + > > + put_device(link->consumer); > > + put_device(link->supplier); > > + kfree(link); > > +} > > + > > +#ifdef CONFIG_SRCU > > +static void __device_link_free_srcu(struct rcu_head *rhead) > > +{ > > + device_link_free(container_of(rhead, struct device_link, rcu_head)); > > +} > > + > > static void devlink_dev_release(struct device *dev) > > { > > - kfree(to_devlink(dev)); > > + struct device_link *link = to_devlink(dev); > > + > > + call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu); > > } > > +#else > > +static void devlink_dev_release(struct device *dev) > > +{ > > + device_link_free(to_devlink(dev)); > > +} > > +#endif > > > > static struct class devlink_class = { > > .name = "devlink", > > @@ -730,22 +754,7 @@ static void > > device_link_add_missing_supplier_links(void) > > mutex_unlock(_lock); > > } > > > > -static void device_link_free(struct device_link *link) > > -{ > > - while (refcount_dec_not_one(>rpm_active)) > > - pm_runtime_put(link->supplier); > > - > > - put_device(link->consumer); > > - put_device(link->supplier); > > - device_unregister(>link_dev); > > -} > > - > > #ifdef CONFIG_SRCU > > -static void __device_link_free_srcu(struct rcu_head *rhead) > > -{ > > - device_link_free(container_of(rhead, struct device_link, rcu_head)); > > -} > > - > > static void __device_link_del(struct kref *kref) > > { > > struct device_link *link = container_of(kref, struct device_link, kref); > > @@ -758,7 +767,7 @@ static void __device_link_del(struct kref *kref) > > > > list_del_rcu(>s_node); > > list_del_rcu(>c_node); > > - call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu); > > + device_unregister(>link_dev); > > } > > #else /* !CONFIG_SRCU */ > > static void __device_link_del(struct kref *kref) > > @@ -773,7 +782,7 @@ static void __device_link_del(struct kref *kref) > > > > list_del(>s_node); > > list_del(>c_node); > > - device_link_free(link); > > + device_unregister(>link_dev); > > } > > #endif /* !CONFIG_SRCU */ > > > > Best regards > -- > Marek Szyprowski, PhD > Samsung R
Re: [PATCH v2] driver core: Fix sleeping in invalid context during device link deletion
On Thu, Jul 16, 2020 at 3:13 PM Marek Szyprowski wrote: > > Hi Saravana, > > On 16.07.2020 23:45, Saravana Kannan wrote: > > Marek and Guenter reported that commit 287905e68dd2 ("driver core: > > Expose device link details in sysfs") caused sleeping/scheduling while > > atomic warnings. > > > > BUG: sleeping function called from invalid context at > > kernel/locking/mutex.c:935 > > in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: kworker/0:1 > > 2 locks held by kworker/0:1/12: > >#0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: > > process_one_work+0x174/0x7dc > >#1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: > > process_one_work+0x174/0x7dc > > Preemption disabled at: > > [] srcu_invoke_callbacks+0xc0/0x154 > > - 8< - SNIP > > [] (device_del) from [] (device_unregister+0x24/0x64) > > [] (device_unregister) from [] > > (srcu_invoke_callbacks+0xcc/0x154) > > [] (srcu_invoke_callbacks) from [] > > (process_one_work+0x234/0x7dc) > > [] (process_one_work) from [] (worker_thread+0x44/0x51c) > > [] (worker_thread) from [] (kthread+0x158/0x1a0) > > [] (kthread) from [] (ret_from_fork+0x14/0x20) > > Exception stack(0xee921fb0 to 0xee921ff8) > > > > This was caused by the device link device being released in the context > > of srcu_invoke_callbacks(). There is no need to wait till the RCU > > callback to release the device link device. So release the device > > earlier and move the call_srcu() into the device release code. That way, > > the memory will get freed only after the device is released AND the RCU > > callback is called. > > > > Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs") > > Reported-by: Marek Szyprowski > > Reported-by: Guenter Roeck > > Signed-off-by: Saravana Kannan > > --- > > > > v1->v2: > > - Better fix > > - Changed subject > > - v1 is this patch > > https://lore.kernel.org/lkml/20200716050846.2047110-1-sarava...@google.com/ > > > > Marek and Guenter, > > > > I reproduced the original issue and tested this fix. Seems to work for > > me. Can you confirm? > > Confirmed, this one fixes the issue! :) > > Tested-by: Marek Szyprowski Thanks! -Saravana
Re: [PATCH v2] driver core: Fix sleeping in invalid context during device link deletion
Hi Saravana, On 16.07.2020 23:45, Saravana Kannan wrote: > Marek and Guenter reported that commit 287905e68dd2 ("driver core: > Expose device link details in sysfs") caused sleeping/scheduling while > atomic warnings. > > BUG: sleeping function called from invalid context at > kernel/locking/mutex.c:935 > in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: kworker/0:1 > 2 locks held by kworker/0:1/12: >#0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: > process_one_work+0x174/0x7dc >#1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: > process_one_work+0x174/0x7dc > Preemption disabled at: > [] srcu_invoke_callbacks+0xc0/0x154 > - 8< - SNIP > [] (device_del) from [] (device_unregister+0x24/0x64) > [] (device_unregister) from [] > (srcu_invoke_callbacks+0xcc/0x154) > [] (srcu_invoke_callbacks) from [] > (process_one_work+0x234/0x7dc) > [] (process_one_work) from [] (worker_thread+0x44/0x51c) > [] (worker_thread) from [] (kthread+0x158/0x1a0) > [] (kthread) from [] (ret_from_fork+0x14/0x20) > Exception stack(0xee921fb0 to 0xee921ff8) > > This was caused by the device link device being released in the context > of srcu_invoke_callbacks(). There is no need to wait till the RCU > callback to release the device link device. So release the device > earlier and move the call_srcu() into the device release code. That way, > the memory will get freed only after the device is released AND the RCU > callback is called. > > Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs") > Reported-by: Marek Szyprowski > Reported-by: Guenter Roeck > Signed-off-by: Saravana Kannan > --- > > v1->v2: > - Better fix > - Changed subject > - v1 is this patch > https://lore.kernel.org/lkml/20200716050846.2047110-1-sarava...@google.com/ > > Marek and Guenter, > > I reproduced the original issue and tested this fix. Seems to work for > me. Can you confirm? Confirmed, this one fixes the issue! :) Tested-by: Marek Szyprowski > Thanks, > Saravana > > drivers/base/core.c | 45 +++-- > 1 file changed, 27 insertions(+), 18 deletions(-) > > diff --git a/drivers/base/core.c b/drivers/base/core.c > index 5373ddd029f6..ec16b97d45ed 100644 > --- a/drivers/base/core.c > +++ b/drivers/base/core.c > @@ -306,10 +306,34 @@ static struct attribute *devlink_attrs[] = { > }; > ATTRIBUTE_GROUPS(devlink); > > +static void device_link_free(struct device_link *link) > +{ > + while (refcount_dec_not_one(>rpm_active)) > + pm_runtime_put(link->supplier); > + > + put_device(link->consumer); > + put_device(link->supplier); > + kfree(link); > +} > + > +#ifdef CONFIG_SRCU > +static void __device_link_free_srcu(struct rcu_head *rhead) > +{ > + device_link_free(container_of(rhead, struct device_link, rcu_head)); > +} > + > static void devlink_dev_release(struct device *dev) > { > - kfree(to_devlink(dev)); > + struct device_link *link = to_devlink(dev); > + > + call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu); > } > +#else > +static void devlink_dev_release(struct device *dev) > +{ > + device_link_free(to_devlink(dev)); > +} > +#endif > > static struct class devlink_class = { > .name = "devlink", > @@ -730,22 +754,7 @@ static void device_link_add_missing_supplier_links(void) > mutex_unlock(_lock); > } > > -static void device_link_free(struct device_link *link) > -{ > - while (refcount_dec_not_one(>rpm_active)) > - pm_runtime_put(link->supplier); > - > - put_device(link->consumer); > - put_device(link->supplier); > - device_unregister(>link_dev); > -} > - > #ifdef CONFIG_SRCU > -static void __device_link_free_srcu(struct rcu_head *rhead) > -{ > - device_link_free(container_of(rhead, struct device_link, rcu_head)); > -} > - > static void __device_link_del(struct kref *kref) > { > struct device_link *link = container_of(kref, struct device_link, kref); > @@ -758,7 +767,7 @@ static void __device_link_del(struct kref *kref) > > list_del_rcu(>s_node); > list_del_rcu(>c_node); > - call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu); > + device_unregister(>link_dev); > } > #else /* !CONFIG_SRCU */ > static void __device_link_del(struct kref *kref) > @@ -773,7 +782,7 @@ static void __device_link_del(struct kref *kref) > > list_del(>s_node); > list_del(>c_node); > - device_link_free(link); > + device_unregister(>link_dev); > } > #endif /* !CONFIG_SRCU */ > Best regards -- Marek Szyprowski, PhD Samsung R Institute Poland
[PATCH v2] driver core: Fix sleeping in invalid context during device link deletion
Marek and Guenter reported that commit 287905e68dd2 ("driver core: Expose device link details in sysfs") caused sleeping/scheduling while atomic warnings. BUG: sleeping function called from invalid context at kernel/locking/mutex.c:935 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 12, name: kworker/0:1 2 locks held by kworker/0:1/12: #0: ee8074a8 ((wq_completion)rcu_gp){+.+.}-{0:0}, at: process_one_work+0x174/0x7dc #1: ee921f20 ((work_completion)(>work)){+.+.}-{0:0}, at: process_one_work+0x174/0x7dc Preemption disabled at: [] srcu_invoke_callbacks+0xc0/0x154 - 8< - SNIP [] (device_del) from [] (device_unregister+0x24/0x64) [] (device_unregister) from [] (srcu_invoke_callbacks+0xcc/0x154) [] (srcu_invoke_callbacks) from [] (process_one_work+0x234/0x7dc) [] (process_one_work) from [] (worker_thread+0x44/0x51c) [] (worker_thread) from [] (kthread+0x158/0x1a0) [] (kthread) from [] (ret_from_fork+0x14/0x20) Exception stack(0xee921fb0 to 0xee921ff8) This was caused by the device link device being released in the context of srcu_invoke_callbacks(). There is no need to wait till the RCU callback to release the device link device. So release the device earlier and move the call_srcu() into the device release code. That way, the memory will get freed only after the device is released AND the RCU callback is called. Fixes: 287905e68dd2 ("driver core: Expose device link details in sysfs") Reported-by: Marek Szyprowski Reported-by: Guenter Roeck Signed-off-by: Saravana Kannan --- v1->v2: - Better fix - Changed subject - v1 is this patch https://lore.kernel.org/lkml/20200716050846.2047110-1-sarava...@google.com/ Marek and Guenter, I reproduced the original issue and tested this fix. Seems to work for me. Can you confirm? Thanks, Saravana drivers/base/core.c | 45 +++-- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 5373ddd029f6..ec16b97d45ed 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -306,10 +306,34 @@ static struct attribute *devlink_attrs[] = { }; ATTRIBUTE_GROUPS(devlink); +static void device_link_free(struct device_link *link) +{ + while (refcount_dec_not_one(>rpm_active)) + pm_runtime_put(link->supplier); + + put_device(link->consumer); + put_device(link->supplier); + kfree(link); +} + +#ifdef CONFIG_SRCU +static void __device_link_free_srcu(struct rcu_head *rhead) +{ + device_link_free(container_of(rhead, struct device_link, rcu_head)); +} + static void devlink_dev_release(struct device *dev) { - kfree(to_devlink(dev)); + struct device_link *link = to_devlink(dev); + + call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu); } +#else +static void devlink_dev_release(struct device *dev) +{ + device_link_free(to_devlink(dev)); +} +#endif static struct class devlink_class = { .name = "devlink", @@ -730,22 +754,7 @@ static void device_link_add_missing_supplier_links(void) mutex_unlock(_lock); } -static void device_link_free(struct device_link *link) -{ - while (refcount_dec_not_one(>rpm_active)) - pm_runtime_put(link->supplier); - - put_device(link->consumer); - put_device(link->supplier); - device_unregister(>link_dev); -} - #ifdef CONFIG_SRCU -static void __device_link_free_srcu(struct rcu_head *rhead) -{ - device_link_free(container_of(rhead, struct device_link, rcu_head)); -} - static void __device_link_del(struct kref *kref) { struct device_link *link = container_of(kref, struct device_link, kref); @@ -758,7 +767,7 @@ static void __device_link_del(struct kref *kref) list_del_rcu(>s_node); list_del_rcu(>c_node); - call_srcu(_links_srcu, >rcu_head, __device_link_free_srcu); + device_unregister(>link_dev); } #else /* !CONFIG_SRCU */ static void __device_link_del(struct kref *kref) @@ -773,7 +782,7 @@ static void __device_link_del(struct kref *kref) list_del(>s_node); list_del(>c_node); - device_link_free(link); + device_unregister(>link_dev); } #endif /* !CONFIG_SRCU */ -- 2.28.0.rc0.105.gf9edc3c819-goog