Re: [PATCH v2] block: Ensure that a request queue is dissociated from the cgroup controller

2018-04-12 Thread Alexandru Moise
On Thu, Apr 12, 2018 at 01:11:11PM -0600, Bart Van Assche wrote:
> Several block drivers call alloc_disk() followed by put_disk() if
> something fails before device_add_disk() is called without calling
> blk_cleanup_queue(). Make sure that also for this scenario a request
> queue is dissociated from the cgroup controller. This patch avoids
> that loading the parport_pc, paride and pf drivers triggers the
> following kernel crash:
> 
> BUG: KASAN: null-ptr-deref in pi_init+0x42e/0x580 [paride]
> Read of size 4 at addr 0008 by task modprobe/744
> Call Trace:
> dump_stack+0x9a/0xeb
> kasan_report+0x139/0x350
> pi_init+0x42e/0x580 [paride]
> pf_init+0x2bb/0x1000 [pf]
> do_one_initcall+0x8e/0x405
> do_init_module+0xd9/0x2f2
> load_module+0x3ab4/0x4700
> SYSC_finit_module+0x176/0x1a0
> do_syscall_64+0xee/0x2b0
> entry_SYSCALL_64_after_hwframe+0x42/0xb7
> 
> Reported-by: Alexandru Moise <00moses.alexande...@gmail.com>
> Fixes: a063057d7c73 ("block: Fix a race between request queue removal and the 
> block cgroup controller")
> Signed-off-by: Bart Van Assche <bart.vanass...@wdc.com>
> Tested-by: Alexandru Moise <00moses.alexande...@gmail.com>
> Cc: Tejun Heo <t...@kernel.org>
> Cc: Alexandru Moise <00moses.alexande...@gmail.com>
> Cc: Joseph Qi <joseph...@linux.alibaba.com>
> ---
> 
> Changes compared to v1:
> - Surrounded use of blkcg_root with #ifdef CONFIG_BLK_CGROUP / #endif.
> - Added Alex' Tested-by.
> 
>  block/blk-sysfs.c | 31 +++
>  1 file changed, 31 insertions(+)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index f8457d6f0190..46bb932721dc 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -793,6 +793,37 @@ static void __blk_release_queue(struct work_struct *work)
>   blk_stat_remove_callback(q, q->poll_cb);
>   blk_stat_free_callback(q->poll_cb);
>  
> + if (!blk_queue_dead(q)) {
> + /*
> +  * Last reference was dropped without having called
> +  * blk_cleanup_queue().
> +  */
> + WARN_ONCE(blk_queue_init_done(q),
> +   "request queue %p has been registered but 
> blk_cleanup_queue() has not been called for that queue\n",
> +   q);
> + bdi_put(q->backing_dev_info);
> + blkcg_exit_queue(q);
> +
> + if (q->elevator) {
> + ioc_clear_queue(q);
> + elevator_exit(q, q->elevator);
> + }
> + }
> +
> +#ifdef CONFIG_BLK_CGROUP
> + {
> + struct blkcg_gq *blkg;
> +
> + rcu_read_lock();
> + blkg = blkg_lookup(_root, q);
> + rcu_read_unlock();
> +
> + WARN(blkg,
> +  "request queue %p is being released but it has not yet 
> been removed from the blkcg controller\n",
> +  q);
> + }
> +#endif
> +
>   blk_free_queue_stats(q->stats);
>  
>   blk_exit_rl(q, >root_rl);
I'm not qualified enough to review it but build and boot are good.

Tested-by: Alexandru Moise <00moses.alexande...@gmail.com>

Thanks,
../Alex
> -- 
> 2.16.2
> 


Re: [PATCH] block: Ensure that a request queue is dissociated from the cgroup controller

2018-04-11 Thread Alexandru Moise
On Thu, Apr 12, 2018 at 06:20:34AM +0200, Alexandru Moise wrote:
> On Wed, Apr 11, 2018 at 07:58:52PM -0600, Bart Van Assche wrote:
> > Several block drivers call alloc_disk() followed by put_disk() if
> > something fails before device_add_disk() is called without calling
> > blk_cleanup_queue(). Make sure that also for this scenario a request
> > queue is dissociated from the cgroup controller. This patch avoids
> > that loading the parport_pc, paride and pf drivers triggers the
> > following kernel crash:
> > 
> > BUG: KASAN: null-ptr-deref in pi_init+0x42e/0x580 [paride]
> > Read of size 4 at addr 0008 by task modprobe/744
> > Call Trace:
> > dump_stack+0x9a/0xeb
> > kasan_report+0x139/0x350
> > pi_init+0x42e/0x580 [paride]
> > pf_init+0x2bb/0x1000 [pf]
> > do_one_initcall+0x8e/0x405
> > do_init_module+0xd9/0x2f2
> > load_module+0x3ab4/0x4700
> > SYSC_finit_module+0x176/0x1a0
> > do_syscall_64+0xee/0x2b0
> > entry_SYSCALL_64_after_hwframe+0x42/0xb7
> > 
> > Reported-by: Alexandru Moise <00moses.alexande...@gmail.com>
> > Fixes: a063057d7c73 ("block: Fix a race between request queue removal and 
> > the block cgroup controller")
> > Signed-off-by: Bart Van Assche <bart.vanass...@wdc.com>
> > Cc: Alexandru Moise <00moses.alexande...@gmail.com>
> > Cc: Tejun Heo <t...@kernel.org>
> > ---
> >  block/blk-sysfs.c | 26 ++
> >  1 file changed, 26 insertions(+)
> > 
> > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> > index f8457d6f0190..2e134da78f82 100644
> > --- a/block/blk-sysfs.c
> > +++ b/block/blk-sysfs.c
> > @@ -788,11 +788,37 @@ static void blk_free_queue_rcu(struct rcu_head 
> > *rcu_head)
> >  static void __blk_release_queue(struct work_struct *work)
> >  {
> > struct request_queue *q = container_of(work, typeof(*q), release_work);
> > +   struct blkcg_gq *gq;
> >  
> > if (test_bit(QUEUE_FLAG_POLL_STATS, >queue_flags))
> > blk_stat_remove_callback(q, q->poll_cb);
> > blk_stat_free_callback(q->poll_cb);
> >  
> > +   if (!blk_queue_dead(q)) {
> > +   /*
> > +* Last reference was dropped without having called
> > +* blk_cleanup_queue().
> > +*/
> > +   WARN_ONCE(blk_queue_init_done(q),
> > + "request queue %p has been registered but 
> > blk_cleanup_queue() has not been called for that queue\n",
> > + q);
> > +   bdi_put(q->backing_dev_info);
> > +   blkcg_exit_queue(q);
> > +
> > +   if (q->elevator) {
> > +   ioc_clear_queue(q);
> > +   elevator_exit(q, q->elevator);
> > +   }
> > +   }
> > +
> > +   rcu_read_lock();
> > +   gq = blkg_lookup(_root, q);
> > +   rcu_read_unlock();
> > +
> > +   WARN(gq,
> > +"request queue %p is being released but it has not yet been 
> > removed from the blkcg controller\n",
> > +q);
> > +
> > blk_free_queue_stats(q->stats);
> 
> This solution is good. Thanks for fixing this!
> 
> Tested-by: Alexandru Moise <00moses.alexande...@gmail.com>
> 
> ../Alex

Oh, you also need the no-ops for !CONFIG_BLK_CGROUP, also there is no
blkcg_root then as well.

Thanks,
../Alex
> 
> >  
> > blk_exit_rl(q, >root_rl);
> > -- 
> > 2.16.2
> > 


Re: [PATCH] block: Ensure that a request queue is dissociated from the cgroup controller

2018-04-11 Thread Alexandru Moise
On Wed, Apr 11, 2018 at 07:58:52PM -0600, Bart Van Assche wrote:
> Several block drivers call alloc_disk() followed by put_disk() if
> something fails before device_add_disk() is called without calling
> blk_cleanup_queue(). Make sure that also for this scenario a request
> queue is dissociated from the cgroup controller. This patch avoids
> that loading the parport_pc, paride and pf drivers triggers the
> following kernel crash:
> 
> BUG: KASAN: null-ptr-deref in pi_init+0x42e/0x580 [paride]
> Read of size 4 at addr 0008 by task modprobe/744
> Call Trace:
> dump_stack+0x9a/0xeb
> kasan_report+0x139/0x350
> pi_init+0x42e/0x580 [paride]
> pf_init+0x2bb/0x1000 [pf]
> do_one_initcall+0x8e/0x405
> do_init_module+0xd9/0x2f2
> load_module+0x3ab4/0x4700
> SYSC_finit_module+0x176/0x1a0
> do_syscall_64+0xee/0x2b0
> entry_SYSCALL_64_after_hwframe+0x42/0xb7
> 
> Reported-by: Alexandru Moise <00moses.alexande...@gmail.com>
> Fixes: a063057d7c73 ("block: Fix a race between request queue removal and the 
> block cgroup controller")
> Signed-off-by: Bart Van Assche <bart.vanass...@wdc.com>
> Cc: Alexandru Moise <00moses.alexande...@gmail.com>
> Cc: Tejun Heo <t...@kernel.org>
> ---
>  block/blk-sysfs.c | 26 ++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index f8457d6f0190..2e134da78f82 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -788,11 +788,37 @@ static void blk_free_queue_rcu(struct rcu_head 
> *rcu_head)
>  static void __blk_release_queue(struct work_struct *work)
>  {
>   struct request_queue *q = container_of(work, typeof(*q), release_work);
> + struct blkcg_gq *gq;
>  
>   if (test_bit(QUEUE_FLAG_POLL_STATS, >queue_flags))
>   blk_stat_remove_callback(q, q->poll_cb);
>   blk_stat_free_callback(q->poll_cb);
>  
> + if (!blk_queue_dead(q)) {
> + /*
> +  * Last reference was dropped without having called
> +  * blk_cleanup_queue().
> +  */
> + WARN_ONCE(blk_queue_init_done(q),
> +   "request queue %p has been registered but 
> blk_cleanup_queue() has not been called for that queue\n",
> +   q);
> + bdi_put(q->backing_dev_info);
> + blkcg_exit_queue(q);
> +
> + if (q->elevator) {
> + ioc_clear_queue(q);
> + elevator_exit(q, q->elevator);
> + }
> + }
> +
> + rcu_read_lock();
> + gq = blkg_lookup(_root, q);
> + rcu_read_unlock();
> +
> + WARN(gq,
> +  "request queue %p is being released but it has not yet been 
> removed from the blkcg controller\n",
> +  q);
> +
>   blk_free_queue_stats(q->stats);

This solution is good. Thanks for fixing this!

Tested-by: Alexandru Moise <00moses.alexande...@gmail.com>

../Alex

>  
>   blk_exit_rl(q, >root_rl);
> -- 
> 2.16.2
> 


Re: [PATCH] block: Ensure that a request queue is dissociated from the cgroup controller

2018-04-11 Thread Alexandru Moise
On Wed, Apr 11, 2018 at 07:58:52PM -0600, Bart Van Assche wrote:
> Several block drivers call alloc_disk() followed by put_disk() if
> something fails before device_add_disk() is called without calling
> blk_cleanup_queue(). Make sure that also for this scenario a request
> queue is dissociated from the cgroup controller. This patch avoids
> that loading the parport_pc, paride and pf drivers triggers the
> following kernel crash:
> 
> BUG: KASAN: null-ptr-deref in pi_init+0x42e/0x580 [paride]
> Read of size 4 at addr 0008 by task modprobe/744
> Call Trace:
> dump_stack+0x9a/0xeb
> kasan_report+0x139/0x350
> pi_init+0x42e/0x580 [paride]
> pf_init+0x2bb/0x1000 [pf]
> do_one_initcall+0x8e/0x405
> do_init_module+0xd9/0x2f2
> load_module+0x3ab4/0x4700
> SYSC_finit_module+0x176/0x1a0
> do_syscall_64+0xee/0x2b0
> entry_SYSCALL_64_after_hwframe+0x42/0xb7
> 
> Reported-by: Alexandru Moise <00moses.alexande...@gmail.com>
> Fixes: a063057d7c73 ("block: Fix a race between request queue removal and the 
> block cgroup controller")
> Signed-off-by: Bart Van Assche <bart.vanass...@wdc.com>
> Cc: Alexandru Moise <00moses.alexande...@gmail.com>
> Cc: Tejun Heo <t...@kernel.org>
> ---
>  block/blk-sysfs.c | 26 ++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> index f8457d6f0190..2e134da78f82 100644
> --- a/block/blk-sysfs.c
> +++ b/block/blk-sysfs.c
> @@ -788,11 +788,37 @@ static void blk_free_queue_rcu(struct rcu_head 
> *rcu_head)
>  static void __blk_release_queue(struct work_struct *work)
>  {
>   struct request_queue *q = container_of(work, typeof(*q), release_work);
> + struct blkcg_gq *gq;
>  
>   if (test_bit(QUEUE_FLAG_POLL_STATS, >queue_flags))
>   blk_stat_remove_callback(q, q->poll_cb);
>   blk_stat_free_callback(q->poll_cb);
>  
> + if (!blk_queue_dead(q)) {
> + /*
> +  * Last reference was dropped without having called
> +  * blk_cleanup_queue().
> +  */
> + WARN_ONCE(blk_queue_init_done(q),
> +   "request queue %p has been registered but 
> blk_cleanup_queue() has not been called for that queue\n",
> +   q);
> + bdi_put(q->backing_dev_info);
> + blkcg_exit_queue(q);
> +
> + if (q->elevator) {
> + ioc_clear_queue(q);
> + elevator_exit(q, q->elevator);
> + }
> + }
> +
> + rcu_read_lock();
> + gq = blkg_lookup(_root, q);
> + rcu_read_unlock();
> +
> + WARN(gq,
> +  "request queue %p is being released but it has not yet been 
> removed from the blkcg controller\n",
> +  q);
> +
>   blk_free_queue_stats(q->stats);

This solution is good. Thanks for fixing this!

Tested-by: Alexandru Moise <00moses.alexande...@gmail.com>

../Alex

>  
>   blk_exit_rl(q, >root_rl);
> -- 
> 2.16.2
> 


Re: [PATCH v2] blk-cgroup: remove entries in blkg_tree before queue release

2018-04-11 Thread Alexandru Moise
On Wed, Apr 11, 2018 at 01:55:25PM -0600, Bart Van Assche wrote:
> On 04/11/18 13:00, Alexandru Moise wrote:
> > But the root cause of it is in blkcg_init_queue() when blkg_create() returns
> > an ERR ptr, because it tries to insert into a populated index into 
> > blkcg->blkg_tree,
> > the entry that we fail to remove at __blk_release_queue().
> 
> Hello Alex,
> 
> Had you considered something like the untested patch below?
> 
> Thanks,
> 
> Bart.
> 
> 
> diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> index 1c16694ae145..f2ced19e74b8 100644
> --- a/block/blk-cgroup.c
> +++ b/block/blk-cgroup.c
> @@ -1191,14 +1191,17 @@ int blkcg_init_queue(struct request_queue *q)
>   if (preloaded)
>   radix_tree_preload_end();
> 
> - if (IS_ERR(blkg))
> - return PTR_ERR(blkg);
> + if (IS_ERR(blkg)) {
> + ret = PTR_ERR(blkg);
> + goto destroy_all;
> + }
> 
>   q->root_blkg = blkg;
>   q->root_rl.blkg = blkg;
> 
>   ret = blk_throtl_init(q);
>   if (ret) {
> +destroy_all:
>   spin_lock_irq(q->queue_lock);
>   blkg_destroy_all(q);
>   spin_unlock_irq(q->queue_lock);
> 

Hi, I tested it, it doesn't solve the problem.
By the time you get here it's already too late, my patch
prevents this from failing in the first place.

I would have liked this more than my solution though.

../Alex



Re: [PATCH v2] blk-cgroup: remove entries in blkg_tree before queue release

2018-04-11 Thread Alexandru Moise
On Wed, Apr 11, 2018 at 03:54:53PM +, Bart Van Assche wrote:
> On Wed, 2018-04-11 at 16:28 +0200, Alexandru Moise wrote:
> > [0.76] BUG: unable to handle kernel NULL pointer dereference at 
> > 01b4
> > [0.763350] Kernel panic - not syncing: Attempted to kill init! 
> > exitcode=0x0009
> > [0.763350]
> > [0.76] PGD 0 P4D 0
> > [0.76] Oops:  [#2] PREEMPT SMP
> > [0.76] CPU: 0 PID: 6 Comm: kworker/u12:0 Tainted: G  D  
> > 4.16.0-ARCH+ #81
> > [0.76] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
> > 1.11.0-20171110_100015-anatol 04/01/20$
> > [0.76] Workqueue: nvme-reset-wq nvme_reset_work
> > [0.76] RIP: 0010:blk_queue_flag_set+0xf/0x40
> > [0.76] RSP: :c91bfcb0 EFLAGS: 00010246
> > [0.76] RAX: 88003b698000 RBX:  RCX: 
> > 
> > [0.76] RDX: 88003b698000 RSI: fff4 RDI: 
> > 001c
> > [0.76] RBP: c91bfcc0 R08:  R09: 
> > 
> > [0.76] R10: eaeaa980 R11: 814e0970 R12: 
> > 001c
> > [0.76] R13:  R14:  R15: 
> > 88003aad8010
> > [0.76] FS:  () GS:88003e40() 
> > knlGS:
> > [0.76] CS:  0010 DS:  ES:  CR0: 80050033
> > [0.76] CR2: 01b4 CR3: 02209001 CR4: 
> > 000606f0
> > [0.76] Call Trace:
> > [0.76]  blk_mq_quiesce_queue+0x23/0x80
> > [0.76]  nvme_dev_disable+0x34f/0x480
> > [0.76]  ? nvme_irq+0x50/0x50
> > [0.76]  ? dev_warn+0x64/0x80
> > [0.76]  nvme_reset_work+0x13de/0x1570
> > [0.76]  ? __switch_to_asm+0x34/0x70
> > [0.76]  ? __switch_to_asm+0x40/0x70
> > [0.76]  ? _raw_spin_unlock_irq+0x15/0x30
> > [0.76]  ? finish_task_switch+0x156/0x210
> > [0.76]  process_one_work+0x20c/0x3d0
> > [0.76]  worker_thread+0x216/0x400
> > [0.76]  kthread+0x125/0x130
> > [0.76]  ? process_one_work+0x3d0/0x3d0
> > [0.76]  ? __kthread_bind_mask+0x60/0x60
> > [0.76]  ret_from_fork+0x3a/0x50
> 
> Hello Alexandru,
> 
> What made you look at cgroups? In the above register dump I see that %rbx == 
> NULL.
> I think that means that the queue pointer argument of blk_queue_flag_set() is 
> NULL.
> The NVMe initiator driver should never pass a NULL pointer to 
> blk_mq_quiesce_queue().
> Please ask the NVMe driver maintainers for their opinion on the linux-nvme 
> mailing
> list.
> 
> Thanks,
> 
> Bart.

The %rbx == NULL is only a symptom of the cgroup mishandling, perhaps we could
improve error handling in the NVMe driver, but I can say that about a lot of 
block
drivers actually, perhaps I will write some patches in the future to improve the
error handling.

But the root cause of it is in blkcg_init_queue() when blkg_create() returns
an ERR ptr, because it tries to insert into a populated index into 
blkcg->blkg_tree,
the entry that we fail to remove at __blk_release_queue().

Thanks,
../Alex


> 
> 
> 


Re: [PATCH v2] blk-cgroup: remove entries in blkg_tree before queue release

2018-04-11 Thread Alexandru Moise
On Wed, Apr 11, 2018 at 07:20:19AM -0700, Tejun Heo wrote:
> Hello,
> 
> On Wed, Apr 11, 2018 at 12:12:56PM +0200, Alexandru Moise wrote:
> > > But we already do this through calling blkcg_exit_queue() from
> > > __blk_release_queue().  What's missing?
> > 
> > Hi,
> > 
> > It might be the jetlag but I can't see how you end up calling
> > blkcg_exit_queue() from __blk_release_queue().
> > 
> > As I see it the only way to reach blkcg_exit_queue() is from
> > blk_cleanup_queue(), which I don't see anywhere in __blk_release_queue().
> > 
> > I suspect that I'm just fixing a corner case though and
> > the general case is what you describe or similar.
> 
> Ah, that changed recently.  Can you please check out the current
> upstream git master?
> 
> Thanks.
> 
Just did, without my patch I see this crash:

[0.75] CPU: 4 PID: 1 Comm: swapper/0 Not tainted 4.16.0-ARCH+ #81   
  [7/1949]
[0.75] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.11.0-20171110_100015-anatol 04/01/204
[0.75] RIP: 0010:pi_init+0x23f/0x2f0
[0.75] RSP: :c9197d90 EFLAGS: 00010246
[0.75] RAX:  RBX: 0020 RCX: 0038
[0.75] RDX: 0001 RSI: 0001 RDI: 
[0.75] RBP: c9197e18 R08:  R09: 
[0.75] R10: eaeda600 R11: 88003b69f164 R12: 82e2d740
[0.75] R13:  R14:  R15: 
[0.75] FS:  () GS:88003e50() 
knlGS:
[0.75] CS:  0010 DS:  ES:  CR0: 80050033
[0.75] CR2: 0008 CR3: 02209001 CR4: 000606e0
[0.75] Call Trace:
[0.75]  pf_init+0x1db/0x3be
[0.75]  ? pcd_init+0x4e8/0x4e8
[0.75]  do_one_initcall+0x9e/0x1b0
[0.75]  ? do_early_param+0x97/0x97
[0.75]  kernel_init_freeable+0x259/0x2fd
[0.75]  ? rest_init+0xd0/0xd0
[0.75]  ? syscall_slow_exit_work+0x1c/0x160
[0.75]  kernel_init+0xe/0x100
[0.75]  ret_from_fork+0x3a/0x50
[0.75] Code: 75 6a 49 8b 06 48 8b 40 78 48 85 c0 74 08 4c 89 f7 e8 46 
76 51 00 83 c3 01 3b 5d a8 7d 0d 49
[0.75] RIP: pi_init+0x23f/0x2f0 RSP: c9197d90
[0.75] CR2: 0008
[0.75] ---[ end trace 12004f267bb8bf7d ]---
[0.76] BUG: unable to handle kernel NULL pointer dereference at 
01b4
[0.763350] Kernel panic - not syncing: Attempted to kill init! 
exitcode=0x0009
[0.763350]
[0.76] PGD 0 P4D 0
[0.76] Oops:  [#2] PREEMPT SMP
[0.76] CPU: 0 PID: 6 Comm: kworker/u12:0 Tainted: G  D  
4.16.0-ARCH+ #81
[0.76] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
1.11.0-20171110_100015-anatol 04/01/20$
[0.76] Workqueue: nvme-reset-wq nvme_reset_work
[0.76] RIP: 0010:blk_queue_flag_set+0xf/0x40
[0.76] RSP: :c91bfcb0 EFLAGS: 00010246
[0.76] RAX: 88003b698000 RBX:  RCX: 
[0.76] RDX: 88003b698000 RSI: fff4 RDI: 001c
[0.76] RBP: c91bfcc0 R08:  R09: 
[0.76] R10: eaeaa980 R11: 814e0970 R12: 001c
[0.76] R13:  R14:  R15: 88003aad8010
[0.76] FS:  () GS:88003e40() 
knlGS:
[0.76] CS:  0010 DS:  ES:  CR0: 80050033
[0.76] CR2: 01b4 CR3: 02209001 CR4: 000606f0
[0.76] Call Trace:
[0.76]  blk_mq_quiesce_queue+0x23/0x80
[0.76]  nvme_dev_disable+0x34f/0x480
[0.76]  ? nvme_irq+0x50/0x50
[0.76]  ? dev_warn+0x64/0x80
[0.76]  nvme_reset_work+0x13de/0x1570
[0.76]  ? __switch_to_asm+0x34/0x70
[0.76]  ? __switch_to_asm+0x40/0x70
[0.76]  ? _raw_spin_unlock_irq+0x15/0x30
[0.76]  ? finish_task_switch+0x156/0x210
[0.76]  process_one_work+0x20c/0x3d0
[0.76]  worker_thread+0x216/0x400
[0.76]  kthread+0x125/0x130
[0.76]  ? process_one_work+0x3d0/0x3d0
[0.76]  ? __kthread_bind_mask+0x60/0x60
[0.76]  ret_from_fork+0x3a/0x50


With the patch the crash goes away,

Thanks,
../Alex

> -- 
> tejun


Re: [PATCH v2] blk-cgroup: remove entries in blkg_tree before queue release

2018-04-11 Thread Alexandru Moise
On Mon, Apr 09, 2018 at 03:09:38PM -0700, Tejun Heo wrote:
> (cc'ing Joseph as he worked on the area recently, hi!)
> 
> Hello,
> 
> On Sat, Apr 07, 2018 at 12:21:48PM +0200, Alexandru Moise wrote:
> > The q->id is used as an index within the blkg_tree radix tree.
> > 
> > If the entry is not released before reclaiming the blk_queue_ida's id
> > blkcg_init_queue() within a different driver from which this id
> > was originally for can fail due to the entry at that index within
> > the radix tree already existing.
> > 
> > Signed-off-by: Alexandru Moise <00moses.alexande...@gmail.com>
> > ---
> > v2: Added no-op for !CONFIG_BLK_CGROUP
> > 
> >  block/blk-cgroup.c | 2 +-
> >  block/blk-sysfs.c  | 4 
> >  include/linux/blk-cgroup.h | 3 +++
> >  3 files changed, 8 insertions(+), 1 deletion(-)
> > 
> > diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
> > index 1c16694ae145..224e937dbb59 100644
> > --- a/block/blk-cgroup.c
> > +++ b/block/blk-cgroup.c
> > @@ -369,7 +369,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
> >   *
> >   * Destroy all blkgs associated with @q.
> >   */
> > -static void blkg_destroy_all(struct request_queue *q)
> > +void blkg_destroy_all(struct request_queue *q)
> >  {
> > struct blkcg_gq *blkg, *n;
> >  
> > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
> > index d00d1b0ec109..a72866458f22 100644
> > --- a/block/blk-sysfs.c
> > +++ b/block/blk-sysfs.c
> > @@ -816,6 +816,10 @@ static void __blk_release_queue(struct work_struct 
> > *work)
> > if (q->bio_split)
> > bioset_free(q->bio_split);
> >  
> > +   spin_lock_irq(q->queue_lock);
> > +   blkg_destroy_all(q);
> > +   spin_unlock_irq(q->queue_lock);
> > +
> > ida_simple_remove(_queue_ida, q->id);
> > call_rcu(>rcu_head, blk_free_queue_rcu);
> 
> But we already do this through calling blkcg_exit_queue() from
> __blk_release_queue().  What's missing?

Hi,

It might be the jetlag but I can't see how you end up calling
blkcg_exit_queue() from __blk_release_queue().

As I see it the only way to reach blkcg_exit_queue() is from
blk_cleanup_queue(), which I don't see anywhere in __blk_release_queue().

I suspect that I'm just fixing a corner case though and
the general case is what you describe or similar.

../Alex
> 
> Thanks.
> 
> -- 
> tejun


[PATCH v2] blk-cgroup: remove entries in blkg_tree before queue release

2018-04-07 Thread Alexandru Moise
The q->id is used as an index within the blkg_tree radix tree.

If the entry is not released before reclaiming the blk_queue_ida's id
blkcg_init_queue() within a different driver from which this id
was originally for can fail due to the entry at that index within
the radix tree already existing.

Signed-off-by: Alexandru Moise <00moses.alexande...@gmail.com>
---
v2: Added no-op for !CONFIG_BLK_CGROUP

 block/blk-cgroup.c | 2 +-
 block/blk-sysfs.c  | 4 
 include/linux/blk-cgroup.h | 3 +++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 1c16694ae145..224e937dbb59 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -369,7 +369,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
  *
  * Destroy all blkgs associated with @q.
  */
-static void blkg_destroy_all(struct request_queue *q)
+void blkg_destroy_all(struct request_queue *q)
 {
struct blkcg_gq *blkg, *n;
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d00d1b0ec109..a72866458f22 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -816,6 +816,10 @@ static void __blk_release_queue(struct work_struct *work)
if (q->bio_split)
bioset_free(q->bio_split);
 
+   spin_lock_irq(q->queue_lock);
+   blkg_destroy_all(q);
+   spin_unlock_irq(q->queue_lock);
+
ida_simple_remove(_queue_ida, q->id);
call_rcu(>rcu_head, blk_free_queue_rcu);
 }
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 6c666fd7de3c..3d60b1d1973d 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -179,6 +179,8 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 int blkcg_init_queue(struct request_queue *q);
 void blkcg_drain_queue(struct request_queue *q);
 void blkcg_exit_queue(struct request_queue *q);
+void blkg_destroy_all(struct request_queue *q);
+
 
 /* Blkio controller policy registration */
 int blkcg_policy_register(struct blkcg_policy *pol);
@@ -740,6 +742,7 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg 
*blkcg, void *key) { ret
 static inline int blkcg_init_queue(struct request_queue *q) { return 0; }
 static inline void blkcg_drain_queue(struct request_queue *q) { }
 static inline void blkcg_exit_queue(struct request_queue *q) { }
+static inline void blkg_destroy_all(struct request_queue *q) { }
 static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; }
 static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { }
 static inline int blkcg_activate_policy(struct request_queue *q,
-- 
2.16.2



[PATCH] blk-cgroup: remove entries in blkg_tree before queue release

2018-04-06 Thread Alexandru Moise
The q->id is used as an index within the blkg_tree radix tree.

If the entry is not released before reclaiming the blk_queue_ida's id
blkcg_init_queue() within a different driver from which this id
was originally for can fail due to the entry at that index within
the radix tree already existing.

Signed-off-by: Alexandru Moise <00moses.alexande...@gmail.com>
---
 block/blk-cgroup.c | 2 +-
 block/blk-sysfs.c  | 4 
 include/linux/blk-cgroup.h | 2 ++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 1c16694ae145..224e937dbb59 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -369,7 +369,7 @@ static void blkg_destroy(struct blkcg_gq *blkg)
  *
  * Destroy all blkgs associated with @q.
  */
-static void blkg_destroy_all(struct request_queue *q)
+void blkg_destroy_all(struct request_queue *q)
 {
struct blkcg_gq *blkg, *n;
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index d00d1b0ec109..a72866458f22 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -816,6 +816,10 @@ static void __blk_release_queue(struct work_struct *work)
if (q->bio_split)
bioset_free(q->bio_split);
 
+   spin_lock_irq(q->queue_lock);
+   blkg_destroy_all(q);
+   spin_unlock_irq(q->queue_lock);
+
ida_simple_remove(_queue_ida, q->id);
call_rcu(>rcu_head, blk_free_queue_rcu);
 }
diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
index 6c666fd7de3c..4470fdb6ea8f 100644
--- a/include/linux/blk-cgroup.h
+++ b/include/linux/blk-cgroup.h
@@ -179,6 +179,8 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 int blkcg_init_queue(struct request_queue *q);
 void blkcg_drain_queue(struct request_queue *q);
 void blkcg_exit_queue(struct request_queue *q);
+void blkg_destroy_all(struct request_queue *q);
+
 
 /* Blkio controller policy registration */
 int blkcg_policy_register(struct blkcg_policy *pol);
-- 
2.16.3