Re: [PATCH v2] block: Ensure that a request queue is dissociated from the cgroup controller
On Thu, Apr 12, 2018 at 01:11:11PM -0600, Bart Van Assche wrote: > Several block drivers call alloc_disk() followed by put_disk() if > something fails before device_add_disk() is called without calling > blk_cleanup_queue(). Make sure that also for this scenario a request > queue is dissociated from the cgroup controller. This patch avoids > that loading the parport_pc, paride and pf drivers triggers the > following kernel crash: > > BUG: KASAN: null-ptr-deref in pi_init+0x42e/0x580 [paride] > Read of size 4 at addr 0008 by task modprobe/744 > Call Trace: > dump_stack+0x9a/0xeb > kasan_report+0x139/0x350 > pi_init+0x42e/0x580 [paride] > pf_init+0x2bb/0x1000 [pf] > do_one_initcall+0x8e/0x405 > do_init_module+0xd9/0x2f2 > load_module+0x3ab4/0x4700 > SYSC_finit_module+0x176/0x1a0 > do_syscall_64+0xee/0x2b0 > entry_SYSCALL_64_after_hwframe+0x42/0xb7 > > Reported-by: Alexandru Moise <00moses.alexande...@gmail.com> > Fixes: a063057d7c73 ("block: Fix a race between request queue removal and the > block cgroup controller") > Signed-off-by: Bart Van Assche <bart.vanass...@wdc.com> > Tested-by: Alexandru Moise <00moses.alexande...@gmail.com> > Cc: Tejun Heo <t...@kernel.org> > Cc: Alexandru Moise <00moses.alexande...@gmail.com> > Cc: Joseph Qi <joseph...@linux.alibaba.com> > --- > > Changes compared to v1: > - Surrounded use of blkcg_root with #ifdef CONFIG_BLK_CGROUP / #endif. > - Added Alex' Tested-by. > > block/blk-sysfs.c | 31 +++ > 1 file changed, 31 insertions(+) > > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c > index f8457d6f0190..46bb932721dc 100644 > --- a/block/blk-sysfs.c > +++ b/block/blk-sysfs.c > @@ -793,6 +793,37 @@ static void __blk_release_queue(struct work_struct *work) > blk_stat_remove_callback(q, q->poll_cb); > blk_stat_free_callback(q->poll_cb); > > + if (!blk_queue_dead(q)) { > + /* > + * Last reference was dropped without having called > + * blk_cleanup_queue(). > + */ > + WARN_ONCE(blk_queue_init_done(q), > + "request queue %p has been registered but > blk_cleanup_queue() has not been called for that queue\n", > + q); > + bdi_put(q->backing_dev_info); > + blkcg_exit_queue(q); > + > + if (q->elevator) { > + ioc_clear_queue(q); > + elevator_exit(q, q->elevator); > + } > + } > + > +#ifdef CONFIG_BLK_CGROUP > + { > + struct blkcg_gq *blkg; > + > + rcu_read_lock(); > + blkg = blkg_lookup(_root, q); > + rcu_read_unlock(); > + > + WARN(blkg, > + "request queue %p is being released but it has not yet > been removed from the blkcg controller\n", > + q); > + } > +#endif > + > blk_free_queue_stats(q->stats); > > blk_exit_rl(q, >root_rl); I'm not qualified enough to review it but build and boot are good. Tested-by: Alexandru Moise <00moses.alexande...@gmail.com> Thanks, ../Alex > -- > 2.16.2 >
Re: [PATCH] block: Ensure that a request queue is dissociated from the cgroup controller
On Thu, Apr 12, 2018 at 06:20:34AM +0200, Alexandru Moise wrote: > On Wed, Apr 11, 2018 at 07:58:52PM -0600, Bart Van Assche wrote: > > Several block drivers call alloc_disk() followed by put_disk() if > > something fails before device_add_disk() is called without calling > > blk_cleanup_queue(). Make sure that also for this scenario a request > > queue is dissociated from the cgroup controller. This patch avoids > > that loading the parport_pc, paride and pf drivers triggers the > > following kernel crash: > > > > BUG: KASAN: null-ptr-deref in pi_init+0x42e/0x580 [paride] > > Read of size 4 at addr 0008 by task modprobe/744 > > Call Trace: > > dump_stack+0x9a/0xeb > > kasan_report+0x139/0x350 > > pi_init+0x42e/0x580 [paride] > > pf_init+0x2bb/0x1000 [pf] > > do_one_initcall+0x8e/0x405 > > do_init_module+0xd9/0x2f2 > > load_module+0x3ab4/0x4700 > > SYSC_finit_module+0x176/0x1a0 > > do_syscall_64+0xee/0x2b0 > > entry_SYSCALL_64_after_hwframe+0x42/0xb7 > > > > Reported-by: Alexandru Moise <00moses.alexande...@gmail.com> > > Fixes: a063057d7c73 ("block: Fix a race between request queue removal and > > the block cgroup controller") > > Signed-off-by: Bart Van Assche <bart.vanass...@wdc.com> > > Cc: Alexandru Moise <00moses.alexande...@gmail.com> > > Cc: Tejun Heo <t...@kernel.org> > > --- > > block/blk-sysfs.c | 26 ++ > > 1 file changed, 26 insertions(+) > > > > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c > > index f8457d6f0190..2e134da78f82 100644 > > --- a/block/blk-sysfs.c > > +++ b/block/blk-sysfs.c > > @@ -788,11 +788,37 @@ static void blk_free_queue_rcu(struct rcu_head > > *rcu_head) > > static void __blk_release_queue(struct work_struct *work) > > { > > struct request_queue *q = container_of(work, typeof(*q), release_work); > > + struct blkcg_gq *gq; > > > > if (test_bit(QUEUE_FLAG_POLL_STATS, >queue_flags)) > > blk_stat_remove_callback(q, q->poll_cb); > > blk_stat_free_callback(q->poll_cb); > > > > + if (!blk_queue_dead(q)) { > > + /* > > +* Last reference was dropped without having called > > +* blk_cleanup_queue(). > > +*/ > > + WARN_ONCE(blk_queue_init_done(q), > > + "request queue %p has been registered but > > blk_cleanup_queue() has not been called for that queue\n", > > + q); > > + bdi_put(q->backing_dev_info); > > + blkcg_exit_queue(q); > > + > > + if (q->elevator) { > > + ioc_clear_queue(q); > > + elevator_exit(q, q->elevator); > > + } > > + } > > + > > + rcu_read_lock(); > > + gq = blkg_lookup(_root, q); > > + rcu_read_unlock(); > > + > > + WARN(gq, > > +"request queue %p is being released but it has not yet been > > removed from the blkcg controller\n", > > +q); > > + > > blk_free_queue_stats(q->stats); > > This solution is good. Thanks for fixing this! > > Tested-by: Alexandru Moise <00moses.alexande...@gmail.com> > > ../Alex Oh, you also need the no-ops for !CONFIG_BLK_CGROUP, also there is no blkcg_root then as well. Thanks, ../Alex > > > > > blk_exit_rl(q, >root_rl); > > -- > > 2.16.2 > >
Re: [PATCH] block: Ensure that a request queue is dissociated from the cgroup controller
On Wed, Apr 11, 2018 at 07:58:52PM -0600, Bart Van Assche wrote: > Several block drivers call alloc_disk() followed by put_disk() if > something fails before device_add_disk() is called without calling > blk_cleanup_queue(). Make sure that also for this scenario a request > queue is dissociated from the cgroup controller. This patch avoids > that loading the parport_pc, paride and pf drivers triggers the > following kernel crash: > > BUG: KASAN: null-ptr-deref in pi_init+0x42e/0x580 [paride] > Read of size 4 at addr 0008 by task modprobe/744 > Call Trace: > dump_stack+0x9a/0xeb > kasan_report+0x139/0x350 > pi_init+0x42e/0x580 [paride] > pf_init+0x2bb/0x1000 [pf] > do_one_initcall+0x8e/0x405 > do_init_module+0xd9/0x2f2 > load_module+0x3ab4/0x4700 > SYSC_finit_module+0x176/0x1a0 > do_syscall_64+0xee/0x2b0 > entry_SYSCALL_64_after_hwframe+0x42/0xb7 > > Reported-by: Alexandru Moise <00moses.alexande...@gmail.com> > Fixes: a063057d7c73 ("block: Fix a race between request queue removal and the > block cgroup controller") > Signed-off-by: Bart Van Assche <bart.vanass...@wdc.com> > Cc: Alexandru Moise <00moses.alexande...@gmail.com> > Cc: Tejun Heo <t...@kernel.org> > --- > block/blk-sysfs.c | 26 ++ > 1 file changed, 26 insertions(+) > > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c > index f8457d6f0190..2e134da78f82 100644 > --- a/block/blk-sysfs.c > +++ b/block/blk-sysfs.c > @@ -788,11 +788,37 @@ static void blk_free_queue_rcu(struct rcu_head > *rcu_head) > static void __blk_release_queue(struct work_struct *work) > { > struct request_queue *q = container_of(work, typeof(*q), release_work); > + struct blkcg_gq *gq; > > if (test_bit(QUEUE_FLAG_POLL_STATS, >queue_flags)) > blk_stat_remove_callback(q, q->poll_cb); > blk_stat_free_callback(q->poll_cb); > > + if (!blk_queue_dead(q)) { > + /* > + * Last reference was dropped without having called > + * blk_cleanup_queue(). > + */ > + WARN_ONCE(blk_queue_init_done(q), > + "request queue %p has been registered but > blk_cleanup_queue() has not been called for that queue\n", > + q); > + bdi_put(q->backing_dev_info); > + blkcg_exit_queue(q); > + > + if (q->elevator) { > + ioc_clear_queue(q); > + elevator_exit(q, q->elevator); > + } > + } > + > + rcu_read_lock(); > + gq = blkg_lookup(_root, q); > + rcu_read_unlock(); > + > + WARN(gq, > + "request queue %p is being released but it has not yet been > removed from the blkcg controller\n", > + q); > + > blk_free_queue_stats(q->stats); This solution is good. Thanks for fixing this! Tested-by: Alexandru Moise <00moses.alexande...@gmail.com> ../Alex > > blk_exit_rl(q, >root_rl); > -- > 2.16.2 >
Re: [PATCH] block: Ensure that a request queue is dissociated from the cgroup controller
On Wed, Apr 11, 2018 at 07:58:52PM -0600, Bart Van Assche wrote: > Several block drivers call alloc_disk() followed by put_disk() if > something fails before device_add_disk() is called without calling > blk_cleanup_queue(). Make sure that also for this scenario a request > queue is dissociated from the cgroup controller. This patch avoids > that loading the parport_pc, paride and pf drivers triggers the > following kernel crash: > > BUG: KASAN: null-ptr-deref in pi_init+0x42e/0x580 [paride] > Read of size 4 at addr 0008 by task modprobe/744 > Call Trace: > dump_stack+0x9a/0xeb > kasan_report+0x139/0x350 > pi_init+0x42e/0x580 [paride] > pf_init+0x2bb/0x1000 [pf] > do_one_initcall+0x8e/0x405 > do_init_module+0xd9/0x2f2 > load_module+0x3ab4/0x4700 > SYSC_finit_module+0x176/0x1a0 > do_syscall_64+0xee/0x2b0 > entry_SYSCALL_64_after_hwframe+0x42/0xb7 > > Reported-by: Alexandru Moise <00moses.alexande...@gmail.com> > Fixes: a063057d7c73 ("block: Fix a race between request queue removal and the > block cgroup controller") > Signed-off-by: Bart Van Assche <bart.vanass...@wdc.com> > Cc: Alexandru Moise <00moses.alexande...@gmail.com> > Cc: Tejun Heo <t...@kernel.org> > --- > block/blk-sysfs.c | 26 ++ > 1 file changed, 26 insertions(+) > > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c > index f8457d6f0190..2e134da78f82 100644 > --- a/block/blk-sysfs.c > +++ b/block/blk-sysfs.c > @@ -788,11 +788,37 @@ static void blk_free_queue_rcu(struct rcu_head > *rcu_head) > static void __blk_release_queue(struct work_struct *work) > { > struct request_queue *q = container_of(work, typeof(*q), release_work); > + struct blkcg_gq *gq; > > if (test_bit(QUEUE_FLAG_POLL_STATS, >queue_flags)) > blk_stat_remove_callback(q, q->poll_cb); > blk_stat_free_callback(q->poll_cb); > > + if (!blk_queue_dead(q)) { > + /* > + * Last reference was dropped without having called > + * blk_cleanup_queue(). > + */ > + WARN_ONCE(blk_queue_init_done(q), > + "request queue %p has been registered but > blk_cleanup_queue() has not been called for that queue\n", > + q); > + bdi_put(q->backing_dev_info); > + blkcg_exit_queue(q); > + > + if (q->elevator) { > + ioc_clear_queue(q); > + elevator_exit(q, q->elevator); > + } > + } > + > + rcu_read_lock(); > + gq = blkg_lookup(_root, q); > + rcu_read_unlock(); > + > + WARN(gq, > + "request queue %p is being released but it has not yet been > removed from the blkcg controller\n", > + q); > + > blk_free_queue_stats(q->stats); This solution is good. Thanks for fixing this! Tested-by: Alexandru Moise <00moses.alexande...@gmail.com> ../Alex > > blk_exit_rl(q, >root_rl); > -- > 2.16.2 >
Re: [PATCH v2] blk-cgroup: remove entries in blkg_tree before queue release
On Wed, Apr 11, 2018 at 01:55:25PM -0600, Bart Van Assche wrote: > On 04/11/18 13:00, Alexandru Moise wrote: > > But the root cause of it is in blkcg_init_queue() when blkg_create() returns > > an ERR ptr, because it tries to insert into a populated index into > > blkcg->blkg_tree, > > the entry that we fail to remove at __blk_release_queue(). > > Hello Alex, > > Had you considered something like the untested patch below? > > Thanks, > > Bart. > > > diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c > index 1c16694ae145..f2ced19e74b8 100644 > --- a/block/blk-cgroup.c > +++ b/block/blk-cgroup.c > @@ -1191,14 +1191,17 @@ int blkcg_init_queue(struct request_queue *q) > if (preloaded) > radix_tree_preload_end(); > > - if (IS_ERR(blkg)) > - return PTR_ERR(blkg); > + if (IS_ERR(blkg)) { > + ret = PTR_ERR(blkg); > + goto destroy_all; > + } > > q->root_blkg = blkg; > q->root_rl.blkg = blkg; > > ret = blk_throtl_init(q); > if (ret) { > +destroy_all: > spin_lock_irq(q->queue_lock); > blkg_destroy_all(q); > spin_unlock_irq(q->queue_lock); > Hi, I tested it, it doesn't solve the problem. By the time you get here it's already too late, my patch prevents this from failing in the first place. I would have liked this more than my solution though. ../Alex
Re: [PATCH v2] blk-cgroup: remove entries in blkg_tree before queue release
On Wed, Apr 11, 2018 at 03:54:53PM +, Bart Van Assche wrote: > On Wed, 2018-04-11 at 16:28 +0200, Alexandru Moise wrote: > > [0.76] BUG: unable to handle kernel NULL pointer dereference at > > 01b4 > > [0.763350] Kernel panic - not syncing: Attempted to kill init! > > exitcode=0x0009 > > [0.763350] > > [0.76] PGD 0 P4D 0 > > [0.76] Oops: [#2] PREEMPT SMP > > [0.76] CPU: 0 PID: 6 Comm: kworker/u12:0 Tainted: G D > > 4.16.0-ARCH+ #81 > > [0.76] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS > > 1.11.0-20171110_100015-anatol 04/01/20$ > > [0.76] Workqueue: nvme-reset-wq nvme_reset_work > > [0.76] RIP: 0010:blk_queue_flag_set+0xf/0x40 > > [0.76] RSP: :c91bfcb0 EFLAGS: 00010246 > > [0.76] RAX: 88003b698000 RBX: RCX: > > > > [0.76] RDX: 88003b698000 RSI: fff4 RDI: > > 001c > > [0.76] RBP: c91bfcc0 R08: R09: > > > > [0.76] R10: eaeaa980 R11: 814e0970 R12: > > 001c > > [0.76] R13: R14: R15: > > 88003aad8010 > > [0.76] FS: () GS:88003e40() > > knlGS: > > [0.76] CS: 0010 DS: ES: CR0: 80050033 > > [0.76] CR2: 01b4 CR3: 02209001 CR4: > > 000606f0 > > [0.76] Call Trace: > > [0.76] blk_mq_quiesce_queue+0x23/0x80 > > [0.76] nvme_dev_disable+0x34f/0x480 > > [0.76] ? nvme_irq+0x50/0x50 > > [0.76] ? dev_warn+0x64/0x80 > > [0.76] nvme_reset_work+0x13de/0x1570 > > [0.76] ? __switch_to_asm+0x34/0x70 > > [0.76] ? __switch_to_asm+0x40/0x70 > > [0.76] ? _raw_spin_unlock_irq+0x15/0x30 > > [0.76] ? finish_task_switch+0x156/0x210 > > [0.76] process_one_work+0x20c/0x3d0 > > [0.76] worker_thread+0x216/0x400 > > [0.76] kthread+0x125/0x130 > > [0.76] ? process_one_work+0x3d0/0x3d0 > > [0.76] ? __kthread_bind_mask+0x60/0x60 > > [0.76] ret_from_fork+0x3a/0x50 > > Hello Alexandru, > > What made you look at cgroups? In the above register dump I see that %rbx == > NULL. > I think that means that the queue pointer argument of blk_queue_flag_set() is > NULL. > The NVMe initiator driver should never pass a NULL pointer to > blk_mq_quiesce_queue(). > Please ask the NVMe driver maintainers for their opinion on the linux-nvme > mailing > list. > > Thanks, > > Bart. The %rbx == NULL is only a symptom of the cgroup mishandling, perhaps we could improve error handling in the NVMe driver, but I can say that about a lot of block drivers actually, perhaps I will write some patches in the future to improve the error handling. But the root cause of it is in blkcg_init_queue() when blkg_create() returns an ERR ptr, because it tries to insert into a populated index into blkcg->blkg_tree, the entry that we fail to remove at __blk_release_queue(). Thanks, ../Alex > > >
Re: [PATCH v2] blk-cgroup: remove entries in blkg_tree before queue release
On Wed, Apr 11, 2018 at 07:20:19AM -0700, Tejun Heo wrote: > Hello, > > On Wed, Apr 11, 2018 at 12:12:56PM +0200, Alexandru Moise wrote: > > > But we already do this through calling blkcg_exit_queue() from > > > __blk_release_queue(). What's missing? > > > > Hi, > > > > It might be the jetlag but I can't see how you end up calling > > blkcg_exit_queue() from __blk_release_queue(). > > > > As I see it the only way to reach blkcg_exit_queue() is from > > blk_cleanup_queue(), which I don't see anywhere in __blk_release_queue(). > > > > I suspect that I'm just fixing a corner case though and > > the general case is what you describe or similar. > > Ah, that changed recently. Can you please check out the current > upstream git master? > > Thanks. > Just did, without my patch I see this crash: [0.75] CPU: 4 PID: 1 Comm: swapper/0 Not tainted 4.16.0-ARCH+ #81 [7/1949] [0.75] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/204 [0.75] RIP: 0010:pi_init+0x23f/0x2f0 [0.75] RSP: :c9197d90 EFLAGS: 00010246 [0.75] RAX: RBX: 0020 RCX: 0038 [0.75] RDX: 0001 RSI: 0001 RDI: [0.75] RBP: c9197e18 R08: R09: [0.75] R10: eaeda600 R11: 88003b69f164 R12: 82e2d740 [0.75] R13: R14: R15: [0.75] FS: () GS:88003e50() knlGS: [0.75] CS: 0010 DS: ES: CR0: 80050033 [0.75] CR2: 0008 CR3: 02209001 CR4: 000606e0 [0.75] Call Trace: [0.75] pf_init+0x1db/0x3be [0.75] ? pcd_init+0x4e8/0x4e8 [0.75] do_one_initcall+0x9e/0x1b0 [0.75] ? do_early_param+0x97/0x97 [0.75] kernel_init_freeable+0x259/0x2fd [0.75] ? rest_init+0xd0/0xd0 [0.75] ? syscall_slow_exit_work+0x1c/0x160 [0.75] kernel_init+0xe/0x100 [0.75] ret_from_fork+0x3a/0x50 [0.75] Code: 75 6a 49 8b 06 48 8b 40 78 48 85 c0 74 08 4c 89 f7 e8 46 76 51 00 83 c3 01 3b 5d a8 7d 0d 49 [0.75] RIP: pi_init+0x23f/0x2f0 RSP: c9197d90 [0.75] CR2: 0008 [0.75] ---[ end trace 12004f267bb8bf7d ]--- [0.76] BUG: unable to handle kernel NULL pointer dereference at 01b4 [0.763350] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0009 [0.763350] [0.76] PGD 0 P4D 0 [0.76] Oops: [#2] PREEMPT SMP [0.76] CPU: 0 PID: 6 Comm: kworker/u12:0 Tainted: G D 4.16.0-ARCH+ #81 [0.76] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-20171110_100015-anatol 04/01/20$ [0.76] Workqueue: nvme-reset-wq nvme_reset_work [0.76] RIP: 0010:blk_queue_flag_set+0xf/0x40 [0.76] RSP: :c91bfcb0 EFLAGS: 00010246 [0.76] RAX: 88003b698000 RBX: RCX: [0.76] RDX: 88003b698000 RSI: fff4 RDI: 001c [0.76] RBP: c91bfcc0 R08: R09: [0.76] R10: eaeaa980 R11: 814e0970 R12: 001c [0.76] R13: R14: R15: 88003aad8010 [0.76] FS: () GS:88003e40() knlGS: [0.76] CS: 0010 DS: ES: CR0: 80050033 [0.76] CR2: 01b4 CR3: 02209001 CR4: 000606f0 [0.76] Call Trace: [0.76] blk_mq_quiesce_queue+0x23/0x80 [0.76] nvme_dev_disable+0x34f/0x480 [0.76] ? nvme_irq+0x50/0x50 [0.76] ? dev_warn+0x64/0x80 [0.76] nvme_reset_work+0x13de/0x1570 [0.76] ? __switch_to_asm+0x34/0x70 [0.76] ? __switch_to_asm+0x40/0x70 [0.76] ? _raw_spin_unlock_irq+0x15/0x30 [0.76] ? finish_task_switch+0x156/0x210 [0.76] process_one_work+0x20c/0x3d0 [0.76] worker_thread+0x216/0x400 [0.76] kthread+0x125/0x130 [0.76] ? process_one_work+0x3d0/0x3d0 [0.76] ? __kthread_bind_mask+0x60/0x60 [0.76] ret_from_fork+0x3a/0x50 With the patch the crash goes away, Thanks, ../Alex > -- > tejun
Re: [PATCH v2] blk-cgroup: remove entries in blkg_tree before queue release
On Mon, Apr 09, 2018 at 03:09:38PM -0700, Tejun Heo wrote: > (cc'ing Joseph as he worked on the area recently, hi!) > > Hello, > > On Sat, Apr 07, 2018 at 12:21:48PM +0200, Alexandru Moise wrote: > > The q->id is used as an index within the blkg_tree radix tree. > > > > If the entry is not released before reclaiming the blk_queue_ida's id > > blkcg_init_queue() within a different driver from which this id > > was originally for can fail due to the entry at that index within > > the radix tree already existing. > > > > Signed-off-by: Alexandru Moise <00moses.alexande...@gmail.com> > > --- > > v2: Added no-op for !CONFIG_BLK_CGROUP > > > > block/blk-cgroup.c | 2 +- > > block/blk-sysfs.c | 4 > > include/linux/blk-cgroup.h | 3 +++ > > 3 files changed, 8 insertions(+), 1 deletion(-) > > > > diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c > > index 1c16694ae145..224e937dbb59 100644 > > --- a/block/blk-cgroup.c > > +++ b/block/blk-cgroup.c > > @@ -369,7 +369,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) > > * > > * Destroy all blkgs associated with @q. > > */ > > -static void blkg_destroy_all(struct request_queue *q) > > +void blkg_destroy_all(struct request_queue *q) > > { > > struct blkcg_gq *blkg, *n; > > > > diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c > > index d00d1b0ec109..a72866458f22 100644 > > --- a/block/blk-sysfs.c > > +++ b/block/blk-sysfs.c > > @@ -816,6 +816,10 @@ static void __blk_release_queue(struct work_struct > > *work) > > if (q->bio_split) > > bioset_free(q->bio_split); > > > > + spin_lock_irq(q->queue_lock); > > + blkg_destroy_all(q); > > + spin_unlock_irq(q->queue_lock); > > + > > ida_simple_remove(_queue_ida, q->id); > > call_rcu(>rcu_head, blk_free_queue_rcu); > > But we already do this through calling blkcg_exit_queue() from > __blk_release_queue(). What's missing? Hi, It might be the jetlag but I can't see how you end up calling blkcg_exit_queue() from __blk_release_queue(). As I see it the only way to reach blkcg_exit_queue() is from blk_cleanup_queue(), which I don't see anywhere in __blk_release_queue(). I suspect that I'm just fixing a corner case though and the general case is what you describe or similar. ../Alex > > Thanks. > > -- > tejun
[PATCH v2] blk-cgroup: remove entries in blkg_tree before queue release
The q->id is used as an index within the blkg_tree radix tree. If the entry is not released before reclaiming the blk_queue_ida's id blkcg_init_queue() within a different driver from which this id was originally for can fail due to the entry at that index within the radix tree already existing. Signed-off-by: Alexandru Moise <00moses.alexande...@gmail.com> --- v2: Added no-op for !CONFIG_BLK_CGROUP block/blk-cgroup.c | 2 +- block/blk-sysfs.c | 4 include/linux/blk-cgroup.h | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 1c16694ae145..224e937dbb59 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -369,7 +369,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) * * Destroy all blkgs associated with @q. */ -static void blkg_destroy_all(struct request_queue *q) +void blkg_destroy_all(struct request_queue *q) { struct blkcg_gq *blkg, *n; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index d00d1b0ec109..a72866458f22 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -816,6 +816,10 @@ static void __blk_release_queue(struct work_struct *work) if (q->bio_split) bioset_free(q->bio_split); + spin_lock_irq(q->queue_lock); + blkg_destroy_all(q); + spin_unlock_irq(q->queue_lock); + ida_simple_remove(_queue_ida, q->id); call_rcu(>rcu_head, blk_free_queue_rcu); } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 6c666fd7de3c..3d60b1d1973d 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -179,6 +179,8 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, int blkcg_init_queue(struct request_queue *q); void blkcg_drain_queue(struct request_queue *q); void blkcg_exit_queue(struct request_queue *q); +void blkg_destroy_all(struct request_queue *q); + /* Blkio controller policy registration */ int blkcg_policy_register(struct blkcg_policy *pol); @@ -740,6 +742,7 @@ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { ret static inline int blkcg_init_queue(struct request_queue *q) { return 0; } static inline void blkcg_drain_queue(struct request_queue *q) { } static inline void blkcg_exit_queue(struct request_queue *q) { } +static inline void blkg_destroy_all(struct request_queue *q) { } static inline int blkcg_policy_register(struct blkcg_policy *pol) { return 0; } static inline void blkcg_policy_unregister(struct blkcg_policy *pol) { } static inline int blkcg_activate_policy(struct request_queue *q, -- 2.16.2
[PATCH] blk-cgroup: remove entries in blkg_tree before queue release
The q->id is used as an index within the blkg_tree radix tree. If the entry is not released before reclaiming the blk_queue_ida's id blkcg_init_queue() within a different driver from which this id was originally for can fail due to the entry at that index within the radix tree already existing. Signed-off-by: Alexandru Moise <00moses.alexande...@gmail.com> --- block/blk-cgroup.c | 2 +- block/blk-sysfs.c | 4 include/linux/blk-cgroup.h | 2 ++ 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 1c16694ae145..224e937dbb59 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -369,7 +369,7 @@ static void blkg_destroy(struct blkcg_gq *blkg) * * Destroy all blkgs associated with @q. */ -static void blkg_destroy_all(struct request_queue *q) +void blkg_destroy_all(struct request_queue *q) { struct blkcg_gq *blkg, *n; diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index d00d1b0ec109..a72866458f22 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -816,6 +816,10 @@ static void __blk_release_queue(struct work_struct *work) if (q->bio_split) bioset_free(q->bio_split); + spin_lock_irq(q->queue_lock); + blkg_destroy_all(q); + spin_unlock_irq(q->queue_lock); + ida_simple_remove(_queue_ida, q->id); call_rcu(>rcu_head, blk_free_queue_rcu); } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 6c666fd7de3c..4470fdb6ea8f 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -179,6 +179,8 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, int blkcg_init_queue(struct request_queue *q); void blkcg_drain_queue(struct request_queue *q); void blkcg_exit_queue(struct request_queue *q); +void blkg_destroy_all(struct request_queue *q); + /* Blkio controller policy registration */ int blkcg_policy_register(struct blkcg_policy *pol); -- 2.16.3