On Thu, 2018-12-20 at 14:48 -0700, Jens Axboe wrote:
> -void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
> - unsigned int hctx_idx)
> +static void blk_mq_rcu_free_pages(struct work_struct *work)
> {
> + struct blk_mq_tags *tags = container_of(to_rcu_work(work),
> + struct blk_mq_tags, rcu_work);
> struct page *page;
>
> + while (!list_empty(&tags->page_list)) {
> + page = list_first_entry(&tags->page_list, struct page, lru);
> + list_del_init(&page->lru);
> + /*
> + * Remove kmemleak object previously allocated in
> + * blk_mq_init_rq_map().
> + */
> + kmemleak_free(page_address(page));
> + __free_pages(page, page->private);
> + }
> +}
> +
> +void blk_mq_free_rqs(struct blk_mq_tag_set *set, struct blk_mq_tags *tags,
> + unsigned int hctx_idx)
> +{
> if (tags->rqs && set->ops->exit_request) {
> int i;
>
> @@ -2038,16 +2061,9 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set,
> struct blk_mq_tags *tags,
> }
> }
>
> - while (!list_empty(&tags->page_list)) {
> - page = list_first_entry(&tags->page_list, struct page, lru);
> - list_del_init(&page->lru);
> - /*
> - * Remove kmemleak object previously allocated in
> - * blk_mq_init_rq_map().
> - */
> - kmemleak_free(page_address(page));
> - __free_pages(page, page->private);
> - }
> + /* Punt to RCU free, so we don't race with tag iteration */
> + INIT_RCU_WORK(&tags->rcu_work, blk_mq_rcu_free_pages);
> + queue_rcu_work(system_wq, &tags->rcu_work);
> }
This can only work correctly if blk_mq_rcu_free_pages() is called before
INIT_RCU_WORK() is called a second time for the same blk_mq_tags structure
and if blk_mq_rcu_free_pages() is called before struct blk_mq_tags is freed.
What provides these guarantees? Did I perhaps miss something?
Thanks,
Bart.