On 12/21/18 6:50 AM, Jens Axboe wrote:
...
> @@ -228,13 +228,15 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned
> int bitnr, void *data)
>
> if (!reserved)
> bitnr += tags->nr_reserved_tags;
> - rq = tags->rqs[bitnr];
> + if (tags->rqs[bitnr].queue != hctx->queue)
> + return true;
>
> /*
> * We can hit rq == NULL here, because the tagging functions
> * test and set the bit before assigning ->rqs[].
> */
> - if (rq && rq->q == hctx->queue)
> + rq = tags->rqs[bitnr].rq;
> + if (rq)
> return iter_data->fn(hctx, rq, iter_data->data, reserved);
> return true;
> }
> @@ -263,7 +265,9 @@ static void bt_for_each(struct blk_mq_hw_ctx *hctx,
> struct sbitmap_queue *bt,
> .reserved = reserved,
> };
>
> + rcu_read_lock();
> sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
> + rcu_read_unlock();
> }
>
> struct bt_tags_iter_data {
...
> @@ -2038,15 +2071,30 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set,
> struct blk_mq_tags *tags,
> }
> }
Just checking for a request_queue match in bt_iter does not seem to be enough.
blk_mq_free_rqs() can also be invoked when switching the I/O scheduler, and in
that case the request_queue would still match. So maybe we also need to clear
the associated tags->rqs[] entries.
After clearing the associated tags->rqs[] entries and then calling synchronize_rcu(),
bt_iter would only ever see a stable request in the idle state, never a freed one.
Thanks
Jianchao
>
> - while (!list_empty(&tags->page_list)) {
> - page = list_first_entry(&tags->page_list, struct page, lru);
> - list_del_init(&page->lru);
> + if (list_empty(&tags->page_list))
> + return;
> +
> + rpl = kmalloc(sizeof(*rpl), GFP_NOIO);
> + if (rpl) {
> + INIT_LIST_HEAD(&rpl->list);
> + list_splice_init(&tags->page_list, &rpl->list);
> +
> + /* Punt to RCU free, so we don't race with tag iteration */
> + INIT_RCU_WORK(&rpl->rcu_work, blk_mq_rcu_free_pages);
> + rpl->on_stack = false;
> + queue_rcu_work(system_wq, &rpl->rcu_work);
> + } else {