On 12/21/18 6:50 AM, Jens Axboe wrote:
...
> @@ -228,13 +228,15 @@ static bool bt_iter(struct sbitmap *bitmap, unsigned 
> int bitnr, void *data)
>  
>       if (!reserved)
>               bitnr += tags->nr_reserved_tags;
> -     rq = tags->rqs[bitnr];
> +     if (tags->rqs[bitnr].queue != hctx->queue)
> +             return true;
>  
>       /*
>        * We can hit rq == NULL here, because the tagging functions
>        * test and set the bit before assigning ->rqs[].
>        */
> -     if (rq && rq->q == hctx->queue)
> +     rq = tags->rqs[bitnr].rq;
> +     if (rq)
>               return iter_data->fn(hctx, rq, iter_data->data, reserved);
>       return true;
>  }
> @@ -263,7 +265,9 @@ static void bt_for_each(struct blk_mq_hw_ctx *hctx, 
> struct sbitmap_queue *bt,
>               .reserved = reserved,
>       };
>  
> +     rcu_read_lock();
>       sbitmap_for_each_set(&bt->sb, bt_iter, &iter_data);
> +     rcu_read_unlock();
>  }
>  
>  struct bt_tags_iter_data {
...
> @@ -2038,15 +2071,30 @@ void blk_mq_free_rqs(struct blk_mq_tag_set *set, 
> struct blk_mq_tags *tags,
>               }
>       }

Just checking for a request_queue match in bt_iter does not seem to be enough.
blk_mq_free_rqs could be invoked when switching the io scheduler.
At that moment, the request_queue could still match. Maybe we still need to
clear the associated tags->rqs[] entries.

By clearing the associated tags->rqs[] entries and doing a synchronize_rcu after that,
bt_iter would only ever see a stable request in idle state, never a freed one.


Thanks
Jianchao

>  
> -     while (!list_empty(&tags->page_list)) {
> -             page = list_first_entry(&tags->page_list, struct page, lru);
> -             list_del_init(&page->lru);
> +     if (list_empty(&tags->page_list))
> +             return;
> +
> +     rpl = kmalloc(sizeof(*rpl), GFP_NOIO);
> +     if (rpl) {
> +             INIT_LIST_HEAD(&rpl->list);
> +             list_splice_init(&tags->page_list, &rpl->list);
> +
> +             /* Punt to RCU free, so we don't race with tag iteration */
> +             INIT_RCU_WORK(&rpl->rcu_work, blk_mq_rcu_free_pages);
> +             rpl->on_stack = false;
> +             queue_rcu_work(system_wq, &rpl->rcu_work);
> +     } else {

Reply via email to