Calling mempool_alloc in a loop is not safe unless the maximum allocation size times the maximum number of threads using it is less than the minimum pool size. Use the new mempool_alloc_bulk helper to allocate all missing elements in one pass to remove this deadlock risk. This also means that non-pool allocations now use alloc_pages_bulk which can be significantly faster than a loop over individual page allocations.
Signed-off-by: Christoph Hellwig <[email protected]> --- block/blk-crypto-fallback.c | 54 ++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/block/blk-crypto-fallback.c b/block/blk-crypto-fallback.c index 33aa7b26ed37..2f78027f0cce 100644 --- a/block/blk-crypto-fallback.c +++ b/block/blk-crypto-fallback.c @@ -22,7 +22,7 @@ #include "blk-cgroup.h" #include "blk-crypto-internal.h" -static unsigned int num_prealloc_bounce_pg = 32; +static unsigned int num_prealloc_bounce_pg = BIO_MAX_VECS; module_param(num_prealloc_bounce_pg, uint, 0); MODULE_PARM_DESC(num_prealloc_bounce_pg, "Number of preallocated bounce pages for the blk-crypto crypto API fallback"); @@ -164,11 +164,21 @@ static bool blk_crypto_fallback_bio_valid(struct bio *bio) static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio) { struct bio *src_bio = enc_bio->bi_private; - int i; + struct page **pages = (struct page **)enc_bio->bi_io_vec; + struct bio_vec *bv; + unsigned int i; - for (i = 0; i < enc_bio->bi_vcnt; i++) - mempool_free(enc_bio->bi_io_vec[i].bv_page, - blk_crypto_bounce_page_pool); + /* + * Use the same trick as the alloc side to avoid the need for an extra + * pages array. 
+ */ + bio_for_each_bvec_all(bv, enc_bio, i) + pages[i] = bv->bv_page; + + i = mempool_free_bulk(blk_crypto_bounce_page_pool, (void **)pages, + enc_bio->bi_vcnt); + if (i < enc_bio->bi_vcnt) + release_pages(pages + i, enc_bio->bi_vcnt - i); src_bio->bi_status = enc_bio->bi_status; @@ -176,9 +186,12 @@ static void blk_crypto_fallback_encrypt_endio(struct bio *enc_bio) bio_endio(src_bio); } +#define PAGE_PTRS_PER_BVEC (sizeof(struct bio_vec) / sizeof(struct page *)) + static struct bio *blk_crypto_alloc_enc_bio(struct bio *bio_src, - unsigned int nr_segs) + unsigned int nr_segs, struct page ***pages_ret) { + struct page **pages; struct bio *bio; bio = bio_alloc_bioset(bio_src->bi_bdev, nr_segs, bio_src->bi_opf, @@ -192,6 +205,29 @@ static struct bio *blk_crypto_alloc_enc_bio(struct bio *bio_src, bio->bi_write_stream = bio_src->bi_write_stream; bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; bio_clone_blkg_association(bio, bio_src); + + /* + * Move page array up in the allocated memory for the bio vecs as far as + * possible so that we can start filling biovecs from the beginning + * without overwriting the temporary page array. + */ + static_assert(PAGE_PTRS_PER_BVEC > 1); + pages = (struct page **)bio->bi_io_vec; + pages += nr_segs * (PAGE_PTRS_PER_BVEC - 1); + + /* + * Try a bulk allocation first. This could leave random pages in the + * array unallocated, but we'll fix that up later in mempool_alloc_bulk. + * + * Note: alloc_pages_bulk needs the array to be zeroed, as it assumes + * any non-zero slot already contains a valid allocation. 
+ */ + memset(pages, 0, sizeof(struct page *) * nr_segs); + if (alloc_pages_bulk(GFP_NOFS, nr_segs, pages) < nr_segs) { + mempool_alloc_bulk(blk_crypto_bounce_page_pool, (void **)pages, + nr_segs, GFP_NOIO); + } + *pages_ret = pages; return bio; } @@ -234,6 +270,7 @@ static blk_status_t __blk_crypto_fallback_encrypt_bio(struct bio *src_bio, struct scatterlist src, dst; union blk_crypto_iv iv; struct bio *enc_bio = NULL; + struct page **enc_pages; unsigned int nr_segs; unsigned int enc_idx = 0; unsigned int j; @@ -259,11 +296,10 @@ static blk_status_t __blk_crypto_fallback_encrypt_bio(struct bio *src_bio, if (!enc_bio) { enc_bio = blk_crypto_alloc_enc_bio(src_bio, - min(nr_segs, BIO_MAX_VECS)); + min(nr_segs, BIO_MAX_VECS), &enc_pages); } - enc_page = mempool_alloc(blk_crypto_bounce_page_pool, - GFP_NOIO); + enc_page = enc_pages[enc_idx]; __bio_add_page(enc_bio, enc_page, src_bv.bv_len, src_bv.bv_offset); -- 2.47.3
