On Thu, Jun 27, 2019 at 11:49:08AM +0900, Damien Le Moal wrote:
> To allow the SCSI subsystem scsi_execute_req() function to issue
> requests using large buffers that are better allocated with vmalloc()
> rather than kmalloc(), modify bio_map_kern() and bio_copy_kern() to
> allow passing a buffer allocated with vmalloc(). To do so, detect
> vmalloc-ed buffers using is_vmalloc_addr(). For vmalloc-ed buffers,
> flush the buffer using flush_kernel_vmap_range(), use vmalloc_to_page()
> instead of virt_to_page() to obtain the pages of the buffer, and
> invalidate the buffer addresses with invalidate_kernel_vmap_range() on
> completion of read BIOs. This last point is executed using the function
> bio_invalidate_vmalloc_pages() which is defined only if the
> architecture defines ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE, that is, if the
> architecture actually needs the invalidation done.
>
> Fixes: 515ce6061312 ("scsi: sd_zbc: Fix sd_zbc_report_zones() buffer
> allocation")
> Fixes: e76239a3748c ("block: add a report_zones method")
> Cc: [email protected]
> Signed-off-by: Damien Le Moal <[email protected]>
> ---
> block/bio.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 42 insertions(+), 1 deletion(-)
>
> diff --git a/block/bio.c b/block/bio.c
> index ce797d73bb43..1c21d1e7f1b8 100644
> --- a/block/bio.c
> +++ b/block/bio.c
> @@ -16,6 +16,7 @@
> #include <linux/workqueue.h>
> #include <linux/cgroup.h>
> #include <linux/blk-cgroup.h>
> +#include <linux/highmem.h>
>
> #include <trace/events/block.h>
> #include "blk.h"
> @@ -1479,8 +1480,26 @@ void bio_unmap_user(struct bio *bio)
> bio_put(bio);
> }
>
> +#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
> +static void bio_invalidate_vmalloc_pages(struct bio *bio)
> +{
> + if (bio->bi_private) {
> + struct bvec_iter_all iter_all;
> + struct bio_vec *bvec;
> + unsigned long len = 0;
> +
> + bio_for_each_segment_all(bvec, bio, iter_all)
> + len += bvec->bv_len;
> + invalidate_kernel_vmap_range(bio->bi_private, len);
> + }
> +}
> +#else
> +static void bio_invalidate_vmalloc_pages(struct bio *bio) {}
> +#endif
> +
> static void bio_map_kern_endio(struct bio *bio)
> {
> + bio_invalidate_vmalloc_pages(bio);
> bio_put(bio);
> }
>
> @@ -1501,6 +1520,8 @@ struct bio *bio_map_kern(struct request_queue *q, void
> *data, unsigned int len,
> unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
> unsigned long start = kaddr >> PAGE_SHIFT;
> const int nr_pages = end - start;
> + bool is_vmalloc = is_vmalloc_addr(data);
> + struct page *page;
> int offset, i;
> struct bio *bio;
>
> @@ -1508,6 +1529,12 @@ struct bio *bio_map_kern(struct request_queue *q, void
> *data, unsigned int len,
> if (!bio)
> return ERR_PTR(-ENOMEM);
>
> + if (is_vmalloc) {
> + flush_kernel_vmap_range(data, len);
> + if ((!op_is_write(bio_op(bio))))
> + bio->bi_private = data;
> + }
> +
> offset = offset_in_page(kaddr);
> for (i = 0; i < nr_pages; i++) {
> unsigned int bytes = PAGE_SIZE - offset;
> @@ -1518,7 +1545,11 @@ struct bio *bio_map_kern(struct request_queue *q, void
> *data, unsigned int len,
> if (bytes > len)
> bytes = len;
>
> - if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
> + if (!is_vmalloc)
> + page = virt_to_page(data);
> + else
> + page = vmalloc_to_page(data);
> + if (bio_add_pc_page(q, bio, page, bytes,
> offset) < bytes) {
> /* we don't support partial mappings */
> bio_put(bio);
> @@ -1531,6 +1562,7 @@ struct bio *bio_map_kern(struct request_queue *q, void
> *data, unsigned int len,
> }
>
> bio->bi_end_io = bio_map_kern_endio;
> +
> return bio;
> }
> EXPORT_SYMBOL(bio_map_kern);
> @@ -1543,6 +1575,7 @@ static void bio_copy_kern_endio(struct bio *bio)
>
> static void bio_copy_kern_endio_read(struct bio *bio)
> {
> + unsigned long len = 0;
> char *p = bio->bi_private;
> struct bio_vec *bvec;
> struct bvec_iter_all iter_all;
> @@ -1550,8 +1583,12 @@ static void bio_copy_kern_endio_read(struct bio *bio)
> bio_for_each_segment_all(bvec, bio, iter_all) {
> memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
> p += bvec->bv_len;
> + len += bvec->bv_len;
> }
>
> + if (is_vmalloc_addr(bio->bi_private))
> + invalidate_kernel_vmap_range(bio->bi_private, len);
> +
> bio_copy_kern_endio(bio);
> }
>
> @@ -1572,6 +1609,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void
> *data, unsigned int len,
> unsigned long kaddr = (unsigned long)data;
> unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
> unsigned long start = kaddr >> PAGE_SHIFT;
> + bool is_vmalloc = is_vmalloc_addr(data);
> struct bio *bio;
> void *p = data;
> int nr_pages = 0;
> @@ -1587,6 +1625,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void
> *data, unsigned int len,
> if (!bio)
> return ERR_PTR(-ENOMEM);
>
> + if (is_vmalloc)
> + flush_kernel_vmap_range(data, len);
> +
Are you sure that invalidate[|flush]_kernel_vmap_range is needed for
bio_copy_kern? The vmalloc buffer isn't involved in the I/O, and is only
accessed by the CPU.
Thanks,
Ming