For an ITER_BVEC, we can just iterate the iov and add the pages
to the bio directly. This requires that the caller doesn't release
the pages on IO completion; we add a BIO_HOLD_PAGES flag for that.
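
As a rough caller-side sketch (not part of this patch; my_page and
my_bdev are made-up placeholders), a caller that already has kernel
pages would go through the bvec path like this:

	struct bio_vec bv = {
		.bv_page	= my_page,
		.bv_offset	= 0,
		.bv_len		= PAGE_SIZE,
	};
	struct iov_iter iter;
	struct bio *bio;
	int ret;

	iov_iter_bvec(&iter, WRITE, &bv, 1, PAGE_SIZE);

	bio = bio_alloc(GFP_KERNEL, 1);
	bio_set_dev(bio, my_bdev);
	bio->bi_opf = REQ_OP_WRITE;

	/* adds the bvec pages directly, sets BIO_HOLD_PAGES, pins nothing */
	ret = bio_iov_iter_get_pages(bio, &iter);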

The two current callers of bio_iov_iter_get_pages() are updated to
check if they need to release pages on completion. This makes them
work with bvecs that already contain kernel-mapped pages.
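
On the completion side, the expected pattern is the one the
fs/block_dev.c and fs/iomap.c hunks below implement; a minimal sketch
of such an end_io handler (my_dio_end_io is a hypothetical name) is:

	static void my_dio_end_io(struct bio *bio)
	{
		struct bio_vec *bvec;
		int i;

		/* only drop references on pages we pinned ourselves */
		if (!bio_flagged(bio, BIO_HOLD_PAGES))
			bio_for_each_segment_all(bvec, bio, i)
				put_page(bvec->bv_page);
		bio_put(bio);
	}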

Signed-off-by: Jens Axboe <ax...@kernel.dk>
---
 block/bio.c               | 59 ++++++++++++++++++++++++++++++++-------
 fs/block_dev.c            |  5 ++--
 fs/iomap.c                |  5 ++--
 include/linux/blk_types.h |  1 +
 4 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 4db1008309ed..7af4f45d2ed6 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -828,6 +828,23 @@ int bio_add_page(struct bio *bio, struct page *page,
 }
 EXPORT_SYMBOL(bio_add_page);
 
+static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
+{
+       const struct bio_vec *bv = iter->bvec;
+       unsigned int len;
+       size_t size;
+
+       len = min_t(size_t, bv->bv_len, iter->count);
+       size = bio_add_page(bio, bv->bv_page, len,
+                               bv->bv_offset + iter->iov_offset);
+       if (size == len) {
+               iov_iter_advance(iter, size);
+               return 0;
+       }
+
+       return -EINVAL;
+}
+
 #define PAGE_PTRS_PER_BVEC     (sizeof(struct bio_vec) / sizeof(struct page *))
 
 /**
@@ -876,23 +893,43 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 }
 
 /**
- * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio
+ * bio_iov_iter_get_pages - add user or kernel pages to a bio
  * @bio: bio to add pages to
- * @iter: iov iterator describing the region to be mapped
+ * @iter: iov iterator describing the region to be added
+ *
+ * This takes either an iterator pointing to user memory, or one pointing to
+ * kernel pages (BVEC iterator). If we're adding user pages, we pin them and
+ * map them into the kernel. On IO completion, the caller should put those
+ * pages. If we're adding kernel pages, we just have to add the pages to the
+ * bio directly. We don't grab an extra reference to those pages (the user
+ * should already have that), and we don't put the page on IO completion.
+ * The caller needs to check if the bio is flagged BIO_HOLD_PAGES on IO
+ * completion. If it isn't, then pages should be released.
  *
- * Pins pages from *iter and appends them to @bio's bvec array. The
- * pages will have to be released using put_page() when done.
  * The function tries, but does not guarantee, to pin as many pages as
- * fit into the bio, or are requested in *iter, whatever is smaller.
- * If MM encounters an error pinning the requested pages, it stops.
- * Error is returned only if 0 pages could be pinned.
+ * fit into the bio, or are requested in *iter, whatever is smaller. If
+ * MM encounters an error pinning the requested pages, it stops. Error
+ * is returned only if 0 pages could be pinned.
  */
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
+       const bool is_bvec = iov_iter_is_bvec(iter);
        unsigned short orig_vcnt = bio->bi_vcnt;
 
+       /*
+        * If this is a BVEC iter, then the pages are kernel pages. Don't
+        * release them on IO completion.
+        */
+       if (is_bvec)
+               bio_set_flag(bio, BIO_HOLD_PAGES);
+
        do {
-               int ret = __bio_iov_iter_get_pages(bio, iter);
+               int ret;
+
+               if (is_bvec)
+                       ret = __bio_iov_bvec_add_pages(bio, iter);
+               else
+                       ret = __bio_iov_iter_get_pages(bio, iter);
 
                if (unlikely(ret))
                        return bio->bi_vcnt > orig_vcnt ? 0 : ret;
@@ -1634,7 +1671,8 @@ static void bio_dirty_fn(struct work_struct *work)
                next = bio->bi_private;
 
                bio_set_pages_dirty(bio);
-               bio_release_pages(bio);
+               if (!bio_flagged(bio, BIO_HOLD_PAGES))
+                       bio_release_pages(bio);
                bio_put(bio);
        }
 }
@@ -1650,7 +1688,8 @@ void bio_check_pages_dirty(struct bio *bio)
                        goto defer;
        }
 
-       bio_release_pages(bio);
+       if (!bio_flagged(bio, BIO_HOLD_PAGES))
+               bio_release_pages(bio);
        bio_put(bio);
        return;
 defer:
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 392e2bfb636f..fa2720bc0243 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -338,8 +338,9 @@ static void blkdev_bio_end_io(struct bio *bio)
                struct bio_vec *bvec;
                int i;
 
-               bio_for_each_segment_all(bvec, bio, i)
-                       put_page(bvec->bv_page);
+               if (!bio_flagged(bio, BIO_HOLD_PAGES))
+                       bio_for_each_segment_all(bvec, bio, i)
+                               put_page(bvec->bv_page);
                bio_put(bio);
        }
 }
diff --git a/fs/iomap.c b/fs/iomap.c
index 4ee50b76b4a1..0a64c9c51203 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1582,8 +1582,9 @@ static void iomap_dio_bio_end_io(struct bio *bio)
                struct bio_vec *bvec;
                int i;
 
-               bio_for_each_segment_all(bvec, bio, i)
-                       put_page(bvec->bv_page);
+               if (!bio_flagged(bio, BIO_HOLD_PAGES))
+                       bio_for_each_segment_all(bvec, bio, i)
+                               put_page(bvec->bv_page);
                bio_put(bio);
        }
 }
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 5c7e7f859a24..97e206855cd3 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -215,6 +215,7 @@ struct bio {
 /*
  * bio flags
  */
+#define BIO_HOLD_PAGES 0       /* don't put O_DIRECT pages */
 #define BIO_SEG_VALID  1       /* bi_phys_segments valid */
 #define BIO_CLONED     2       /* doesn't own data */
 #define BIO_BOUNCED    3       /* bio is a bounce bio */
-- 
2.17.1
