As reported, we must not do pressure stall information (PSI) accounting
for direct IO, because otherwise PSI reports that a page is thrashing
when we are actually just doing IO on hot data.

Apparently, bio_iov_iter_get_pages() is used only by paths doing direct
IO, so just make it avoid setting BIO_WORKINGSET; this also saves the
CPU cycles that were spent on setting it. For fs/direct-io.c, just clear
the flag before submit_bio(), since that path is not of much concern
performance-wise.

Reported-by: Christoph Hellwig <h...@infradead.org>
Suggested-by: Christoph Hellwig <h...@infradead.org>
Suggested-by: Johannes Weiner <han...@cmpxchg.org>
Signed-off-by: Pavel Begunkov <asml.sile...@gmail.com>
---
 block/bio.c    | 25 ++++++++++++++++---------
 fs/direct-io.c |  2 ++
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/block/bio.c b/block/bio.c
index 4a8f77bb3956..3192358c411f 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -963,18 +963,22 @@ EXPORT_SYMBOL_GPL(bio_release_pages);
 static int __bio_iov_bvec_add_pages(struct bio *bio, struct iov_iter *iter)
 {
        const struct bio_vec *bv = iter->bvec;
-       unsigned int len;
-       size_t size;
+       struct page *page = bv->bv_page;
+       bool same_page = false;
+       unsigned int off, len;
 
        if (WARN_ON_ONCE(iter->iov_offset > bv->bv_len))
                return -EINVAL;
 
        len = min_t(size_t, bv->bv_len - iter->iov_offset, iter->count);
-       size = bio_add_page(bio, bv->bv_page, len,
-                               bv->bv_offset + iter->iov_offset);
-       if (unlikely(size != len))
-               return -EINVAL;
-       iov_iter_advance(iter, size);
+       off = bv->bv_offset + iter->iov_offset;
+
+       if (!__bio_try_merge_page(bio, page, len, off, &same_page)) {
+               if (bio_full(bio, len))
+                       return -EINVAL;
+               bio_add_page_noaccount(bio, page, len, off);
+       }
+       iov_iter_advance(iter, len);
        return 0;
 }
 
@@ -1023,8 +1027,8 @@ static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
                                put_page(page);
                } else {
                        if (WARN_ON_ONCE(bio_full(bio, len)))
-                                return -EINVAL;
-                       __bio_add_page(bio, page, len, offset);
+                               return -EINVAL;
+                       bio_add_page_noaccount(bio, page, len, offset);
                }
                offset = 0;
        }
@@ -1099,6 +1103,9 @@ static int __bio_iov_append_get_pages(struct bio *bio, struct iov_iter *iter)
  * fit into the bio, or are requested in @iter, whatever is smaller. If
  * MM encounters an error pinning the requested pages, it stops. Error
  * is returned only if 0 pages could be pinned.
+ *
+ * It also doesn't set BIO_WORKINGSET, so is intended for direct IO. If used
+ * otherwise the caller is responsible to do that to keep PSI happy.
  */
 int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
 {
diff --git a/fs/direct-io.c b/fs/direct-io.c
index d53fa92a1ab6..914a7f600ecd 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -426,6 +426,8 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
        unsigned long flags;
 
        bio->bi_private = dio;
+       /* PSI is only for paging IO */
+       bio_clear_flag(bio, BIO_WORKINGSET);
 
        spin_lock_irqsave(&dio->bio_lock, flags);
        dio->refcount++;
-- 
2.24.0

Reply via email to